Commit 2e662b65f05d550b6799ed6bfa9963b82279e6b7

Authored by Jens Axboe
Committed by Jens Axboe
1 parent 10fd48f237

[PATCH] elevator: abstract out the rbtree sort handling

The rbtree sort/lookup/reposition logic is mostly duplicated in
cfq/deadline/as, so move it to the elevator core. The IO schedulers
still provide the actual rb root, as we don't want to impose any
specific handling on the schedulers.

Introduce the helpers and an rb_node field in struct request to help
migrate the IO schedulers.

Signed-off-by: Jens Axboe <axboe@suse.de>
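
As a rough illustration of how an IO scheduler might migrate to these
helpers (not part of the patch itself): struct example_data and the
example_* functions below are hypothetical names; only elv_rb_add(),
elv_rb_del() and elv_rb_find() come from this commit. The scheduler owns
the rb_root, while the elevator core owns rq->rb_node.

#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/rbtree.h>

/* hypothetical per-queue data: one sorted tree per data direction */
struct example_data {
	struct rb_root sort_list[2];
};

static void example_add_rq_rb(struct example_data *ed, struct request *rq)
{
	struct rb_root *root = &ed->sort_list[rq_data_dir(rq)];

	/*
	 * elv_rb_add() returns an already-queued request if one sits at
	 * the same sector; resolving such an alias (for instance by
	 * dispatching it first) is the scheduler's job, so this sketch
	 * simply asserts that it never happens.
	 */
	BUG_ON(elv_rb_add(root, rq) != NULL);
}

static void example_del_rq_rb(struct example_data *ed, struct request *rq)
{
	elv_rb_del(&ed->sort_list[rq_data_dir(rq)], rq);
}

/*
 * Front-merge lookup: a queued request that starts exactly where the bio
 * ends is a candidate, mirroring the sector check in elv_try_merge().
 */
static struct request *example_find_front_merge(struct example_data *ed,
						struct bio *bio)
{
	sector_t sector = bio->bi_sector + bio_sectors(bio);

	return elv_rb_find(&ed->sort_list[bio_data_dir(bio)], sector);
}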

Showing 4 changed files with 130 additions and 19 deletions

1 /* 1 /*
2 * Block device elevator/IO-scheduler. 2 * Block device elevator/IO-scheduler.
3 * 3 *
4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 * 5 *
6 * 30042000 Jens Axboe <axboe@suse.de> : 6 * 30042000 Jens Axboe <axboe@suse.de> :
7 * 7 *
8 * Split the elevator a bit so that it is possible to choose a different 8 * Split the elevator a bit so that it is possible to choose a different
9 * one or even write a new "plug in". There are three pieces: 9 * one or even write a new "plug in". There are three pieces:
10 * - elevator_fn, inserts a new request in the queue list 10 * - elevator_fn, inserts a new request in the queue list
11 * - elevator_merge_fn, decides whether a new buffer can be merged with 11 * - elevator_merge_fn, decides whether a new buffer can be merged with
12 * an existing request 12 * an existing request
13 * - elevator_dequeue_fn, called when a request is taken off the active list 13 * - elevator_dequeue_fn, called when a request is taken off the active list
14 * 14 *
15 * 20082000 Dave Jones <davej@suse.de> : 15 * 20082000 Dave Jones <davej@suse.de> :
16 * Removed tests for max-bomb-segments, which was breaking elvtune 16 * Removed tests for max-bomb-segments, which was breaking elvtune
17 * when run without -bN 17 * when run without -bN
18 * 18 *
19 * Jens: 19 * Jens:
20 * - Rework again to work with bio instead of buffer_heads 20 * - Rework again to work with bio instead of buffer_heads
21 * - loose bi_dev comparisons, partition handling is right now 21 * - loose bi_dev comparisons, partition handling is right now
22 * - completely modularize elevator setup and teardown 22 * - completely modularize elevator setup and teardown
23 * 23 *
24 */ 24 */
25 #include <linux/kernel.h> 25 #include <linux/kernel.h>
26 #include <linux/fs.h> 26 #include <linux/fs.h>
27 #include <linux/blkdev.h> 27 #include <linux/blkdev.h>
28 #include <linux/elevator.h> 28 #include <linux/elevator.h>
29 #include <linux/bio.h> 29 #include <linux/bio.h>
30 #include <linux/module.h> 30 #include <linux/module.h>
31 #include <linux/slab.h> 31 #include <linux/slab.h>
32 #include <linux/init.h> 32 #include <linux/init.h>
33 #include <linux/compiler.h> 33 #include <linux/compiler.h>
34 #include <linux/delay.h> 34 #include <linux/delay.h>
35 #include <linux/blktrace_api.h> 35 #include <linux/blktrace_api.h>
36 #include <linux/hash.h> 36 #include <linux/hash.h>
37 37
38 #include <asm/uaccess.h> 38 #include <asm/uaccess.h>
39 39
40 static DEFINE_SPINLOCK(elv_list_lock); 40 static DEFINE_SPINLOCK(elv_list_lock);
41 static LIST_HEAD(elv_list); 41 static LIST_HEAD(elv_list);
42 42
43 /* 43 /*
44 * Merge hash stuff. 44 * Merge hash stuff.
45 */ 45 */
46 static const int elv_hash_shift = 6; 46 static const int elv_hash_shift = 6;
47 #define ELV_HASH_BLOCK(sec) ((sec) >> 3) 47 #define ELV_HASH_BLOCK(sec) ((sec) >> 3)
48 #define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) 48 #define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
49 #define ELV_HASH_ENTRIES (1 << elv_hash_shift) 49 #define ELV_HASH_ENTRIES (1 << elv_hash_shift)
50 #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) 50 #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
51 #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) 51 #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
52 52
53 /* 53 /*
54 * can we safely merge with this request? 54 * can we safely merge with this request?
55 */ 55 */
56 inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) 56 inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
57 { 57 {
58 if (!rq_mergeable(rq)) 58 if (!rq_mergeable(rq))
59 return 0; 59 return 0;
60 60
61 /* 61 /*
62 * different data direction or already started, don't merge 62 * different data direction or already started, don't merge
63 */ 63 */
64 if (bio_data_dir(bio) != rq_data_dir(rq)) 64 if (bio_data_dir(bio) != rq_data_dir(rq))
65 return 0; 65 return 0;
66 66
67 /* 67 /*
68 * same device and no special stuff set, merge is ok 68 * same device and no special stuff set, merge is ok
69 */ 69 */
70 if (rq->rq_disk == bio->bi_bdev->bd_disk && 70 if (rq->rq_disk == bio->bi_bdev->bd_disk &&
71 !rq->waiting && !rq->special) 71 !rq->waiting && !rq->special)
72 return 1; 72 return 1;
73 73
74 return 0; 74 return 0;
75 } 75 }
76 EXPORT_SYMBOL(elv_rq_merge_ok); 76 EXPORT_SYMBOL(elv_rq_merge_ok);
77 77
78 static inline int elv_try_merge(struct request *__rq, struct bio *bio) 78 static inline int elv_try_merge(struct request *__rq, struct bio *bio)
79 { 79 {
80 int ret = ELEVATOR_NO_MERGE; 80 int ret = ELEVATOR_NO_MERGE;
81 81
82 /* 82 /*
83 * we can merge and sequence is ok, check if it's possible 83 * we can merge and sequence is ok, check if it's possible
84 */ 84 */
85 if (elv_rq_merge_ok(__rq, bio)) { 85 if (elv_rq_merge_ok(__rq, bio)) {
86 if (__rq->sector + __rq->nr_sectors == bio->bi_sector) 86 if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
87 ret = ELEVATOR_BACK_MERGE; 87 ret = ELEVATOR_BACK_MERGE;
88 else if (__rq->sector - bio_sectors(bio) == bio->bi_sector) 88 else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
89 ret = ELEVATOR_FRONT_MERGE; 89 ret = ELEVATOR_FRONT_MERGE;
90 } 90 }
91 91
92 return ret; 92 return ret;
93 } 93 }
94 94
95 static struct elevator_type *elevator_find(const char *name) 95 static struct elevator_type *elevator_find(const char *name)
96 { 96 {
97 struct elevator_type *e = NULL; 97 struct elevator_type *e = NULL;
98 struct list_head *entry; 98 struct list_head *entry;
99 99
100 list_for_each(entry, &elv_list) { 100 list_for_each(entry, &elv_list) {
101 struct elevator_type *__e; 101 struct elevator_type *__e;
102 102
103 __e = list_entry(entry, struct elevator_type, list); 103 __e = list_entry(entry, struct elevator_type, list);
104 104
105 if (!strcmp(__e->elevator_name, name)) { 105 if (!strcmp(__e->elevator_name, name)) {
106 e = __e; 106 e = __e;
107 break; 107 break;
108 } 108 }
109 } 109 }
110 110
111 return e; 111 return e;
112 } 112 }
113 113
114 static void elevator_put(struct elevator_type *e) 114 static void elevator_put(struct elevator_type *e)
115 { 115 {
116 module_put(e->elevator_owner); 116 module_put(e->elevator_owner);
117 } 117 }
118 118
119 static struct elevator_type *elevator_get(const char *name) 119 static struct elevator_type *elevator_get(const char *name)
120 { 120 {
121 struct elevator_type *e; 121 struct elevator_type *e;
122 122
123 spin_lock_irq(&elv_list_lock); 123 spin_lock_irq(&elv_list_lock);
124 124
125 e = elevator_find(name); 125 e = elevator_find(name);
126 if (e && !try_module_get(e->elevator_owner)) 126 if (e && !try_module_get(e->elevator_owner))
127 e = NULL; 127 e = NULL;
128 128
129 spin_unlock_irq(&elv_list_lock); 129 spin_unlock_irq(&elv_list_lock);
130 130
131 return e; 131 return e;
132 } 132 }
133 133
134 static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq) 134 static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
135 { 135 {
136 return eq->ops->elevator_init_fn(q, eq); 136 return eq->ops->elevator_init_fn(q, eq);
137 } 137 }
138 138
139 static void elevator_attach(request_queue_t *q, struct elevator_queue *eq, 139 static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
140 void *data) 140 void *data)
141 { 141 {
142 q->elevator = eq; 142 q->elevator = eq;
143 eq->elevator_data = data; 143 eq->elevator_data = data;
144 } 144 }
145 145
146 static char chosen_elevator[16]; 146 static char chosen_elevator[16];
147 147
148 static int __init elevator_setup(char *str) 148 static int __init elevator_setup(char *str)
149 { 149 {
150 /* 150 /*
151 * Be backwards-compatible with previous kernels, so users 151 * Be backwards-compatible with previous kernels, so users
152 * won't get the wrong elevator. 152 * won't get the wrong elevator.
153 */ 153 */
154 if (!strcmp(str, "as")) 154 if (!strcmp(str, "as"))
155 strcpy(chosen_elevator, "anticipatory"); 155 strcpy(chosen_elevator, "anticipatory");
156 else 156 else
157 strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); 157 strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
158 return 1; 158 return 1;
159 } 159 }
160 160
161 __setup("elevator=", elevator_setup); 161 __setup("elevator=", elevator_setup);
162 162
163 static struct kobj_type elv_ktype; 163 static struct kobj_type elv_ktype;
164 164
165 static elevator_t *elevator_alloc(struct elevator_type *e) 165 static elevator_t *elevator_alloc(struct elevator_type *e)
166 { 166 {
167 elevator_t *eq; 167 elevator_t *eq;
168 int i; 168 int i;
169 169
170 eq = kmalloc(sizeof(elevator_t), GFP_KERNEL); 170 eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
171 if (unlikely(!eq)) 171 if (unlikely(!eq))
172 goto err; 172 goto err;
173 173
174 memset(eq, 0, sizeof(*eq)); 174 memset(eq, 0, sizeof(*eq));
175 eq->ops = &e->ops; 175 eq->ops = &e->ops;
176 eq->elevator_type = e; 176 eq->elevator_type = e;
177 kobject_init(&eq->kobj); 177 kobject_init(&eq->kobj);
178 snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); 178 snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
179 eq->kobj.ktype = &elv_ktype; 179 eq->kobj.ktype = &elv_ktype;
180 mutex_init(&eq->sysfs_lock); 180 mutex_init(&eq->sysfs_lock);
181 181
182 eq->hash = kmalloc(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, GFP_KERNEL); 182 eq->hash = kmalloc(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, GFP_KERNEL);
183 if (!eq->hash) 183 if (!eq->hash)
184 goto err; 184 goto err;
185 185
186 for (i = 0; i < ELV_HASH_ENTRIES; i++) 186 for (i = 0; i < ELV_HASH_ENTRIES; i++)
187 INIT_HLIST_HEAD(&eq->hash[i]); 187 INIT_HLIST_HEAD(&eq->hash[i]);
188 188
189 return eq; 189 return eq;
190 err: 190 err:
191 kfree(eq); 191 kfree(eq);
192 elevator_put(e); 192 elevator_put(e);
193 return NULL; 193 return NULL;
194 } 194 }
195 195
196 static void elevator_release(struct kobject *kobj) 196 static void elevator_release(struct kobject *kobj)
197 { 197 {
198 elevator_t *e = container_of(kobj, elevator_t, kobj); 198 elevator_t *e = container_of(kobj, elevator_t, kobj);
199 199
200 elevator_put(e->elevator_type); 200 elevator_put(e->elevator_type);
201 kfree(e->hash); 201 kfree(e->hash);
202 kfree(e); 202 kfree(e);
203 } 203 }
204 204
205 int elevator_init(request_queue_t *q, char *name) 205 int elevator_init(request_queue_t *q, char *name)
206 { 206 {
207 struct elevator_type *e = NULL; 207 struct elevator_type *e = NULL;
208 struct elevator_queue *eq; 208 struct elevator_queue *eq;
209 int ret = 0; 209 int ret = 0;
210 void *data; 210 void *data;
211 211
212 INIT_LIST_HEAD(&q->queue_head); 212 INIT_LIST_HEAD(&q->queue_head);
213 q->last_merge = NULL; 213 q->last_merge = NULL;
214 q->end_sector = 0; 214 q->end_sector = 0;
215 q->boundary_rq = NULL; 215 q->boundary_rq = NULL;
216 216
217 if (name && !(e = elevator_get(name))) 217 if (name && !(e = elevator_get(name)))
218 return -EINVAL; 218 return -EINVAL;
219 219
220 if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator))) 220 if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
221 printk("I/O scheduler %s not found\n", chosen_elevator); 221 printk("I/O scheduler %s not found\n", chosen_elevator);
222 222
223 if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) { 223 if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
224 printk("Default I/O scheduler not found, using no-op\n"); 224 printk("Default I/O scheduler not found, using no-op\n");
225 e = elevator_get("noop"); 225 e = elevator_get("noop");
226 } 226 }
227 227
228 eq = elevator_alloc(e); 228 eq = elevator_alloc(e);
229 if (!eq) 229 if (!eq)
230 return -ENOMEM; 230 return -ENOMEM;
231 231
232 data = elevator_init_queue(q, eq); 232 data = elevator_init_queue(q, eq);
233 if (!data) { 233 if (!data) {
234 kobject_put(&eq->kobj); 234 kobject_put(&eq->kobj);
235 return -ENOMEM; 235 return -ENOMEM;
236 } 236 }
237 237
238 elevator_attach(q, eq, data); 238 elevator_attach(q, eq, data);
239 return ret; 239 return ret;
240 } 240 }
241 241
242 EXPORT_SYMBOL(elevator_init);
243
242 void elevator_exit(elevator_t *e) 244 void elevator_exit(elevator_t *e)
243 { 245 {
244 mutex_lock(&e->sysfs_lock); 246 mutex_lock(&e->sysfs_lock);
245 if (e->ops->elevator_exit_fn) 247 if (e->ops->elevator_exit_fn)
246 e->ops->elevator_exit_fn(e); 248 e->ops->elevator_exit_fn(e);
247 e->ops = NULL; 249 e->ops = NULL;
248 mutex_unlock(&e->sysfs_lock); 250 mutex_unlock(&e->sysfs_lock);
249 251
250 kobject_put(&e->kobj); 252 kobject_put(&e->kobj);
251 } 253 }
252 254
255 EXPORT_SYMBOL(elevator_exit);
256
253 static inline void __elv_rqhash_del(struct request *rq) 257 static inline void __elv_rqhash_del(struct request *rq)
254 { 258 {
255 hlist_del_init(&rq->hash); 259 hlist_del_init(&rq->hash);
256 } 260 }
257 261
258 static void elv_rqhash_del(request_queue_t *q, struct request *rq) 262 static void elv_rqhash_del(request_queue_t *q, struct request *rq)
259 { 263 {
260 if (ELV_ON_HASH(rq)) 264 if (ELV_ON_HASH(rq))
261 __elv_rqhash_del(rq); 265 __elv_rqhash_del(rq);
262 } 266 }
263 267
264 static void elv_rqhash_add(request_queue_t *q, struct request *rq) 268 static void elv_rqhash_add(request_queue_t *q, struct request *rq)
265 { 269 {
266 elevator_t *e = q->elevator; 270 elevator_t *e = q->elevator;
267 271
268 BUG_ON(ELV_ON_HASH(rq)); 272 BUG_ON(ELV_ON_HASH(rq));
269 hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); 273 hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
270 } 274 }
271 275
272 static void elv_rqhash_reposition(request_queue_t *q, struct request *rq) 276 static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
273 { 277 {
274 __elv_rqhash_del(rq); 278 __elv_rqhash_del(rq);
275 elv_rqhash_add(q, rq); 279 elv_rqhash_add(q, rq);
276 } 280 }
277 281
278 static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset) 282 static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
279 { 283 {
280 elevator_t *e = q->elevator; 284 elevator_t *e = q->elevator;
281 struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; 285 struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
282 struct hlist_node *entry, *next; 286 struct hlist_node *entry, *next;
283 struct request *rq; 287 struct request *rq;
284 288
285 hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) { 289 hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
286 BUG_ON(!ELV_ON_HASH(rq)); 290 BUG_ON(!ELV_ON_HASH(rq));
287 291
288 if (unlikely(!rq_mergeable(rq))) { 292 if (unlikely(!rq_mergeable(rq))) {
289 __elv_rqhash_del(rq); 293 __elv_rqhash_del(rq);
290 continue; 294 continue;
291 } 295 }
292 296
293 if (rq_hash_key(rq) == offset) 297 if (rq_hash_key(rq) == offset)
294 return rq; 298 return rq;
295 } 299 }
296 300
297 return NULL; 301 return NULL;
298 } 302 }
299 303
300 /* 304 /*
305 * RB-tree support functions for inserting/lookup/removal of requests
306 * in a sorted RB tree.
307 */
308 struct request *elv_rb_add(struct rb_root *root, struct request *rq)
309 {
310 struct rb_node **p = &root->rb_node;
311 struct rb_node *parent = NULL;
312 struct request *__rq;
313
314 while (*p) {
315 parent = *p;
316 __rq = rb_entry(parent, struct request, rb_node);
317
318 if (rq->sector < __rq->sector)
319 p = &(*p)->rb_left;
320 else if (rq->sector > __rq->sector)
321 p = &(*p)->rb_right;
322 else
323 return __rq;
324 }
325
326 rb_link_node(&rq->rb_node, parent, p);
327 rb_insert_color(&rq->rb_node, root);
328 return NULL;
329 }
330
331 EXPORT_SYMBOL(elv_rb_add);
332
333 void elv_rb_del(struct rb_root *root, struct request *rq)
334 {
335 BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
336 rb_erase(&rq->rb_node, root);
337 RB_CLEAR_NODE(&rq->rb_node);
338 }
339
340 EXPORT_SYMBOL(elv_rb_del);
341
342 struct request *elv_rb_find(struct rb_root *root, sector_t sector)
343 {
344 struct rb_node *n = root->rb_node;
345 struct request *rq;
346
347 while (n) {
348 rq = rb_entry(n, struct request, rb_node);
349
350 if (sector < rq->sector)
351 n = n->rb_left;
352 else if (sector > rq->sector)
353 n = n->rb_right;
354 else
355 return rq;
356 }
357
358 return NULL;
359 }
360
361 EXPORT_SYMBOL(elv_rb_find);
362
363 /*
301 * Insert rq into dispatch queue of q. Queue lock must be held on 364 * Insert rq into dispatch queue of q. Queue lock must be held on
302 * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be 365 * entry. rq is sort inserted into the dispatch queue. To be used by
303 * appended to the dispatch queue. To be used by specific elevators. 366 * specific elevators.
304 */ 367 */
305 void elv_dispatch_sort(request_queue_t *q, struct request *rq) 368 void elv_dispatch_sort(request_queue_t *q, struct request *rq)
306 { 369 {
307 sector_t boundary; 370 sector_t boundary;
308 struct list_head *entry; 371 struct list_head *entry;
309 372
310 if (q->last_merge == rq) 373 if (q->last_merge == rq)
311 q->last_merge = NULL; 374 q->last_merge = NULL;
312 375
313 elv_rqhash_del(q, rq); 376 elv_rqhash_del(q, rq);
314 377
315 q->nr_sorted--; 378 q->nr_sorted--;
316 379
317 boundary = q->end_sector; 380 boundary = q->end_sector;
318 381
319 list_for_each_prev(entry, &q->queue_head) { 382 list_for_each_prev(entry, &q->queue_head) {
320 struct request *pos = list_entry_rq(entry); 383 struct request *pos = list_entry_rq(entry);
321 384
322 if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) 385 if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
323 break; 386 break;
324 if (rq->sector >= boundary) { 387 if (rq->sector >= boundary) {
325 if (pos->sector < boundary) 388 if (pos->sector < boundary)
326 continue; 389 continue;
327 } else { 390 } else {
328 if (pos->sector >= boundary) 391 if (pos->sector >= boundary)
329 break; 392 break;
330 } 393 }
331 if (rq->sector >= pos->sector) 394 if (rq->sector >= pos->sector)
332 break; 395 break;
333 } 396 }
334 397
335 list_add(&rq->queuelist, entry); 398 list_add(&rq->queuelist, entry);
336 } 399 }
337 400
401 EXPORT_SYMBOL(elv_dispatch_sort);
402
338 /* 403 /*
339 * This should be in elevator.h, but that requires pulling in rq and q 404 * Insert rq into dispatch queue of q. Queue lock must be held on
405 * entry. rq is added to the back of the dispatch queue. To be used by
406 * specific elevators.
340 */ 407 */
341 void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) 408 void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
342 { 409 {
343 if (q->last_merge == rq) 410 if (q->last_merge == rq)
344 q->last_merge = NULL; 411 q->last_merge = NULL;
345 412
346 elv_rqhash_del(q, rq); 413 elv_rqhash_del(q, rq);
347 414
348 q->nr_sorted--; 415 q->nr_sorted--;
349 416
350 q->end_sector = rq_end_sector(rq); 417 q->end_sector = rq_end_sector(rq);
351 q->boundary_rq = rq; 418 q->boundary_rq = rq;
352 list_add_tail(&rq->queuelist, &q->queue_head); 419 list_add_tail(&rq->queuelist, &q->queue_head);
353 } 420 }
354 421
422 EXPORT_SYMBOL(elv_dispatch_add_tail);
423
355 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) 424 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
356 { 425 {
357 elevator_t *e = q->elevator; 426 elevator_t *e = q->elevator;
358 struct request *__rq; 427 struct request *__rq;
359 int ret; 428 int ret;
360 429
361 /* 430 /*
362 * First try one-hit cache. 431 * First try one-hit cache.
363 */ 432 */
364 if (q->last_merge) { 433 if (q->last_merge) {
365 ret = elv_try_merge(q->last_merge, bio); 434 ret = elv_try_merge(q->last_merge, bio);
366 if (ret != ELEVATOR_NO_MERGE) { 435 if (ret != ELEVATOR_NO_MERGE) {
367 *req = q->last_merge; 436 *req = q->last_merge;
368 return ret; 437 return ret;
369 } 438 }
370 } 439 }
371 440
372 /* 441 /*
373 * See if our hash lookup can find a potential backmerge. 442 * See if our hash lookup can find a potential backmerge.
374 */ 443 */
375 __rq = elv_rqhash_find(q, bio->bi_sector); 444 __rq = elv_rqhash_find(q, bio->bi_sector);
376 if (__rq && elv_rq_merge_ok(__rq, bio)) { 445 if (__rq && elv_rq_merge_ok(__rq, bio)) {
377 *req = __rq; 446 *req = __rq;
378 return ELEVATOR_BACK_MERGE; 447 return ELEVATOR_BACK_MERGE;
379 } 448 }
380 449
381 if (e->ops->elevator_merge_fn) 450 if (e->ops->elevator_merge_fn)
382 return e->ops->elevator_merge_fn(q, req, bio); 451 return e->ops->elevator_merge_fn(q, req, bio);
383 452
384 return ELEVATOR_NO_MERGE; 453 return ELEVATOR_NO_MERGE;
385 } 454 }
386 455
387 void elv_merged_request(request_queue_t *q, struct request *rq) 456 void elv_merged_request(request_queue_t *q, struct request *rq, int type)
388 { 457 {
389 elevator_t *e = q->elevator; 458 elevator_t *e = q->elevator;
390 459
391 if (e->ops->elevator_merged_fn) 460 if (e->ops->elevator_merged_fn)
392 e->ops->elevator_merged_fn(q, rq); 461 e->ops->elevator_merged_fn(q, rq, type);
393 462
394 elv_rqhash_reposition(q, rq); 463 if (type == ELEVATOR_BACK_MERGE)
464 elv_rqhash_reposition(q, rq);
395 465
396 q->last_merge = rq; 466 q->last_merge = rq;
397 } 467 }
398 468
399 void elv_merge_requests(request_queue_t *q, struct request *rq, 469 void elv_merge_requests(request_queue_t *q, struct request *rq,
400 struct request *next) 470 struct request *next)
401 { 471 {
402 elevator_t *e = q->elevator; 472 elevator_t *e = q->elevator;
403 473
404 if (e->ops->elevator_merge_req_fn) 474 if (e->ops->elevator_merge_req_fn)
405 e->ops->elevator_merge_req_fn(q, rq, next); 475 e->ops->elevator_merge_req_fn(q, rq, next);
406 476
407 elv_rqhash_reposition(q, rq); 477 elv_rqhash_reposition(q, rq);
408 elv_rqhash_del(q, next); 478 elv_rqhash_del(q, next);
409 479
410 q->nr_sorted--; 480 q->nr_sorted--;
411 q->last_merge = rq; 481 q->last_merge = rq;
412 } 482 }
413 483
414 void elv_requeue_request(request_queue_t *q, struct request *rq) 484 void elv_requeue_request(request_queue_t *q, struct request *rq)
415 { 485 {
416 elevator_t *e = q->elevator; 486 elevator_t *e = q->elevator;
417 487
418 /* 488 /*
419 * it already went through dequeue, we need to decrement the 489 * it already went through dequeue, we need to decrement the
420 * in_flight count again 490 * in_flight count again
421 */ 491 */
422 if (blk_account_rq(rq)) { 492 if (blk_account_rq(rq)) {
423 q->in_flight--; 493 q->in_flight--;
424 if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn) 494 if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
425 e->ops->elevator_deactivate_req_fn(q, rq); 495 e->ops->elevator_deactivate_req_fn(q, rq);
426 } 496 }
427 497
428 rq->cmd_flags &= ~REQ_STARTED; 498 rq->cmd_flags &= ~REQ_STARTED;
429 499
430 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); 500 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
431 } 501 }
432 502
433 static void elv_drain_elevator(request_queue_t *q) 503 static void elv_drain_elevator(request_queue_t *q)
434 { 504 {
435 static int printed; 505 static int printed;
436 while (q->elevator->ops->elevator_dispatch_fn(q, 1)) 506 while (q->elevator->ops->elevator_dispatch_fn(q, 1))
437 ; 507 ;
438 if (q->nr_sorted == 0) 508 if (q->nr_sorted == 0)
439 return; 509 return;
440 if (printed++ < 10) { 510 if (printed++ < 10) {
441 printk(KERN_ERR "%s: forced dispatching is broken " 511 printk(KERN_ERR "%s: forced dispatching is broken "
442 "(nr_sorted=%u), please report this\n", 512 "(nr_sorted=%u), please report this\n",
443 q->elevator->elevator_type->elevator_name, q->nr_sorted); 513 q->elevator->elevator_type->elevator_name, q->nr_sorted);
444 } 514 }
445 } 515 }
446 516
447 void elv_insert(request_queue_t *q, struct request *rq, int where) 517 void elv_insert(request_queue_t *q, struct request *rq, int where)
448 { 518 {
449 struct list_head *pos; 519 struct list_head *pos;
450 unsigned ordseq; 520 unsigned ordseq;
451 int unplug_it = 1; 521 int unplug_it = 1;
452 522
453 blk_add_trace_rq(q, rq, BLK_TA_INSERT); 523 blk_add_trace_rq(q, rq, BLK_TA_INSERT);
454 524
455 rq->q = q; 525 rq->q = q;
456 526
457 switch (where) { 527 switch (where) {
458 case ELEVATOR_INSERT_FRONT: 528 case ELEVATOR_INSERT_FRONT:
459 rq->cmd_flags |= REQ_SOFTBARRIER; 529 rq->cmd_flags |= REQ_SOFTBARRIER;
460 530
461 list_add(&rq->queuelist, &q->queue_head); 531 list_add(&rq->queuelist, &q->queue_head);
462 break; 532 break;
463 533
464 case ELEVATOR_INSERT_BACK: 534 case ELEVATOR_INSERT_BACK:
465 rq->cmd_flags |= REQ_SOFTBARRIER; 535 rq->cmd_flags |= REQ_SOFTBARRIER;
466 elv_drain_elevator(q); 536 elv_drain_elevator(q);
467 list_add_tail(&rq->queuelist, &q->queue_head); 537 list_add_tail(&rq->queuelist, &q->queue_head);
468 /* 538 /*
469 * We kick the queue here for the following reasons. 539 * We kick the queue here for the following reasons.
470 * - The elevator might have returned NULL previously 540 * - The elevator might have returned NULL previously
471 * to delay requests and returned them now. As the 541 * to delay requests and returned them now. As the
472 * queue wasn't empty before this request, ll_rw_blk 542 * queue wasn't empty before this request, ll_rw_blk
473 * won't run the queue on return, resulting in hang. 543 * won't run the queue on return, resulting in hang.
474 * - Usually, back inserted requests won't be merged 544 * - Usually, back inserted requests won't be merged
475 * with anything. There's no point in delaying queue 545 * with anything. There's no point in delaying queue
476 * processing. 546 * processing.
477 */ 547 */
478 blk_remove_plug(q); 548 blk_remove_plug(q);
479 q->request_fn(q); 549 q->request_fn(q);
480 break; 550 break;
481 551
482 case ELEVATOR_INSERT_SORT: 552 case ELEVATOR_INSERT_SORT:
483 BUG_ON(!blk_fs_request(rq)); 553 BUG_ON(!blk_fs_request(rq));
484 rq->cmd_flags |= REQ_SORTED; 554 rq->cmd_flags |= REQ_SORTED;
485 q->nr_sorted++; 555 q->nr_sorted++;
486 if (rq_mergeable(rq)) { 556 if (rq_mergeable(rq)) {
487 elv_rqhash_add(q, rq); 557 elv_rqhash_add(q, rq);
488 if (!q->last_merge) 558 if (!q->last_merge)
489 q->last_merge = rq; 559 q->last_merge = rq;
490 } 560 }
491 561
492 /* 562 /*
493 * Some ioscheds (cfq) run q->request_fn directly, so 563 * Some ioscheds (cfq) run q->request_fn directly, so
494 * rq cannot be accessed after calling 564 * rq cannot be accessed after calling
495 * elevator_add_req_fn. 565 * elevator_add_req_fn.
496 */ 566 */
497 q->elevator->ops->elevator_add_req_fn(q, rq); 567 q->elevator->ops->elevator_add_req_fn(q, rq);
498 break; 568 break;
499 569
500 case ELEVATOR_INSERT_REQUEUE: 570 case ELEVATOR_INSERT_REQUEUE:
501 /* 571 /*
502 * If ordered flush isn't in progress, we do front 572 * If ordered flush isn't in progress, we do front
503 * insertion; otherwise, requests should be requeued 573 * insertion; otherwise, requests should be requeued
504 * in ordseq order. 574 * in ordseq order.
505 */ 575 */
506 rq->cmd_flags |= REQ_SOFTBARRIER; 576 rq->cmd_flags |= REQ_SOFTBARRIER;
507 577
508 if (q->ordseq == 0) { 578 if (q->ordseq == 0) {
509 list_add(&rq->queuelist, &q->queue_head); 579 list_add(&rq->queuelist, &q->queue_head);
510 break; 580 break;
511 } 581 }
512 582
513 ordseq = blk_ordered_req_seq(rq); 583 ordseq = blk_ordered_req_seq(rq);
514 584
515 list_for_each(pos, &q->queue_head) { 585 list_for_each(pos, &q->queue_head) {
516 struct request *pos_rq = list_entry_rq(pos); 586 struct request *pos_rq = list_entry_rq(pos);
517 if (ordseq <= blk_ordered_req_seq(pos_rq)) 587 if (ordseq <= blk_ordered_req_seq(pos_rq))
518 break; 588 break;
519 } 589 }
520 590
521 list_add_tail(&rq->queuelist, pos); 591 list_add_tail(&rq->queuelist, pos);
522 /* 592 /*
523 * most requeues happen because of a busy condition, don't 593 * most requeues happen because of a busy condition, don't
524 * force unplug of the queue for that case. 594 * force unplug of the queue for that case.
525 */ 595 */
526 unplug_it = 0; 596 unplug_it = 0;
527 break; 597 break;
528 598
529 default: 599 default:
530 printk(KERN_ERR "%s: bad insertion point %d\n", 600 printk(KERN_ERR "%s: bad insertion point %d\n",
531 __FUNCTION__, where); 601 __FUNCTION__, where);
532 BUG(); 602 BUG();
533 } 603 }
534 604
535 if (unplug_it && blk_queue_plugged(q)) { 605 if (unplug_it && blk_queue_plugged(q)) {
536 int nrq = q->rq.count[READ] + q->rq.count[WRITE] 606 int nrq = q->rq.count[READ] + q->rq.count[WRITE]
537 - q->in_flight; 607 - q->in_flight;
538 608
539 if (nrq >= q->unplug_thresh) 609 if (nrq >= q->unplug_thresh)
540 __generic_unplug_device(q); 610 __generic_unplug_device(q);
541 } 611 }
542 } 612 }
543 613
544 void __elv_add_request(request_queue_t *q, struct request *rq, int where, 614 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
545 int plug) 615 int plug)
546 { 616 {
547 if (q->ordcolor) 617 if (q->ordcolor)
548 rq->cmd_flags |= REQ_ORDERED_COLOR; 618 rq->cmd_flags |= REQ_ORDERED_COLOR;
549 619
550 if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { 620 if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
551 /* 621 /*
552 * toggle ordered color 622 * toggle ordered color
553 */ 623 */
554 if (blk_barrier_rq(rq)) 624 if (blk_barrier_rq(rq))
555 q->ordcolor ^= 1; 625 q->ordcolor ^= 1;
556 626
557 /* 627 /*
558 * barriers implicitly indicate back insertion 628 * barriers implicitly indicate back insertion
559 */ 629 */
560 if (where == ELEVATOR_INSERT_SORT) 630 if (where == ELEVATOR_INSERT_SORT)
561 where = ELEVATOR_INSERT_BACK; 631 where = ELEVATOR_INSERT_BACK;
562 632
563 /* 633 /*
564 * this request is scheduling boundary, update 634 * this request is scheduling boundary, update
565 * end_sector 635 * end_sector
566 */ 636 */
567 if (blk_fs_request(rq)) { 637 if (blk_fs_request(rq)) {
568 q->end_sector = rq_end_sector(rq); 638 q->end_sector = rq_end_sector(rq);
569 q->boundary_rq = rq; 639 q->boundary_rq = rq;
570 } 640 }
571 } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) 641 } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
572 where = ELEVATOR_INSERT_BACK; 642 where = ELEVATOR_INSERT_BACK;
573 643
574 if (plug) 644 if (plug)
575 blk_plug_device(q); 645 blk_plug_device(q);
576 646
577 elv_insert(q, rq, where); 647 elv_insert(q, rq, where);
578 } 648 }
579 649
650 EXPORT_SYMBOL(__elv_add_request);
651
580 void elv_add_request(request_queue_t *q, struct request *rq, int where, 652 void elv_add_request(request_queue_t *q, struct request *rq, int where,
581 int plug) 653 int plug)
582 { 654 {
583 unsigned long flags; 655 unsigned long flags;
584 656
585 spin_lock_irqsave(q->queue_lock, flags); 657 spin_lock_irqsave(q->queue_lock, flags);
586 __elv_add_request(q, rq, where, plug); 658 __elv_add_request(q, rq, where, plug);
587 spin_unlock_irqrestore(q->queue_lock, flags); 659 spin_unlock_irqrestore(q->queue_lock, flags);
588 } 660 }
589 661
662 EXPORT_SYMBOL(elv_add_request);
663
590 static inline struct request *__elv_next_request(request_queue_t *q) 664 static inline struct request *__elv_next_request(request_queue_t *q)
591 { 665 {
592 struct request *rq; 666 struct request *rq;
593 667
594 while (1) { 668 while (1) {
595 while (!list_empty(&q->queue_head)) { 669 while (!list_empty(&q->queue_head)) {
596 rq = list_entry_rq(q->queue_head.next); 670 rq = list_entry_rq(q->queue_head.next);
597 if (blk_do_ordered(q, &rq)) 671 if (blk_do_ordered(q, &rq))
598 return rq; 672 return rq;
599 } 673 }
600 674
601 if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) 675 if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
602 return NULL; 676 return NULL;
603 } 677 }
604 } 678 }
605 679
606 struct request *elv_next_request(request_queue_t *q) 680 struct request *elv_next_request(request_queue_t *q)
607 { 681 {
608 struct request *rq; 682 struct request *rq;
609 int ret; 683 int ret;
610 684
611 while ((rq = __elv_next_request(q)) != NULL) { 685 while ((rq = __elv_next_request(q)) != NULL) {
612 if (!(rq->cmd_flags & REQ_STARTED)) { 686 if (!(rq->cmd_flags & REQ_STARTED)) {
613 elevator_t *e = q->elevator; 687 elevator_t *e = q->elevator;
614 688
615 /* 689 /*
616 * This is the first time the device driver 690 * This is the first time the device driver
617 * sees this request (possibly after 691 * sees this request (possibly after
618 * requeueing). Notify IO scheduler. 692 * requeueing). Notify IO scheduler.
619 */ 693 */
620 if (blk_sorted_rq(rq) && 694 if (blk_sorted_rq(rq) &&
621 e->ops->elevator_activate_req_fn) 695 e->ops->elevator_activate_req_fn)
622 e->ops->elevator_activate_req_fn(q, rq); 696 e->ops->elevator_activate_req_fn(q, rq);
623 697
624 /* 698 /*
625 * just mark as started even if we don't start 699 * just mark as started even if we don't start
626 * it, a request that has been delayed should 700 * it, a request that has been delayed should
627 * not be passed by new incoming requests 701 * not be passed by new incoming requests
628 */ 702 */
629 rq->cmd_flags |= REQ_STARTED; 703 rq->cmd_flags |= REQ_STARTED;
630 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 704 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
631 } 705 }
632 706
633 if (!q->boundary_rq || q->boundary_rq == rq) { 707 if (!q->boundary_rq || q->boundary_rq == rq) {
634 q->end_sector = rq_end_sector(rq); 708 q->end_sector = rq_end_sector(rq);
635 q->boundary_rq = NULL; 709 q->boundary_rq = NULL;
636 } 710 }
637 711
638 if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn) 712 if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
639 break; 713 break;
640 714
641 ret = q->prep_rq_fn(q, rq); 715 ret = q->prep_rq_fn(q, rq);
642 if (ret == BLKPREP_OK) { 716 if (ret == BLKPREP_OK) {
643 break; 717 break;
644 } else if (ret == BLKPREP_DEFER) { 718 } else if (ret == BLKPREP_DEFER) {
645 /* 719 /*
646 * the request may have been (partially) prepped. 720 * the request may have been (partially) prepped.
647 * we need to keep this request in the front to 721 * we need to keep this request in the front to
648 * avoid resource deadlock. REQ_STARTED will 722 * avoid resource deadlock. REQ_STARTED will
649 * prevent other fs requests from passing this one. 723 * prevent other fs requests from passing this one.
650 */ 724 */
651 rq = NULL; 725 rq = NULL;
652 break; 726 break;
653 } else if (ret == BLKPREP_KILL) { 727 } else if (ret == BLKPREP_KILL) {
654 int nr_bytes = rq->hard_nr_sectors << 9; 728 int nr_bytes = rq->hard_nr_sectors << 9;
655 729
656 if (!nr_bytes) 730 if (!nr_bytes)
657 nr_bytes = rq->data_len; 731 nr_bytes = rq->data_len;
658 732
659 blkdev_dequeue_request(rq); 733 blkdev_dequeue_request(rq);
660 rq->cmd_flags |= REQ_QUIET; 734 rq->cmd_flags |= REQ_QUIET;
661 end_that_request_chunk(rq, 0, nr_bytes); 735 end_that_request_chunk(rq, 0, nr_bytes);
662 end_that_request_last(rq, 0); 736 end_that_request_last(rq, 0);
663 } else { 737 } else {
664 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, 738 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
665 ret); 739 ret);
666 break; 740 break;
667 } 741 }
668 } 742 }
669 743
670 return rq; 744 return rq;
671 } 745 }
672 746
747 EXPORT_SYMBOL(elv_next_request);
748
673 void elv_dequeue_request(request_queue_t *q, struct request *rq) 749 void elv_dequeue_request(request_queue_t *q, struct request *rq)
674 { 750 {
675 BUG_ON(list_empty(&rq->queuelist)); 751 BUG_ON(list_empty(&rq->queuelist));
676 BUG_ON(ELV_ON_HASH(rq)); 752 BUG_ON(ELV_ON_HASH(rq));
677 753
678 list_del_init(&rq->queuelist); 754 list_del_init(&rq->queuelist);
679 755
680 /* 756 /*
681 * the time frame between a request being removed from the lists 757 * the time frame between a request being removed from the lists
682 * and to it is freed is accounted as io that is in progress at 758 * and to it is freed is accounted as io that is in progress at
683 * the driver side. 759 * the driver side.
684 */ 760 */
685 if (blk_account_rq(rq)) 761 if (blk_account_rq(rq))
686 q->in_flight++; 762 q->in_flight++;
687 } 763 }
688 764
765 EXPORT_SYMBOL(elv_dequeue_request);
766
689 int elv_queue_empty(request_queue_t *q) 767 int elv_queue_empty(request_queue_t *q)
690 { 768 {
691 elevator_t *e = q->elevator; 769 elevator_t *e = q->elevator;
692 770
693 if (!list_empty(&q->queue_head)) 771 if (!list_empty(&q->queue_head))
694 return 0; 772 return 0;
695 773
696 if (e->ops->elevator_queue_empty_fn) 774 if (e->ops->elevator_queue_empty_fn)
697 return e->ops->elevator_queue_empty_fn(q); 775 return e->ops->elevator_queue_empty_fn(q);
698 776
699 return 1; 777 return 1;
700 } 778 }
701 779
780 EXPORT_SYMBOL(elv_queue_empty);
781
702 struct request *elv_latter_request(request_queue_t *q, struct request *rq) 782 struct request *elv_latter_request(request_queue_t *q, struct request *rq)
703 { 783 {
704 elevator_t *e = q->elevator; 784 elevator_t *e = q->elevator;
705 785
706 if (e->ops->elevator_latter_req_fn) 786 if (e->ops->elevator_latter_req_fn)
707 return e->ops->elevator_latter_req_fn(q, rq); 787 return e->ops->elevator_latter_req_fn(q, rq);
708 return NULL; 788 return NULL;
709 } 789 }
710 790
711 struct request *elv_former_request(request_queue_t *q, struct request *rq) 791 struct request *elv_former_request(request_queue_t *q, struct request *rq)
712 { 792 {
713 elevator_t *e = q->elevator; 793 elevator_t *e = q->elevator;
714 794
715 if (e->ops->elevator_former_req_fn) 795 if (e->ops->elevator_former_req_fn)
716 return e->ops->elevator_former_req_fn(q, rq); 796 return e->ops->elevator_former_req_fn(q, rq);
717 return NULL; 797 return NULL;
718 } 798 }
719 799
720 int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio, 800 int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
721 gfp_t gfp_mask) 801 gfp_t gfp_mask)
722 { 802 {
723 elevator_t *e = q->elevator; 803 elevator_t *e = q->elevator;
724 804
725 if (e->ops->elevator_set_req_fn) 805 if (e->ops->elevator_set_req_fn)
726 return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask); 806 return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
727 807
728 rq->elevator_private = NULL; 808 rq->elevator_private = NULL;
729 return 0; 809 return 0;
730 } 810 }
731 811
732 void elv_put_request(request_queue_t *q, struct request *rq) 812 void elv_put_request(request_queue_t *q, struct request *rq)
733 { 813 {
734 elevator_t *e = q->elevator; 814 elevator_t *e = q->elevator;
735 815
736 if (e->ops->elevator_put_req_fn) 816 if (e->ops->elevator_put_req_fn)
737 e->ops->elevator_put_req_fn(q, rq); 817 e->ops->elevator_put_req_fn(q, rq);
738 } 818 }
739 819
740 int elv_may_queue(request_queue_t *q, int rw, struct bio *bio) 820 int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
741 { 821 {
742 elevator_t *e = q->elevator; 822 elevator_t *e = q->elevator;
743 823
744 if (e->ops->elevator_may_queue_fn) 824 if (e->ops->elevator_may_queue_fn)
745 return e->ops->elevator_may_queue_fn(q, rw, bio); 825 return e->ops->elevator_may_queue_fn(q, rw, bio);
746 826
747 return ELV_MQUEUE_MAY; 827 return ELV_MQUEUE_MAY;
748 } 828 }
749 829
750 void elv_completed_request(request_queue_t *q, struct request *rq) 830 void elv_completed_request(request_queue_t *q, struct request *rq)
751 { 831 {
752 elevator_t *e = q->elevator; 832 elevator_t *e = q->elevator;
753 833
754 /* 834 /*
755 * request is released from the driver, io must be done 835 * request is released from the driver, io must be done
756 */ 836 */
757 if (blk_account_rq(rq)) { 837 if (blk_account_rq(rq)) {
758 q->in_flight--; 838 q->in_flight--;
759 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) 839 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
760 e->ops->elevator_completed_req_fn(q, rq); 840 e->ops->elevator_completed_req_fn(q, rq);
761 } 841 }
762 842
763 /* 843 /*
764 * Check if the queue is waiting for fs requests to be 844 * Check if the queue is waiting for fs requests to be
765 * drained for flush sequence. 845 * drained for flush sequence.
766 */ 846 */
767 if (unlikely(q->ordseq)) { 847 if (unlikely(q->ordseq)) {
768 struct request *first_rq = list_entry_rq(q->queue_head.next); 848 struct request *first_rq = list_entry_rq(q->queue_head.next);
769 if (q->in_flight == 0 && 849 if (q->in_flight == 0 &&
770 blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && 850 blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
771 blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { 851 blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
772 blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); 852 blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
773 q->request_fn(q); 853 q->request_fn(q);
774 } 854 }
775 } 855 }
776 } 856 }
777 857
778 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) 858 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
779 859
780 static ssize_t 860 static ssize_t
781 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 861 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
782 { 862 {
783 elevator_t *e = container_of(kobj, elevator_t, kobj); 863 elevator_t *e = container_of(kobj, elevator_t, kobj);
784 struct elv_fs_entry *entry = to_elv(attr); 864 struct elv_fs_entry *entry = to_elv(attr);
785 ssize_t error; 865 ssize_t error;
786 866
787 if (!entry->show) 867 if (!entry->show)
788 return -EIO; 868 return -EIO;
789 869
790 mutex_lock(&e->sysfs_lock); 870 mutex_lock(&e->sysfs_lock);
791 error = e->ops ? entry->show(e, page) : -ENOENT; 871 error = e->ops ? entry->show(e, page) : -ENOENT;
792 mutex_unlock(&e->sysfs_lock); 872 mutex_unlock(&e->sysfs_lock);
793 return error; 873 return error;
794 } 874 }
795 875
796 static ssize_t 876 static ssize_t
797 elv_attr_store(struct kobject *kobj, struct attribute *attr, 877 elv_attr_store(struct kobject *kobj, struct attribute *attr,
798 const char *page, size_t length) 878 const char *page, size_t length)
799 { 879 {
800 elevator_t *e = container_of(kobj, elevator_t, kobj); 880 elevator_t *e = container_of(kobj, elevator_t, kobj);
801 struct elv_fs_entry *entry = to_elv(attr); 881 struct elv_fs_entry *entry = to_elv(attr);
802 ssize_t error; 882 ssize_t error;
803 883
804 if (!entry->store) 884 if (!entry->store)
805 return -EIO; 885 return -EIO;
806 886
807 mutex_lock(&e->sysfs_lock); 887 mutex_lock(&e->sysfs_lock);
808 error = e->ops ? entry->store(e, page, length) : -ENOENT; 888 error = e->ops ? entry->store(e, page, length) : -ENOENT;
809 mutex_unlock(&e->sysfs_lock); 889 mutex_unlock(&e->sysfs_lock);
810 return error; 890 return error;
811 } 891 }
812 892
813 static struct sysfs_ops elv_sysfs_ops = { 893 static struct sysfs_ops elv_sysfs_ops = {
814 .show = elv_attr_show, 894 .show = elv_attr_show,
815 .store = elv_attr_store, 895 .store = elv_attr_store,
816 }; 896 };
817 897
818 static struct kobj_type elv_ktype = { 898 static struct kobj_type elv_ktype = {
819 .sysfs_ops = &elv_sysfs_ops, 899 .sysfs_ops = &elv_sysfs_ops,
820 .release = elevator_release, 900 .release = elevator_release,
821 }; 901 };
822 902
823 int elv_register_queue(struct request_queue *q) 903 int elv_register_queue(struct request_queue *q)
824 { 904 {
825 elevator_t *e = q->elevator; 905 elevator_t *e = q->elevator;
826 int error; 906 int error;
827 907
828 e->kobj.parent = &q->kobj; 908 e->kobj.parent = &q->kobj;
829 909
830 error = kobject_add(&e->kobj); 910 error = kobject_add(&e->kobj);
831 if (!error) { 911 if (!error) {
832 struct elv_fs_entry *attr = e->elevator_type->elevator_attrs; 912 struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
833 if (attr) { 913 if (attr) {
834 while (attr->attr.name) { 914 while (attr->attr.name) {
835 if (sysfs_create_file(&e->kobj, &attr->attr)) 915 if (sysfs_create_file(&e->kobj, &attr->attr))
836 break; 916 break;
837 attr++; 917 attr++;
838 } 918 }
839 } 919 }
840 kobject_uevent(&e->kobj, KOBJ_ADD); 920 kobject_uevent(&e->kobj, KOBJ_ADD);
841 } 921 }
842 return error; 922 return error;
843 } 923 }
844 924
845 static void __elv_unregister_queue(elevator_t *e) 925 static void __elv_unregister_queue(elevator_t *e)
846 { 926 {
847 kobject_uevent(&e->kobj, KOBJ_REMOVE); 927 kobject_uevent(&e->kobj, KOBJ_REMOVE);
848 kobject_del(&e->kobj); 928 kobject_del(&e->kobj);
849 } 929 }
850 930
851 void elv_unregister_queue(struct request_queue *q) 931 void elv_unregister_queue(struct request_queue *q)
852 { 932 {
853 if (q) 933 if (q)
854 __elv_unregister_queue(q->elevator); 934 __elv_unregister_queue(q->elevator);
855 } 935 }
856 936
857 int elv_register(struct elevator_type *e) 937 int elv_register(struct elevator_type *e)
858 { 938 {
859 spin_lock_irq(&elv_list_lock); 939 spin_lock_irq(&elv_list_lock);
860 BUG_ON(elevator_find(e->elevator_name)); 940 BUG_ON(elevator_find(e->elevator_name));
861 list_add_tail(&e->list, &elv_list); 941 list_add_tail(&e->list, &elv_list);
862 spin_unlock_irq(&elv_list_lock); 942 spin_unlock_irq(&elv_list_lock);
863 943
864 printk(KERN_INFO "io scheduler %s registered", e->elevator_name); 944 printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
865 if (!strcmp(e->elevator_name, chosen_elevator) || 945 if (!strcmp(e->elevator_name, chosen_elevator) ||
866 (!*chosen_elevator && 946 (!*chosen_elevator &&
867 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) 947 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
868 printk(" (default)"); 948 printk(" (default)");
869 printk("\n"); 949 printk("\n");
870 return 0; 950 return 0;
871 } 951 }
872 EXPORT_SYMBOL_GPL(elv_register); 952 EXPORT_SYMBOL_GPL(elv_register);
873 953
874 void elv_unregister(struct elevator_type *e) 954 void elv_unregister(struct elevator_type *e)
875 { 955 {
876 struct task_struct *g, *p; 956 struct task_struct *g, *p;
877 957
878 /* 958 /*
879 * Iterate every thread in the process to remove the io contexts. 959 * Iterate every thread in the process to remove the io contexts.
880 */ 960 */
881 if (e->ops.trim) { 961 if (e->ops.trim) {
882 read_lock(&tasklist_lock); 962 read_lock(&tasklist_lock);
883 do_each_thread(g, p) { 963 do_each_thread(g, p) {
884 task_lock(p); 964 task_lock(p);
885 if (p->io_context) 965 if (p->io_context)
886 e->ops.trim(p->io_context); 966 e->ops.trim(p->io_context);
887 task_unlock(p); 967 task_unlock(p);
888 } while_each_thread(g, p); 968 } while_each_thread(g, p);
889 read_unlock(&tasklist_lock); 969 read_unlock(&tasklist_lock);
890 } 970 }
891 971
892 spin_lock_irq(&elv_list_lock); 972 spin_lock_irq(&elv_list_lock);
893 list_del_init(&e->list); 973 list_del_init(&e->list);
894 spin_unlock_irq(&elv_list_lock); 974 spin_unlock_irq(&elv_list_lock);
895 } 975 }
896 EXPORT_SYMBOL_GPL(elv_unregister); 976 EXPORT_SYMBOL_GPL(elv_unregister);
897 977
898 /* 978 /*
899 * switch to new_e io scheduler. be careful not to introduce deadlocks - 979 * switch to new_e io scheduler. be careful not to introduce deadlocks -
900 * we don't free the old io scheduler, before we have allocated what we 980 * we don't free the old io scheduler, before we have allocated what we
901 * need for the new one. this way we have a chance of going back to the old 981 * need for the new one. this way we have a chance of going back to the old
902 * one, if the new one fails init for some reason. 982 * one, if the new one fails init for some reason.
903 */ 983 */
904 static int elevator_switch(request_queue_t *q, struct elevator_type *new_e) 984 static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
905 { 985 {
906 elevator_t *old_elevator, *e; 986 elevator_t *old_elevator, *e;
907 void *data; 987 void *data;
908 988
909 /* 989 /*
910 * Allocate new elevator 990 * Allocate new elevator
911 */ 991 */
912 e = elevator_alloc(new_e); 992 e = elevator_alloc(new_e);
913 if (!e) 993 if (!e)
914 return 0; 994 return 0;
915 995
916 data = elevator_init_queue(q, e); 996 data = elevator_init_queue(q, e);
917 if (!data) { 997 if (!data) {
918 kobject_put(&e->kobj); 998 kobject_put(&e->kobj);
919 return 0; 999 return 0;
920 } 1000 }
921 1001
922 /* 1002 /*
923 * Turn on BYPASS and drain all requests w/ elevator private data 1003 * Turn on BYPASS and drain all requests w/ elevator private data
924 */ 1004 */
925 spin_lock_irq(q->queue_lock); 1005 spin_lock_irq(q->queue_lock);
926 1006
927 set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 1007 set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
928 1008
929 elv_drain_elevator(q); 1009 elv_drain_elevator(q);
930 1010
931 while (q->rq.elvpriv) { 1011 while (q->rq.elvpriv) {
932 blk_remove_plug(q); 1012 blk_remove_plug(q);
933 q->request_fn(q); 1013 q->request_fn(q);
934 spin_unlock_irq(q->queue_lock); 1014 spin_unlock_irq(q->queue_lock);
935 msleep(10); 1015 msleep(10);
936 spin_lock_irq(q->queue_lock); 1016 spin_lock_irq(q->queue_lock);
937 elv_drain_elevator(q); 1017 elv_drain_elevator(q);
938 } 1018 }
939 1019
940 /* 1020 /*
941 * Remember old elevator. 1021 * Remember old elevator.
942 */ 1022 */
943 old_elevator = q->elevator; 1023 old_elevator = q->elevator;
944 1024
945 /* 1025 /*
946 * attach and start new elevator 1026 * attach and start new elevator
947 */ 1027 */
948 elevator_attach(q, e, data); 1028 elevator_attach(q, e, data);
949 1029
950 spin_unlock_irq(q->queue_lock); 1030 spin_unlock_irq(q->queue_lock);
951 1031
952 __elv_unregister_queue(old_elevator); 1032 __elv_unregister_queue(old_elevator);
953 1033
954 if (elv_register_queue(q)) 1034 if (elv_register_queue(q))
955 goto fail_register; 1035 goto fail_register;
956 1036
957 /* 1037 /*
958 * finally exit old elevator and turn off BYPASS. 1038 * finally exit old elevator and turn off BYPASS.
959 */ 1039 */
960 elevator_exit(old_elevator); 1040 elevator_exit(old_elevator);
961 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 1041 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
962 return 1; 1042 return 1;
963 1043
964 fail_register: 1044 fail_register:
965 /* 1045 /*
966 * switch failed, exit the new io scheduler and reattach the old 1046 * switch failed, exit the new io scheduler and reattach the old
967 * one again (along with re-adding the sysfs dir) 1047 * one again (along with re-adding the sysfs dir)
968 */ 1048 */
969 elevator_exit(e); 1049 elevator_exit(e);
970 q->elevator = old_elevator; 1050 q->elevator = old_elevator;
971 elv_register_queue(q); 1051 elv_register_queue(q);
972 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 1052 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
973 return 0; 1053 return 0;
974 } 1054 }
975 1055
976 ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count) 1056 ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
977 { 1057 {
978 char elevator_name[ELV_NAME_MAX]; 1058 char elevator_name[ELV_NAME_MAX];
979 size_t len; 1059 size_t len;
980 struct elevator_type *e; 1060 struct elevator_type *e;
981 1061
982 elevator_name[sizeof(elevator_name) - 1] = '\0'; 1062 elevator_name[sizeof(elevator_name) - 1] = '\0';
983 strncpy(elevator_name, name, sizeof(elevator_name) - 1); 1063 strncpy(elevator_name, name, sizeof(elevator_name) - 1);
984 len = strlen(elevator_name); 1064 len = strlen(elevator_name);
985 1065
986 if (len && elevator_name[len - 1] == '\n') 1066 if (len && elevator_name[len - 1] == '\n')
987 elevator_name[len - 1] = '\0'; 1067 elevator_name[len - 1] = '\0';
988 1068
989 e = elevator_get(elevator_name); 1069 e = elevator_get(elevator_name);
990 if (!e) { 1070 if (!e) {
991 printk(KERN_ERR "elevator: type %s not found\n", elevator_name); 1071 printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
992 return -EINVAL; 1072 return -EINVAL;
993 } 1073 }
994 1074
995 if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { 1075 if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
996 elevator_put(e); 1076 elevator_put(e);
997 return count; 1077 return count;
998 } 1078 }
999 1079
1000 if (!elevator_switch(q, e)) 1080 if (!elevator_switch(q, e))
1001 printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name); 1081 printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name);
1002 return count; 1082 return count;
1003 } 1083 }
1004 1084
1005 ssize_t elv_iosched_show(request_queue_t *q, char *name) 1085 ssize_t elv_iosched_show(request_queue_t *q, char *name)
1006 { 1086 {
1007 elevator_t *e = q->elevator; 1087 elevator_t *e = q->elevator;
1008 struct elevator_type *elv = e->elevator_type; 1088 struct elevator_type *elv = e->elevator_type;
1009 struct list_head *entry; 1089 struct list_head *entry;
1010 int len = 0; 1090 int len = 0;
1011 1091
1012 spin_lock_irq(q->queue_lock); 1092 spin_lock_irq(q->queue_lock);
1013 list_for_each(entry, &elv_list) { 1093 list_for_each(entry, &elv_list) {
1014 struct elevator_type *__e; 1094 struct elevator_type *__e;
1015 1095
1016 __e = list_entry(entry, struct elevator_type, list); 1096 __e = list_entry(entry, struct elevator_type, list);
1017 if (!strcmp(elv->elevator_name, __e->elevator_name)) 1097 if (!strcmp(elv->elevator_name, __e->elevator_name))
1018 len += sprintf(name+len, "[%s] ", elv->elevator_name); 1098 len += sprintf(name+len, "[%s] ", elv->elevator_name);
1019 else 1099 else
1020 len += sprintf(name+len, "%s ", __e->elevator_name); 1100 len += sprintf(name+len, "%s ", __e->elevator_name);
1021 } 1101 }
1022 spin_unlock_irq(q->queue_lock); 1102 spin_unlock_irq(q->queue_lock);
1023 1103
1024 len += sprintf(len+name, "\n"); 1104 len += sprintf(len+name, "\n");
1025 return len; 1105 return len;
1026 } 1106 }
1027 1107
1028 EXPORT_SYMBOL(elv_dispatch_sort); 1108 struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
1029 EXPORT_SYMBOL(elv_add_request); 1109 {
1030 EXPORT_SYMBOL(__elv_add_request); 1110 struct rb_node *rbprev = rb_prev(&rq->rb_node);
1031 EXPORT_SYMBOL(elv_next_request); 1111
1032 EXPORT_SYMBOL(elv_dequeue_request); 1112 if (rbprev)
1033 EXPORT_SYMBOL(elv_queue_empty); 1113 return rb_entry_rq(rbprev);
1034 EXPORT_SYMBOL(elevator_exit); 1114
1035 EXPORT_SYMBOL(elevator_init); 1115 return NULL;
1116 }
1117
1118 EXPORT_SYMBOL(elv_rb_former_request);
1119
1120 struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
1121 {
1122 struct rb_node *rbnext = rb_next(&rq->rb_node);
1123
1124 if (rbnext)
1125 return rb_entry_rq(rbnext);
1126
1127 return NULL;
1128 }
1129
1130 EXPORT_SYMBOL(elv_rb_latter_request);
1036 1131
1 /* 1 /*
2 * Copyright (C) 1991, 1992 Linus Torvalds 2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics 3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> 5 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
6 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000 6 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000
7 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 7 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
8 */ 8 */
9 9
10 /* 10 /*
11 * This handles all read/write requests to block devices 11 * This handles all read/write requests to block devices
12 */ 12 */
13 #include <linux/kernel.h> 13 #include <linux/kernel.h>
14 #include <linux/module.h> 14 #include <linux/module.h>
15 #include <linux/backing-dev.h> 15 #include <linux/backing-dev.h>
16 #include <linux/bio.h> 16 #include <linux/bio.h>
17 #include <linux/blkdev.h> 17 #include <linux/blkdev.h>
18 #include <linux/highmem.h> 18 #include <linux/highmem.h>
19 #include <linux/mm.h> 19 #include <linux/mm.h>
20 #include <linux/kernel_stat.h> 20 #include <linux/kernel_stat.h>
21 #include <linux/string.h> 21 #include <linux/string.h>
22 #include <linux/init.h> 22 #include <linux/init.h>
23 #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ 23 #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
24 #include <linux/completion.h> 24 #include <linux/completion.h>
25 #include <linux/slab.h> 25 #include <linux/slab.h>
26 #include <linux/swap.h> 26 #include <linux/swap.h>
27 #include <linux/writeback.h> 27 #include <linux/writeback.h>
28 #include <linux/interrupt.h> 28 #include <linux/interrupt.h>
29 #include <linux/cpu.h> 29 #include <linux/cpu.h>
30 #include <linux/blktrace_api.h> 30 #include <linux/blktrace_api.h>
31 31
32 /* 32 /*
33 * for max sense size 33 * for max sense size
34 */ 34 */
35 #include <scsi/scsi_cmnd.h> 35 #include <scsi/scsi_cmnd.h>
36 36
37 static void blk_unplug_work(void *data); 37 static void blk_unplug_work(void *data);
38 static void blk_unplug_timeout(unsigned long data); 38 static void blk_unplug_timeout(unsigned long data);
39 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); 39 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
40 static void init_request_from_bio(struct request *req, struct bio *bio); 40 static void init_request_from_bio(struct request *req, struct bio *bio);
41 static int __make_request(request_queue_t *q, struct bio *bio); 41 static int __make_request(request_queue_t *q, struct bio *bio);
42 42
43 /* 43 /*
44 * For the allocated request tables 44 * For the allocated request tables
45 */ 45 */
46 static kmem_cache_t *request_cachep; 46 static kmem_cache_t *request_cachep;
47 47
48 /* 48 /*
49 * For queue allocation 49 * For queue allocation
50 */ 50 */
51 static kmem_cache_t *requestq_cachep; 51 static kmem_cache_t *requestq_cachep;
52 52
53 /* 53 /*
54 * For io context allocations 54 * For io context allocations
55 */ 55 */
56 static kmem_cache_t *iocontext_cachep; 56 static kmem_cache_t *iocontext_cachep;
57 57
58 static wait_queue_head_t congestion_wqh[2] = { 58 static wait_queue_head_t congestion_wqh[2] = {
59 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), 59 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
60 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) 60 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
61 }; 61 };
62 62
63 /* 63 /*
64 * Controlling structure to kblockd 64 * Controlling structure to kblockd
65 */ 65 */
66 static struct workqueue_struct *kblockd_workqueue; 66 static struct workqueue_struct *kblockd_workqueue;
67 67
68 unsigned long blk_max_low_pfn, blk_max_pfn; 68 unsigned long blk_max_low_pfn, blk_max_pfn;
69 69
70 EXPORT_SYMBOL(blk_max_low_pfn); 70 EXPORT_SYMBOL(blk_max_low_pfn);
71 EXPORT_SYMBOL(blk_max_pfn); 71 EXPORT_SYMBOL(blk_max_pfn);
72 72
73 static DEFINE_PER_CPU(struct list_head, blk_cpu_done); 73 static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
74 74
75 /* Amount of time in which a process may batch requests */ 75 /* Amount of time in which a process may batch requests */
76 #define BLK_BATCH_TIME (HZ/50UL) 76 #define BLK_BATCH_TIME (HZ/50UL)
77 77
78 /* Number of requests a "batching" process may submit */ 78 /* Number of requests a "batching" process may submit */
79 #define BLK_BATCH_REQ 32 79 #define BLK_BATCH_REQ 32
80 80
81 /* 81 /*
82 * Return the threshold (number of used requests) at which the queue is 82 * Return the threshold (number of used requests) at which the queue is
83 * considered to be congested. It includes a little hysteresis to keep the 83 * considered to be congested. It includes a little hysteresis to keep the
84 * context switch rate down. 84 * context switch rate down.
85 */ 85 */
86 static inline int queue_congestion_on_threshold(struct request_queue *q) 86 static inline int queue_congestion_on_threshold(struct request_queue *q)
87 { 87 {
88 return q->nr_congestion_on; 88 return q->nr_congestion_on;
89 } 89 }
90 90
91 /* 91 /*
92 * The threshold at which a queue is considered to be uncongested 92 * The threshold at which a queue is considered to be uncongested
93 */ 93 */
94 static inline int queue_congestion_off_threshold(struct request_queue *q) 94 static inline int queue_congestion_off_threshold(struct request_queue *q)
95 { 95 {
96 return q->nr_congestion_off; 96 return q->nr_congestion_off;
97 } 97 }
98 98
99 static void blk_queue_congestion_threshold(struct request_queue *q) 99 static void blk_queue_congestion_threshold(struct request_queue *q)
100 { 100 {
101 int nr; 101 int nr;
102 102
103 nr = q->nr_requests - (q->nr_requests / 8) + 1; 103 nr = q->nr_requests - (q->nr_requests / 8) + 1;
104 if (nr > q->nr_requests) 104 if (nr > q->nr_requests)
105 nr = q->nr_requests; 105 nr = q->nr_requests;
106 q->nr_congestion_on = nr; 106 q->nr_congestion_on = nr;
107 107
108 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; 108 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
109 if (nr < 1) 109 if (nr < 1)
110 nr = 1; 110 nr = 1;
111 q->nr_congestion_off = nr; 111 q->nr_congestion_off = nr;
112 } 112 }
113 113
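The two thresholds deliberately differ: a queue flagged congested must drain some way below that point before the flag is cleared, which keeps tasks from bouncing in and out of congestion waits. A small user-space sketch of the same arithmetic, assuming the default BLKDEV_MAX_RQ of 128:

#include <stdio.h>

/* Mirrors blk_queue_congestion_threshold() for a given nr_requests. */
static void congestion_thresholds(int nr_requests)
{
	int on = nr_requests - (nr_requests / 8) + 1;
	int off = nr_requests - (nr_requests / 8) - (nr_requests / 16) - 1;

	if (on > nr_requests)
		on = nr_requests;
	if (off < 1)
		off = 1;

	printf("nr_requests=%d: congested at %d, cleared below %d\n",
	       nr_requests, on, off);
}

int main(void)
{
	congestion_thresholds(128);	/* congested at 113, cleared below 103 */
	return 0;
}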
114 /* 114 /*
115 * A queue has just exited congestion. Note this in the global counter of 115 * A queue has just exited congestion. Note this in the global counter of
116 * congested queues, and wake up anyone who was waiting for requests to be 116 * congested queues, and wake up anyone who was waiting for requests to be
117 * put back. 117 * put back.
118 */ 118 */
119 static void clear_queue_congested(request_queue_t *q, int rw) 119 static void clear_queue_congested(request_queue_t *q, int rw)
120 { 120 {
121 enum bdi_state bit; 121 enum bdi_state bit;
122 wait_queue_head_t *wqh = &congestion_wqh[rw]; 122 wait_queue_head_t *wqh = &congestion_wqh[rw];
123 123
124 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; 124 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
125 clear_bit(bit, &q->backing_dev_info.state); 125 clear_bit(bit, &q->backing_dev_info.state);
126 smp_mb__after_clear_bit(); 126 smp_mb__after_clear_bit();
127 if (waitqueue_active(wqh)) 127 if (waitqueue_active(wqh))
128 wake_up(wqh); 128 wake_up(wqh);
129 } 129 }
130 130
131 /* 131 /*
132 * A queue has just entered congestion. Flag that in the queue's VM-visible 132 * A queue has just entered congestion. Flag that in the queue's VM-visible
133 * state flags and increment the global counter of congested queues. 133 * state flags and increment the global counter of congested queues.
134 */ 134 */
135 static void set_queue_congested(request_queue_t *q, int rw) 135 static void set_queue_congested(request_queue_t *q, int rw)
136 { 136 {
137 enum bdi_state bit; 137 enum bdi_state bit;
138 138
139 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; 139 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
140 set_bit(bit, &q->backing_dev_info.state); 140 set_bit(bit, &q->backing_dev_info.state);
141 } 141 }
142 142
143 /** 143 /**
144 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info 144 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
145 * @bdev: device 145 * @bdev: device
146 * 146 *
147 * Locates the passed device's request queue and returns the address of its 147 * Locates the passed device's request queue and returns the address of its
148 * backing_dev_info 148 * backing_dev_info
149 * 149 *
150 * Will return NULL if the request queue cannot be located. 150 * Will return NULL if the request queue cannot be located.
151 */ 151 */
152 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) 152 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
153 { 153 {
154 struct backing_dev_info *ret = NULL; 154 struct backing_dev_info *ret = NULL;
155 request_queue_t *q = bdev_get_queue(bdev); 155 request_queue_t *q = bdev_get_queue(bdev);
156 156
157 if (q) 157 if (q)
158 ret = &q->backing_dev_info; 158 ret = &q->backing_dev_info;
159 return ret; 159 return ret;
160 } 160 }
161 161
162 EXPORT_SYMBOL(blk_get_backing_dev_info); 162 EXPORT_SYMBOL(blk_get_backing_dev_info);
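A hedged sketch of the usual caller pattern, peeking at the read-ahead window of whatever queue backs a block device (the printk is purely illustrative):

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>

static void show_readahead(struct block_device *bdev)
{
	struct backing_dev_info *bdi = blk_get_backing_dev_info(bdev);

	/* NULL means no request queue could be located for this bdev */
	if (bdi)
		printk(KERN_INFO "ra_pages = %lu\n", bdi->ra_pages);
}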
163 163
164 void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data) 164 void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data)
165 { 165 {
166 q->activity_fn = fn; 166 q->activity_fn = fn;
167 q->activity_data = data; 167 q->activity_data = data;
168 } 168 }
169 169
170 EXPORT_SYMBOL(blk_queue_activity_fn); 170 EXPORT_SYMBOL(blk_queue_activity_fn);
171 171
172 /** 172 /**
173 * blk_queue_prep_rq - set a prepare_request function for queue 173 * blk_queue_prep_rq - set a prepare_request function for queue
174 * @q: queue 174 * @q: queue
175 * @pfn: prepare_request function 175 * @pfn: prepare_request function
176 * 176 *
177 * It's possible for a queue to register a prepare_request callback which 177 * It's possible for a queue to register a prepare_request callback which
178 * is invoked before the request is handed to the request_fn. The goal of 178 * is invoked before the request is handed to the request_fn. The goal of
179 * the function is to prepare a request for I/O, it can be used to build a 179 * the function is to prepare a request for I/O, it can be used to build a
180 * cdb from the request data for instance. 180 * cdb from the request data for instance.
181 * 181 *
182 */ 182 */
183 void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn) 183 void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)
184 { 184 {
185 q->prep_rq_fn = pfn; 185 q->prep_rq_fn = pfn;
186 } 186 }
187 187
188 EXPORT_SYMBOL(blk_queue_prep_rq); 188 EXPORT_SYMBOL(blk_queue_prep_rq);
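A hedged sketch of a prepare_request hook in the spirit of the description above: build a command block before the request reaches the driver's request_fn. The MYDEV_* opcode is hypothetical, and BLKPREP_OK is assumed to be the usual "ready to go" return value of this era:

#include <linux/blkdev.h>
#include <linux/string.h>

#define MYDEV_CMD_RW10	0x2a	/* hypothetical 10-byte read/write opcode */

static int mydev_prep_rq(request_queue_t *q, struct request *rq)
{
	/* build a simple cdb from the request data */
	memset(rq->cmd, 0, sizeof(rq->cmd));
	rq->cmd[0] = MYDEV_CMD_RW10;
	rq->cmd_len = 10;

	return BLKPREP_OK;
}

/* registered once at queue setup time: blk_queue_prep_rq(q, mydev_prep_rq); */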
189 189
190 /** 190 /**
191 * blk_queue_merge_bvec - set a merge_bvec function for queue 191 * blk_queue_merge_bvec - set a merge_bvec function for queue
192 * @q: queue 192 * @q: queue
193 * @mbfn: merge_bvec_fn 193 * @mbfn: merge_bvec_fn
194 * 194 *
195 * Usually queues have static limitations on the max sectors or segments that 195 * Usually queues have static limitations on the max sectors or segments that
196 * we can put in a request. Stacking drivers may have some settings that 196 * we can put in a request. Stacking drivers may have some settings that
197 * are dynamic, and thus we have to query the queue whether it is ok to 197 * are dynamic, and thus we have to query the queue whether it is ok to
198 * add a new bio_vec to a bio at a given offset or not. If the block device 198 * add a new bio_vec to a bio at a given offset or not. If the block device
199 * has such limitations, it needs to register a merge_bvec_fn to control 199 * has such limitations, it needs to register a merge_bvec_fn to control
200 * the size of bio's sent to it. Note that a block device *must* allow a 200 * the size of bio's sent to it. Note that a block device *must* allow a
201 * single page to be added to an empty bio. The block device driver may want 201 * single page to be added to an empty bio. The block device driver may want
202 * to use the bio_split() function to deal with these bio's. By default 202 * to use the bio_split() function to deal with these bio's. By default
203 * no merge_bvec_fn is defined for a queue, and only the fixed limits are 203 * no merge_bvec_fn is defined for a queue, and only the fixed limits are
204 * honored. 204 * honored.
205 */ 205 */
206 void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn) 206 void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)
207 { 207 {
208 q->merge_bvec_fn = mbfn; 208 q->merge_bvec_fn = mbfn;
209 } 209 }
210 210
211 EXPORT_SYMBOL(blk_queue_merge_bvec); 211 EXPORT_SYMBOL(blk_queue_merge_bvec);
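A hedged sketch in the style of a striping driver that refuses to let a bio grow across a 64 KiB chunk boundary. The callback prototype (queue, bio, candidate bio_vec; return the number of bytes that may still be added) is the contemporary merge_bvec_fn convention and is stated here as an assumption, as is the chunk size:

#include <linux/blkdev.h>
#include <linux/bio.h>

#define MYDEV_CHUNK_SECTORS	128	/* hypothetical 64 KiB chunk */

static int mydev_merge_bvec(request_queue_t *q, struct bio *bio,
			    struct bio_vec *bvec)
{
	unsigned int offset = bio->bi_sector & (MYDEV_CHUNK_SECTORS - 1);
	unsigned int bio_sectors = bio->bi_size >> 9;
	int room;

	/* sectors left in the chunk once the bio's current payload is counted */
	room = MYDEV_CHUNK_SECTORS - (int)offset - (int)bio_sectors;
	if (room < 0)
		room = 0;

	/* an empty bio must always be allowed at least one page */
	if (bio_sectors == 0 && (room << 9) <= bvec->bv_len)
		return bvec->bv_len;

	return room << 9;	/* bytes this bio_vec may add */
}

/* registered at setup time: blk_queue_merge_bvec(q, mydev_merge_bvec); */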
212 212
213 void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn) 213 void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn)
214 { 214 {
215 q->softirq_done_fn = fn; 215 q->softirq_done_fn = fn;
216 } 216 }
217 217
218 EXPORT_SYMBOL(blk_queue_softirq_done); 218 EXPORT_SYMBOL(blk_queue_softirq_done);
219 219
220 /** 220 /**
221 * blk_queue_make_request - define an alternate make_request function for a device 221 * blk_queue_make_request - define an alternate make_request function for a device
222 * @q: the request queue for the device to be affected 222 * @q: the request queue for the device to be affected
223 * @mfn: the alternate make_request function 223 * @mfn: the alternate make_request function
224 * 224 *
225 * Description: 225 * Description:
226 * The normal way for &struct bios to be passed to a device 226 * The normal way for &struct bios to be passed to a device
227 * driver is for them to be collected into requests on a request 227 * driver is for them to be collected into requests on a request
228 * queue, and then to allow the device driver to select requests 228 * queue, and then to allow the device driver to select requests
229 * off that queue when it is ready. This works well for many block 229 * off that queue when it is ready. This works well for many block
230 * devices. However some block devices (typically virtual devices 230 * devices. However some block devices (typically virtual devices
231 * such as md or lvm) do not benefit from the processing on the 231 * such as md or lvm) do not benefit from the processing on the
232 * request queue, and are served best by having the requests passed 232 * request queue, and are served best by having the requests passed
233 * directly to them. This can be achieved by providing a function 233 * directly to them. This can be achieved by providing a function
234 * to blk_queue_make_request(). 234 * to blk_queue_make_request().
235 * 235 *
236 * Caveat: 236 * Caveat:
237 * The driver that does this *must* be able to deal appropriately 237 * The driver that does this *must* be able to deal appropriately
238 * with buffers in "highmemory". This can be accomplished by either calling 238 * with buffers in "highmemory". This can be accomplished by either calling
239 * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling 239 * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
240 * blk_queue_bounce() to create a buffer in normal memory. 240 * blk_queue_bounce() to create a buffer in normal memory.
241 **/ 241 **/
242 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) 242 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
243 { 243 {
244 /* 244 /*
245 * set defaults 245 * set defaults
246 */ 246 */
247 q->nr_requests = BLKDEV_MAX_RQ; 247 q->nr_requests = BLKDEV_MAX_RQ;
248 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); 248 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
249 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); 249 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
250 q->make_request_fn = mfn; 250 q->make_request_fn = mfn;
251 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 251 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
252 q->backing_dev_info.state = 0; 252 q->backing_dev_info.state = 0;
253 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; 253 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
254 blk_queue_max_sectors(q, SAFE_MAX_SECTORS); 254 blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
255 blk_queue_hardsect_size(q, 512); 255 blk_queue_hardsect_size(q, 512);
256 blk_queue_dma_alignment(q, 511); 256 blk_queue_dma_alignment(q, 511);
257 blk_queue_congestion_threshold(q); 257 blk_queue_congestion_threshold(q);
258 q->nr_batching = BLK_BATCH_REQ; 258 q->nr_batching = BLK_BATCH_REQ;
259 259
260 q->unplug_thresh = 4; /* hmm */ 260 q->unplug_thresh = 4; /* hmm */
261 q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ 261 q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */
262 if (q->unplug_delay == 0) 262 if (q->unplug_delay == 0)
263 q->unplug_delay = 1; 263 q->unplug_delay = 1;
264 264
265 INIT_WORK(&q->unplug_work, blk_unplug_work, q); 265 INIT_WORK(&q->unplug_work, blk_unplug_work, q);
266 266
267 q->unplug_timer.function = blk_unplug_timeout; 267 q->unplug_timer.function = blk_unplug_timeout;
268 q->unplug_timer.data = (unsigned long)q; 268 q->unplug_timer.data = (unsigned long)q;
269 269
270 /* 270 /*
271 * by default assume old behaviour and bounce for any highmem page 271 * by default assume old behaviour and bounce for any highmem page
272 */ 272 */
273 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); 273 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
274 274
275 blk_queue_activity_fn(q, NULL, NULL); 275 blk_queue_activity_fn(q, NULL, NULL);
276 } 276 }
277 277
278 EXPORT_SYMBOL(blk_queue_make_request); 278 EXPORT_SYMBOL(blk_queue_make_request);
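A hedged sketch of the md/lvm style of use described above: a virtual device that takes bios directly and never builds requests at all. The three-argument bio_endio() matches the call used later in this file; obtaining the queue from blk_alloc_queue() beforehand is assumed, and returning 0 is taken to mean "bio fully handled, do not resubmit":

#include <linux/blkdev.h>
#include <linux/bio.h>

/* A "null" block device: acknowledge every bio immediately. */
static int nulldev_make_request(request_queue_t *q, struct bio *bio)
{
	/* never touches the data, so the highmem caveat above is moot */
	bio_endio(bio, bio->bi_size, 0);
	return 0;
}

/*
 * At setup time, after q = blk_alloc_queue(GFP_KERNEL):
 *	blk_queue_make_request(q, nulldev_make_request);
 */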
279 279
280 static inline void rq_init(request_queue_t *q, struct request *rq) 280 static inline void rq_init(request_queue_t *q, struct request *rq)
281 { 281 {
282 INIT_LIST_HEAD(&rq->queuelist); 282 INIT_LIST_HEAD(&rq->queuelist);
283 INIT_LIST_HEAD(&rq->donelist); 283 INIT_LIST_HEAD(&rq->donelist);
284 INIT_HLIST_NODE(&rq->hash);
285 284
286 rq->errors = 0; 285 rq->errors = 0;
287 rq->rq_status = RQ_ACTIVE; 286 rq->rq_status = RQ_ACTIVE;
288 rq->bio = rq->biotail = NULL; 287 rq->bio = rq->biotail = NULL;
288 INIT_HLIST_NODE(&rq->hash);
289 RB_CLEAR_NODE(&rq->rb_node);
289 rq->ioprio = 0; 290 rq->ioprio = 0;
290 rq->buffer = NULL; 291 rq->buffer = NULL;
291 rq->ref_count = 1; 292 rq->ref_count = 1;
292 rq->q = q; 293 rq->q = q;
293 rq->waiting = NULL; 294 rq->waiting = NULL;
294 rq->special = NULL; 295 rq->special = NULL;
295 rq->data_len = 0; 296 rq->data_len = 0;
296 rq->data = NULL; 297 rq->data = NULL;
297 rq->nr_phys_segments = 0; 298 rq->nr_phys_segments = 0;
298 rq->sense = NULL; 299 rq->sense = NULL;
299 rq->end_io = NULL; 300 rq->end_io = NULL;
300 rq->end_io_data = NULL; 301 rq->end_io_data = NULL;
301 rq->completion_data = NULL; 302 rq->completion_data = NULL;
302 } 303 }
303 304
304 /** 305 /**
305 * blk_queue_ordered - does this queue support ordered writes 306 * blk_queue_ordered - does this queue support ordered writes
306 * @q: the request queue 307 * @q: the request queue
307 * @ordered: one of QUEUE_ORDERED_* 308 * @ordered: one of QUEUE_ORDERED_*
308 * @prepare_flush_fn: rq setup helper for cache flush ordered writes 309 * @prepare_flush_fn: rq setup helper for cache flush ordered writes
309 * 310 *
310 * Description: 311 * Description:
311 * For journalled file systems, doing ordered writes on a commit 312 * For journalled file systems, doing ordered writes on a commit
312 * block instead of explicitly doing wait_on_buffer (which is bad 313 * block instead of explicitly doing wait_on_buffer (which is bad
313 * for performance) can be a big win. Block drivers supporting this 314 * for performance) can be a big win. Block drivers supporting this
314 * feature should call this function and indicate so. 315 * feature should call this function and indicate so.
315 * 316 *
316 **/ 317 **/
317 int blk_queue_ordered(request_queue_t *q, unsigned ordered, 318 int blk_queue_ordered(request_queue_t *q, unsigned ordered,
318 prepare_flush_fn *prepare_flush_fn) 319 prepare_flush_fn *prepare_flush_fn)
319 { 320 {
320 if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && 321 if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
321 prepare_flush_fn == NULL) { 322 prepare_flush_fn == NULL) {
322 printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); 323 printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n");
323 return -EINVAL; 324 return -EINVAL;
324 } 325 }
325 326
326 if (ordered != QUEUE_ORDERED_NONE && 327 if (ordered != QUEUE_ORDERED_NONE &&
327 ordered != QUEUE_ORDERED_DRAIN && 328 ordered != QUEUE_ORDERED_DRAIN &&
328 ordered != QUEUE_ORDERED_DRAIN_FLUSH && 329 ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
329 ordered != QUEUE_ORDERED_DRAIN_FUA && 330 ordered != QUEUE_ORDERED_DRAIN_FUA &&
330 ordered != QUEUE_ORDERED_TAG && 331 ordered != QUEUE_ORDERED_TAG &&
331 ordered != QUEUE_ORDERED_TAG_FLUSH && 332 ordered != QUEUE_ORDERED_TAG_FLUSH &&
332 ordered != QUEUE_ORDERED_TAG_FUA) { 333 ordered != QUEUE_ORDERED_TAG_FUA) {
333 printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); 334 printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
334 return -EINVAL; 335 return -EINVAL;
335 } 336 }
336 337
337 q->ordered = ordered; 338 q->ordered = ordered;
338 q->next_ordered = ordered; 339 q->next_ordered = ordered;
339 q->prepare_flush_fn = prepare_flush_fn; 340 q->prepare_flush_fn = prepare_flush_fn;
340 341
341 return 0; 342 return 0;
342 } 343 }
343 344
344 EXPORT_SYMBOL(blk_queue_ordered); 345 EXPORT_SYMBOL(blk_queue_ordered);
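A hedged sketch of a driver with a volatile write cache advertising barrier support. The flush opcode and the use of rq->cmd are hypothetical stand-ins for whatever the hardware actually needs; QUEUE_ORDERED_DRAIN_FLUSH (drain the queue, flush the cache around the barrier write) is taken straight from the list accepted above:

#include <linux/blkdev.h>
#include <linux/string.h>

#define MYDEV_FLUSH_CACHE	0x35	/* hypothetical cache-flush opcode */

static void mydev_prepare_flush(request_queue_t *q, struct request *rq)
{
	memset(rq->cmd, 0, sizeof(rq->cmd));
	rq->cmd[0] = MYDEV_FLUSH_CACHE;
	rq->cmd_len = 10;
}

static int mydev_enable_barriers(request_queue_t *q)
{
	return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
				 mydev_prepare_flush);
}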
345 346
346 /** 347 /**
347 * blk_queue_issue_flush_fn - set function for issuing a flush 348 * blk_queue_issue_flush_fn - set function for issuing a flush
348 * @q: the request queue 349 * @q: the request queue
349 * @iff: the function to be called issuing the flush 350 * @iff: the function to be called issuing the flush
350 * 351 *
351 * Description: 352 * Description:
352 * If a driver supports issuing a flush command, the support is notified 353 * If a driver supports issuing a flush command, the support is notified
353 * to the block layer by defining it through this call. 354 * to the block layer by defining it through this call.
354 * 355 *
355 **/ 356 **/
356 void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff) 357 void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)
357 { 358 {
358 q->issue_flush_fn = iff; 359 q->issue_flush_fn = iff;
359 } 360 }
360 361
361 EXPORT_SYMBOL(blk_queue_issue_flush_fn); 362 EXPORT_SYMBOL(blk_queue_issue_flush_fn);
362 363
363 /* 364 /*
364 * Cache flushing for ordered writes handling 365 * Cache flushing for ordered writes handling
365 */ 366 */
366 inline unsigned blk_ordered_cur_seq(request_queue_t *q) 367 inline unsigned blk_ordered_cur_seq(request_queue_t *q)
367 { 368 {
368 if (!q->ordseq) 369 if (!q->ordseq)
369 return 0; 370 return 0;
370 return 1 << ffz(q->ordseq); 371 return 1 << ffz(q->ordseq);
371 } 372 }
372 373
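The "1 << ffz()" trick means the current stage is simply the lowest bit not yet set in q->ordseq, so stages complete strictly in bit order. A user-space sketch of the progression (the stage values mirror the QUEUE_ORDSEQ_* layout and are assumptions here; ffz() is emulated with ctz of the complement):

#include <stdio.h>

#define SEQ_STARTED	0x01	/* assumed QUEUE_ORDSEQ_*-style values */
#define SEQ_DRAIN	0x02
#define SEQ_PREFLUSH	0x04
#define SEQ_BAR		0x08
#define SEQ_POSTFLUSH	0x10
#define SEQ_DONE	0x20

static unsigned int cur_seq(unsigned long ordseq)
{
	if (!ordseq)
		return 0;
	return 1U << __builtin_ctzl(~ordseq);	/* 1 << ffz(ordseq) */
}

int main(void)
{
	unsigned long ordseq = 0;
	unsigned int stages[] = { SEQ_STARTED, SEQ_DRAIN, SEQ_PREFLUSH,
				  SEQ_BAR, SEQ_POSTFLUSH };
	int i;

	for (i = 0; i < 5; i++) {
		ordseq |= stages[i];
		printf("after 0x%02x: current stage bit is 0x%02x\n",
		       stages[i], cur_seq(ordseq));
	}
	/* the final line prints 0x20 (SEQ_DONE): the whole sequence is over */
	return 0;
}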
373 unsigned blk_ordered_req_seq(struct request *rq) 374 unsigned blk_ordered_req_seq(struct request *rq)
374 { 375 {
375 request_queue_t *q = rq->q; 376 request_queue_t *q = rq->q;
376 377
377 BUG_ON(q->ordseq == 0); 378 BUG_ON(q->ordseq == 0);
378 379
379 if (rq == &q->pre_flush_rq) 380 if (rq == &q->pre_flush_rq)
380 return QUEUE_ORDSEQ_PREFLUSH; 381 return QUEUE_ORDSEQ_PREFLUSH;
381 if (rq == &q->bar_rq) 382 if (rq == &q->bar_rq)
382 return QUEUE_ORDSEQ_BAR; 383 return QUEUE_ORDSEQ_BAR;
383 if (rq == &q->post_flush_rq) 384 if (rq == &q->post_flush_rq)
384 return QUEUE_ORDSEQ_POSTFLUSH; 385 return QUEUE_ORDSEQ_POSTFLUSH;
385 386
386 if ((rq->cmd_flags & REQ_ORDERED_COLOR) == 387 if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
387 (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) 388 (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
388 return QUEUE_ORDSEQ_DRAIN; 389 return QUEUE_ORDSEQ_DRAIN;
389 else 390 else
390 return QUEUE_ORDSEQ_DONE; 391 return QUEUE_ORDSEQ_DONE;
391 } 392 }
392 393
393 void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error) 394 void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error)
394 { 395 {
395 struct request *rq; 396 struct request *rq;
396 int uptodate; 397 int uptodate;
397 398
398 if (error && !q->orderr) 399 if (error && !q->orderr)
399 q->orderr = error; 400 q->orderr = error;
400 401
401 BUG_ON(q->ordseq & seq); 402 BUG_ON(q->ordseq & seq);
402 q->ordseq |= seq; 403 q->ordseq |= seq;
403 404
404 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) 405 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
405 return; 406 return;
406 407
407 /* 408 /*
408 * Okay, sequence complete. 409 * Okay, sequence complete.
409 */ 410 */
410 rq = q->orig_bar_rq; 411 rq = q->orig_bar_rq;
411 uptodate = q->orderr ? q->orderr : 1; 412 uptodate = q->orderr ? q->orderr : 1;
412 413
413 q->ordseq = 0; 414 q->ordseq = 0;
414 415
415 end_that_request_first(rq, uptodate, rq->hard_nr_sectors); 416 end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
416 end_that_request_last(rq, uptodate); 417 end_that_request_last(rq, uptodate);
417 } 418 }
418 419
419 static void pre_flush_end_io(struct request *rq, int error) 420 static void pre_flush_end_io(struct request *rq, int error)
420 { 421 {
421 elv_completed_request(rq->q, rq); 422 elv_completed_request(rq->q, rq);
422 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); 423 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
423 } 424 }
424 425
425 static void bar_end_io(struct request *rq, int error) 426 static void bar_end_io(struct request *rq, int error)
426 { 427 {
427 elv_completed_request(rq->q, rq); 428 elv_completed_request(rq->q, rq);
428 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); 429 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
429 } 430 }
430 431
431 static void post_flush_end_io(struct request *rq, int error) 432 static void post_flush_end_io(struct request *rq, int error)
432 { 433 {
433 elv_completed_request(rq->q, rq); 434 elv_completed_request(rq->q, rq);
434 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); 435 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
435 } 436 }
436 437
437 static void queue_flush(request_queue_t *q, unsigned which) 438 static void queue_flush(request_queue_t *q, unsigned which)
438 { 439 {
439 struct request *rq; 440 struct request *rq;
440 rq_end_io_fn *end_io; 441 rq_end_io_fn *end_io;
441 442
442 if (which == QUEUE_ORDERED_PREFLUSH) { 443 if (which == QUEUE_ORDERED_PREFLUSH) {
443 rq = &q->pre_flush_rq; 444 rq = &q->pre_flush_rq;
444 end_io = pre_flush_end_io; 445 end_io = pre_flush_end_io;
445 } else { 446 } else {
446 rq = &q->post_flush_rq; 447 rq = &q->post_flush_rq;
447 end_io = post_flush_end_io; 448 end_io = post_flush_end_io;
448 } 449 }
449 450
450 rq->cmd_flags = REQ_HARDBARRIER; 451 rq->cmd_flags = REQ_HARDBARRIER;
451 rq_init(q, rq); 452 rq_init(q, rq);
452 rq->elevator_private = NULL; 453 rq->elevator_private = NULL;
453 rq->rq_disk = q->bar_rq.rq_disk; 454 rq->rq_disk = q->bar_rq.rq_disk;
454 rq->rl = NULL; 455 rq->rl = NULL;
455 rq->end_io = end_io; 456 rq->end_io = end_io;
456 q->prepare_flush_fn(q, rq); 457 q->prepare_flush_fn(q, rq);
457 458
458 elv_insert(q, rq, ELEVATOR_INSERT_FRONT); 459 elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
459 } 460 }
460 461
461 static inline struct request *start_ordered(request_queue_t *q, 462 static inline struct request *start_ordered(request_queue_t *q,
462 struct request *rq) 463 struct request *rq)
463 { 464 {
464 q->bi_size = 0; 465 q->bi_size = 0;
465 q->orderr = 0; 466 q->orderr = 0;
466 q->ordered = q->next_ordered; 467 q->ordered = q->next_ordered;
467 q->ordseq |= QUEUE_ORDSEQ_STARTED; 468 q->ordseq |= QUEUE_ORDSEQ_STARTED;
468 469
469 /* 470 /*
470 * Prep proxy barrier request. 471 * Prep proxy barrier request.
471 */ 472 */
472 blkdev_dequeue_request(rq); 473 blkdev_dequeue_request(rq);
473 q->orig_bar_rq = rq; 474 q->orig_bar_rq = rq;
474 rq = &q->bar_rq; 475 rq = &q->bar_rq;
475 rq->cmd_flags = 0; 476 rq->cmd_flags = 0;
476 rq_init(q, rq); 477 rq_init(q, rq);
477 if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) 478 if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
478 rq->cmd_flags |= REQ_RW; 479 rq->cmd_flags |= REQ_RW;
479 rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; 480 rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
480 rq->elevator_private = NULL; 481 rq->elevator_private = NULL;
481 rq->rl = NULL; 482 rq->rl = NULL;
482 init_request_from_bio(rq, q->orig_bar_rq->bio); 483 init_request_from_bio(rq, q->orig_bar_rq->bio);
483 rq->end_io = bar_end_io; 484 rq->end_io = bar_end_io;
484 485
485 /* 486 /*
486 * Queue ordered sequence. As we stack them at the head, we 487 * Queue ordered sequence. As we stack them at the head, we
487 * need to queue in reverse order. Note that we rely on the fact 488 * need to queue in reverse order. Note that we rely on the fact
488 * that no fs request uses ELEVATOR_INSERT_FRONT and thus no fs 489 * that no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
489 * request gets in between the ordered sequence. 490 * request gets in between the ordered sequence.
490 */ 491 */
491 if (q->ordered & QUEUE_ORDERED_POSTFLUSH) 492 if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
492 queue_flush(q, QUEUE_ORDERED_POSTFLUSH); 493 queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
493 else 494 else
494 q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; 495 q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
495 496
496 elv_insert(q, rq, ELEVATOR_INSERT_FRONT); 497 elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
497 498
498 if (q->ordered & QUEUE_ORDERED_PREFLUSH) { 499 if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
499 queue_flush(q, QUEUE_ORDERED_PREFLUSH); 500 queue_flush(q, QUEUE_ORDERED_PREFLUSH);
500 rq = &q->pre_flush_rq; 501 rq = &q->pre_flush_rq;
501 } else 502 } else
502 q->ordseq |= QUEUE_ORDSEQ_PREFLUSH; 503 q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
503 504
504 if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0) 505 if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
505 q->ordseq |= QUEUE_ORDSEQ_DRAIN; 506 q->ordseq |= QUEUE_ORDSEQ_DRAIN;
506 else 507 else
507 rq = NULL; 508 rq = NULL;
508 509
509 return rq; 510 return rq;
510 } 511 }
511 512
512 int blk_do_ordered(request_queue_t *q, struct request **rqp) 513 int blk_do_ordered(request_queue_t *q, struct request **rqp)
513 { 514 {
514 struct request *rq = *rqp; 515 struct request *rq = *rqp;
515 int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); 516 int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
516 517
517 if (!q->ordseq) { 518 if (!q->ordseq) {
518 if (!is_barrier) 519 if (!is_barrier)
519 return 1; 520 return 1;
520 521
521 if (q->next_ordered != QUEUE_ORDERED_NONE) { 522 if (q->next_ordered != QUEUE_ORDERED_NONE) {
522 *rqp = start_ordered(q, rq); 523 *rqp = start_ordered(q, rq);
523 return 1; 524 return 1;
524 } else { 525 } else {
525 /* 526 /*
526 * This can happen when the queue switches to 527 * This can happen when the queue switches to
527 * ORDERED_NONE while this request is on it. 528 * ORDERED_NONE while this request is on it.
528 */ 529 */
529 blkdev_dequeue_request(rq); 530 blkdev_dequeue_request(rq);
530 end_that_request_first(rq, -EOPNOTSUPP, 531 end_that_request_first(rq, -EOPNOTSUPP,
531 rq->hard_nr_sectors); 532 rq->hard_nr_sectors);
532 end_that_request_last(rq, -EOPNOTSUPP); 533 end_that_request_last(rq, -EOPNOTSUPP);
533 *rqp = NULL; 534 *rqp = NULL;
534 return 0; 535 return 0;
535 } 536 }
536 } 537 }
537 538
538 /* 539 /*
539 * Ordered sequence in progress 540 * Ordered sequence in progress
540 */ 541 */
541 542
542 /* Special requests are not subject to ordering rules. */ 543 /* Special requests are not subject to ordering rules. */
543 if (!blk_fs_request(rq) && 544 if (!blk_fs_request(rq) &&
544 rq != &q->pre_flush_rq && rq != &q->post_flush_rq) 545 rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
545 return 1; 546 return 1;
546 547
547 if (q->ordered & QUEUE_ORDERED_TAG) { 548 if (q->ordered & QUEUE_ORDERED_TAG) {
548 /* Ordered by tag. Blocking the next barrier is enough. */ 549 /* Ordered by tag. Blocking the next barrier is enough. */
549 if (is_barrier && rq != &q->bar_rq) 550 if (is_barrier && rq != &q->bar_rq)
550 *rqp = NULL; 551 *rqp = NULL;
551 } else { 552 } else {
552 /* Ordered by draining. Wait for turn. */ 553 /* Ordered by draining. Wait for turn. */
553 WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); 554 WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
554 if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) 555 if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
555 *rqp = NULL; 556 *rqp = NULL;
556 } 557 }
557 558
558 return 1; 559 return 1;
559 } 560 }
560 561
561 static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error) 562 static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
562 { 563 {
563 request_queue_t *q = bio->bi_private; 564 request_queue_t *q = bio->bi_private;
564 struct bio_vec *bvec; 565 struct bio_vec *bvec;
565 int i; 566 int i;
566 567
567 /* 568 /*
568 * This is a dry run, restore bio_sector and size. We'll finish 569 * This is a dry run, restore bio_sector and size. We'll finish
569 * this request again with the original bi_end_io after an 570 * this request again with the original bi_end_io after an
570 * error occurs or post flush is complete. 571 * error occurs or post flush is complete.
571 */ 572 */
572 q->bi_size += bytes; 573 q->bi_size += bytes;
573 574
574 if (bio->bi_size) 575 if (bio->bi_size)
575 return 1; 576 return 1;
576 577
577 /* Rewind bvec's */ 578 /* Rewind bvec's */
578 bio->bi_idx = 0; 579 bio->bi_idx = 0;
579 bio_for_each_segment(bvec, bio, i) { 580 bio_for_each_segment(bvec, bio, i) {
580 bvec->bv_len += bvec->bv_offset; 581 bvec->bv_len += bvec->bv_offset;
581 bvec->bv_offset = 0; 582 bvec->bv_offset = 0;
582 } 583 }
583 584
584 /* Reset bio */ 585 /* Reset bio */
585 set_bit(BIO_UPTODATE, &bio->bi_flags); 586 set_bit(BIO_UPTODATE, &bio->bi_flags);
586 bio->bi_size = q->bi_size; 587 bio->bi_size = q->bi_size;
587 bio->bi_sector -= (q->bi_size >> 9); 588 bio->bi_sector -= (q->bi_size >> 9);
588 q->bi_size = 0; 589 q->bi_size = 0;
589 590
590 return 0; 591 return 0;
591 } 592 }
592 593
593 static inline int ordered_bio_endio(struct request *rq, struct bio *bio, 594 static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
594 unsigned int nbytes, int error) 595 unsigned int nbytes, int error)
595 { 596 {
596 request_queue_t *q = rq->q; 597 request_queue_t *q = rq->q;
597 bio_end_io_t *endio; 598 bio_end_io_t *endio;
598 void *private; 599 void *private;
599 600
600 if (&q->bar_rq != rq) 601 if (&q->bar_rq != rq)
601 return 0; 602 return 0;
602 603
603 /* 604 /*
604 * Okay, this is the barrier request in progress, dry finish it. 605 * Okay, this is the barrier request in progress, dry finish it.
605 */ 606 */
606 if (error && !q->orderr) 607 if (error && !q->orderr)
607 q->orderr = error; 608 q->orderr = error;
608 609
609 endio = bio->bi_end_io; 610 endio = bio->bi_end_io;
610 private = bio->bi_private; 611 private = bio->bi_private;
611 bio->bi_end_io = flush_dry_bio_endio; 612 bio->bi_end_io = flush_dry_bio_endio;
612 bio->bi_private = q; 613 bio->bi_private = q;
613 614
614 bio_endio(bio, nbytes, error); 615 bio_endio(bio, nbytes, error);
615 616
616 bio->bi_end_io = endio; 617 bio->bi_end_io = endio;
617 bio->bi_private = private; 618 bio->bi_private = private;
618 619
619 return 1; 620 return 1;
620 } 621 }
621 622
622 /** 623 /**
623 * blk_queue_bounce_limit - set bounce buffer limit for queue 624 * blk_queue_bounce_limit - set bounce buffer limit for queue
624 * @q: the request queue for the device 625 * @q: the request queue for the device
625 * @dma_addr: bus address limit 626 * @dma_addr: bus address limit
626 * 627 *
627 * Description: 628 * Description:
628 * Different hardware can have different requirements as to what pages 629 * Different hardware can have different requirements as to what pages
629 * it can do I/O directly to. A low level driver can call 630 * it can do I/O directly to. A low level driver can call
630 * blk_queue_bounce_limit to have lower memory pages allocated as bounce 631 * blk_queue_bounce_limit to have lower memory pages allocated as bounce
631 * buffers for doing I/O to pages residing above @dma_addr. 632 * buffers for doing I/O to pages residing above @dma_addr.
632 **/ 633 **/
633 void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) 634 void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
634 { 635 {
635 unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; 636 unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
636 int dma = 0; 637 int dma = 0;
637 638
638 q->bounce_gfp = GFP_NOIO; 639 q->bounce_gfp = GFP_NOIO;
639 #if BITS_PER_LONG == 64 640 #if BITS_PER_LONG == 64
640 /* Assume anything <= 4GB can be handled by IOMMU. 641 /* Assume anything <= 4GB can be handled by IOMMU.
641 Actually some IOMMUs can handle everything, but I don't 642 Actually some IOMMUs can handle everything, but I don't
642 know of a way to test this here. */ 643 know of a way to test this here. */
643 if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) 644 if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
644 dma = 1; 645 dma = 1;
645 q->bounce_pfn = max_low_pfn; 646 q->bounce_pfn = max_low_pfn;
646 #else 647 #else
647 if (bounce_pfn < blk_max_low_pfn) 648 if (bounce_pfn < blk_max_low_pfn)
648 dma = 1; 649 dma = 1;
649 q->bounce_pfn = bounce_pfn; 650 q->bounce_pfn = bounce_pfn;
650 #endif 651 #endif
651 if (dma) { 652 if (dma) {
652 init_emergency_isa_pool(); 653 init_emergency_isa_pool();
653 q->bounce_gfp = GFP_NOIO | GFP_DMA; 654 q->bounce_gfp = GFP_NOIO | GFP_DMA;
654 q->bounce_pfn = bounce_pfn; 655 q->bounce_pfn = bounce_pfn;
655 } 656 }
656 } 657 }
657 658
658 EXPORT_SYMBOL(blk_queue_bounce_limit); 659 EXPORT_SYMBOL(blk_queue_bounce_limit);
659 660
660 /** 661 /**
661 * blk_queue_max_sectors - set max sectors for a request for this queue 662 * blk_queue_max_sectors - set max sectors for a request for this queue
662 * @q: the request queue for the device 663 * @q: the request queue for the device
663 * @max_sectors: max sectors in the usual 512b unit 664 * @max_sectors: max sectors in the usual 512b unit
664 * 665 *
665 * Description: 666 * Description:
666 * Enables a low level driver to set an upper limit on the size of 667 * Enables a low level driver to set an upper limit on the size of
667 * received requests. 668 * received requests.
668 **/ 669 **/
669 void blk_queue_max_sectors(request_queue_t *q, unsigned int max_sectors) 670 void blk_queue_max_sectors(request_queue_t *q, unsigned int max_sectors)
670 { 671 {
671 if ((max_sectors << 9) < PAGE_CACHE_SIZE) { 672 if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
672 max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); 673 max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
673 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); 674 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);
674 } 675 }
675 676
676 if (BLK_DEF_MAX_SECTORS > max_sectors) 677 if (BLK_DEF_MAX_SECTORS > max_sectors)
677 q->max_hw_sectors = q->max_sectors = max_sectors; 678 q->max_hw_sectors = q->max_sectors = max_sectors;
678 else { 679 else {
679 q->max_sectors = BLK_DEF_MAX_SECTORS; 680 q->max_sectors = BLK_DEF_MAX_SECTORS;
680 q->max_hw_sectors = max_sectors; 681 q->max_hw_sectors = max_sectors;
681 } 682 }
682 } 683 }
683 684
684 EXPORT_SYMBOL(blk_queue_max_sectors); 685 EXPORT_SYMBOL(blk_queue_max_sectors);
685 686
686 /** 687 /**
687 * blk_queue_max_phys_segments - set max phys segments for a request for this queue 688 * blk_queue_max_phys_segments - set max phys segments for a request for this queue
688 * @q: the request queue for the device 689 * @q: the request queue for the device
689 * @max_segments: max number of segments 690 * @max_segments: max number of segments
690 * 691 *
691 * Description: 692 * Description:
692 * Enables a low level driver to set an upper limit on the number of 693 * Enables a low level driver to set an upper limit on the number of
693 * physical data segments in a request. This would be the largest sized 694 * physical data segments in a request. This would be the largest sized
694 * scatter list the driver could handle. 695 * scatter list the driver could handle.
695 **/ 696 **/
696 void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments) 697 void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments)
697 { 698 {
698 if (!max_segments) { 699 if (!max_segments) {
699 max_segments = 1; 700 max_segments = 1;
700 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); 701 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
701 } 702 }
702 703
703 q->max_phys_segments = max_segments; 704 q->max_phys_segments = max_segments;
704 } 705 }
705 706
706 EXPORT_SYMBOL(blk_queue_max_phys_segments); 707 EXPORT_SYMBOL(blk_queue_max_phys_segments);
707 708
708 /** 709 /**
709 * blk_queue_max_hw_segments - set max hw segments for a request for this queue 710 * blk_queue_max_hw_segments - set max hw segments for a request for this queue
710 * @q: the request queue for the device 711 * @q: the request queue for the device
711 * @max_segments: max number of segments 712 * @max_segments: max number of segments
712 * 713 *
713 * Description: 714 * Description:
714 * Enables a low level driver to set an upper limit on the number of 715 * Enables a low level driver to set an upper limit on the number of
715 * hw data segments in a request. This would be the largest number of 716 * hw data segments in a request. This would be the largest number of
716 * address/length pairs the host adapter can actually give at once 717 * address/length pairs the host adapter can actually give at once
717 * to the device. 718 * to the device.
718 **/ 719 **/
719 void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments) 720 void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments)
720 { 721 {
721 if (!max_segments) { 722 if (!max_segments) {
722 max_segments = 1; 723 max_segments = 1;
723 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); 724 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
724 } 725 }
725 726
726 q->max_hw_segments = max_segments; 727 q->max_hw_segments = max_segments;
727 } 728 }
728 729
729 EXPORT_SYMBOL(blk_queue_max_hw_segments); 730 EXPORT_SYMBOL(blk_queue_max_hw_segments);
730 731
731 /** 732 /**
732 * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg 733 * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg
733 * @q: the request queue for the device 734 * @q: the request queue for the device
734 * @max_size: max size of segment in bytes 735 * @max_size: max size of segment in bytes
735 * 736 *
736 * Description: 737 * Description:
737 * Enables a low level driver to set an upper limit on the size of a 738 * Enables a low level driver to set an upper limit on the size of a
738 * coalesced segment 739 * coalesced segment
739 **/ 740 **/
740 void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size) 741 void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size)
741 { 742 {
742 if (max_size < PAGE_CACHE_SIZE) { 743 if (max_size < PAGE_CACHE_SIZE) {
743 max_size = PAGE_CACHE_SIZE; 744 max_size = PAGE_CACHE_SIZE;
744 printk("%s: set to minimum %d\n", __FUNCTION__, max_size); 745 printk("%s: set to minimum %d\n", __FUNCTION__, max_size);
745 } 746 }
746 747
747 q->max_segment_size = max_size; 748 q->max_segment_size = max_size;
748 } 749 }
749 750
750 EXPORT_SYMBOL(blk_queue_max_segment_size); 751 EXPORT_SYMBOL(blk_queue_max_segment_size);
751 752
752 /** 753 /**
753 * blk_queue_hardsect_size - set hardware sector size for the queue 754 * blk_queue_hardsect_size - set hardware sector size for the queue
754 * @q: the request queue for the device 755 * @q: the request queue for the device
755 * @size: the hardware sector size, in bytes 756 * @size: the hardware sector size, in bytes
756 * 757 *
757 * Description: 758 * Description:
758 * This should typically be set to the lowest possible sector size 759 * This should typically be set to the lowest possible sector size
759 * that the hardware can operate on (possible without resorting to 760 * that the hardware can operate on (possible without resorting to
760 * even internal read-modify-write operations). Usually the default 761 * even internal read-modify-write operations). Usually the default
761 * of 512 covers most hardware. 762 * of 512 covers most hardware.
762 **/ 763 **/
763 void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) 764 void blk_queue_hardsect_size(request_queue_t *q, unsigned short size)
764 { 765 {
765 q->hardsect_size = size; 766 q->hardsect_size = size;
766 } 767 }
767 768
768 EXPORT_SYMBOL(blk_queue_hardsect_size); 769 EXPORT_SYMBOL(blk_queue_hardsect_size);
769 770
770 /* 771 /*
771 * Returns the minimum that is _not_ zero, unless both are zero. 772 * Returns the minimum that is _not_ zero, unless both are zero.
772 */ 773 */
773 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) 774 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
774 775
775 /** 776 /**
776 * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers 777 * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers
777 * @t: the stacking driver (top) 778 * @t: the stacking driver (top)
778 * @b: the underlying device (bottom) 779 * @b: the underlying device (bottom)
779 **/ 780 **/
780 void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) 781 void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)
781 { 782 {
782 /* zero is "infinity" */ 783 /* zero is "infinity" */
783 t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); 784 t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);
784 t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); 785 t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors);
785 786
786 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); 787 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
787 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); 788 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
788 t->max_segment_size = min(t->max_segment_size,b->max_segment_size); 789 t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
789 t->hardsect_size = max(t->hardsect_size,b->hardsect_size); 790 t->hardsect_size = max(t->hardsect_size,b->hardsect_size);
790 if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) 791 if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
791 clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags); 792 clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags);
792 } 793 }
793 794
794 EXPORT_SYMBOL(blk_queue_stack_limits); 795 EXPORT_SYMBOL(blk_queue_stack_limits);
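The stacking rule is simply "take the stricter of the two limits, treating zero as unlimited". A small user-space sketch of the min_not_zero() combination (the macro is re-parenthesised locally; the numbers are illustrative):

#include <stdio.h>

#define min(a, b)		((a) < (b) ? (a) : (b))
#define min_not_zero(l, r)	((l) == 0 ? (r) : ((r) == 0 ? (l) : min(l, r)))

int main(void)
{
	unsigned int top = 0;		/* 0 == "infinity": no limit set yet */
	unsigned int bottom = 255;	/* e.g. SAFE_MAX_SECTORS */

	printf("stacked max_sectors = %u\n", min_not_zero(top, bottom));  /* 255 */
	printf("stacked with 128    = %u\n", min_not_zero(255u, 128u));   /* 128 */
	return 0;
}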
795 796
796 /** 797 /**
797 * blk_queue_segment_boundary - set boundary rules for segment merging 798 * blk_queue_segment_boundary - set boundary rules for segment merging
798 * @q: the request queue for the device 799 * @q: the request queue for the device
799 * @mask: the memory boundary mask 800 * @mask: the memory boundary mask
800 **/ 801 **/
801 void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask) 802 void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask)
802 { 803 {
803 if (mask < PAGE_CACHE_SIZE - 1) { 804 if (mask < PAGE_CACHE_SIZE - 1) {
804 mask = PAGE_CACHE_SIZE - 1; 805 mask = PAGE_CACHE_SIZE - 1;
805 printk("%s: set to minimum %lx\n", __FUNCTION__, mask); 806 printk("%s: set to minimum %lx\n", __FUNCTION__, mask);
806 } 807 }
807 808
808 q->seg_boundary_mask = mask; 809 q->seg_boundary_mask = mask;
809 } 810 }
810 811
811 EXPORT_SYMBOL(blk_queue_segment_boundary); 812 EXPORT_SYMBOL(blk_queue_segment_boundary);
812 813
813 /** 814 /**
814 * blk_queue_dma_alignment - set dma length and memory alignment 815 * blk_queue_dma_alignment - set dma length and memory alignment
815 * @q: the request queue for the device 816 * @q: the request queue for the device
816 * @mask: alignment mask 817 * @mask: alignment mask
817 * 818 *
818 * description: 819 * description:
819 * set required memory and length alignment for direct dma transactions. 820 * set required memory and length alignment for direct dma transactions.
820 * this is used when building direct io requests for the queue. 821 * this is used when building direct io requests for the queue.
821 * 822 *
822 **/ 823 **/
823 void blk_queue_dma_alignment(request_queue_t *q, int mask) 824 void blk_queue_dma_alignment(request_queue_t *q, int mask)
824 { 825 {
825 q->dma_alignment = mask; 826 q->dma_alignment = mask;
826 } 827 }
827 828
828 EXPORT_SYMBOL(blk_queue_dma_alignment); 829 EXPORT_SYMBOL(blk_queue_dma_alignment);
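Taken together, the setters above are what a low-level driver calls once, right after allocating its queue. A hedged sketch describing a hypothetical controller (32-bit DMA, 128 KiB per request, a 32-entry scatter list, 4 KiB hardware sectors):

#include <linux/blkdev.h>

static void mydev_set_limits(request_queue_t *q)
{
	blk_queue_bounce_limit(q, 0xffffffffULL);  /* bounce pages above 4 GB */
	blk_queue_max_sectors(q, 256);             /* 128 KiB per request */
	blk_queue_max_phys_segments(q, 32);
	blk_queue_max_hw_segments(q, 32);
	blk_queue_max_segment_size(q, 65536);
	blk_queue_hardsect_size(q, 4096);
	blk_queue_segment_boundary(q, 0xffffffff); /* no segment crosses 4 GB */
	blk_queue_dma_alignment(q, 511);           /* 512-byte dma alignment */
}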
829 830
830 /** 831 /**
831 * blk_queue_find_tag - find a request by its tag and queue 832 * blk_queue_find_tag - find a request by its tag and queue
832 * @q: The request queue for the device 833 * @q: The request queue for the device
833 * @tag: The tag of the request 834 * @tag: The tag of the request
834 * 835 *
835 * Notes: 836 * Notes:
836 * Should be used when a device returns a tag and you want to match 837 * Should be used when a device returns a tag and you want to match
837 * it with a request. 838 * it with a request.
838 * 839 *
839 * no locks need be held. 840 * no locks need be held.
840 **/ 841 **/
841 struct request *blk_queue_find_tag(request_queue_t *q, int tag) 842 struct request *blk_queue_find_tag(request_queue_t *q, int tag)
842 { 843 {
843 struct blk_queue_tag *bqt = q->queue_tags; 844 struct blk_queue_tag *bqt = q->queue_tags;
844 845
845 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) 846 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
846 return NULL; 847 return NULL;
847 848
848 return bqt->tag_index[tag]; 849 return bqt->tag_index[tag];
849 } 850 }
850 851
851 EXPORT_SYMBOL(blk_queue_find_tag); 852 EXPORT_SYMBOL(blk_queue_find_tag);
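A hedged sketch of the lookup on the completion side: the hardware hands back a tag, the driver maps it to the request and finishes it with the end_that_request_* helpers already used elsewhere in this file ('error' is 0 or a negative errno; releasing the tag, e.g. via blk_queue_end_tag(), is left as a comment):

#include <linux/blkdev.h>

static void mydev_complete_tagged(request_queue_t *q, int hw_tag, int error)
{
	struct request *rq = blk_queue_find_tag(q, hw_tag);

	if (!rq)
		return;		/* stale or bogus tag from the hardware */

	/* a real driver would release the tag here before completing */
	end_that_request_first(rq, error ? error : 1, rq->hard_nr_sectors);
	end_that_request_last(rq, error ? error : 1);
}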
852 853
853 /** 854 /**
854 * __blk_free_tags - release a given set of tag maintenance info 855 * __blk_free_tags - release a given set of tag maintenance info
855 * @bqt: the tag map to free 856 * @bqt: the tag map to free
856 * 857 *
857 * Tries to free the specified @bqt@. Returns true if it was 858 * Tries to free the specified @bqt@. Returns true if it was
858 * actually freed and false if there are still references using it 859 * actually freed and false if there are still references using it
859 */ 860 */
860 static int __blk_free_tags(struct blk_queue_tag *bqt) 861 static int __blk_free_tags(struct blk_queue_tag *bqt)
861 { 862 {
862 int retval; 863 int retval;
863 864
864 retval = atomic_dec_and_test(&bqt->refcnt); 865 retval = atomic_dec_and_test(&bqt->refcnt);
865 if (retval) { 866 if (retval) {
866 BUG_ON(bqt->busy); 867 BUG_ON(bqt->busy);
867 BUG_ON(!list_empty(&bqt->busy_list)); 868 BUG_ON(!list_empty(&bqt->busy_list));
868 869
869 kfree(bqt->tag_index); 870 kfree(bqt->tag_index);
870 bqt->tag_index = NULL; 871 bqt->tag_index = NULL;
871 872
872 kfree(bqt->tag_map); 873 kfree(bqt->tag_map);
873 bqt->tag_map = NULL; 874 bqt->tag_map = NULL;
874 875
875 kfree(bqt); 876 kfree(bqt);
876 877
877 } 878 }
878 879
879 return retval; 880 return retval;
880 } 881 }
881 882
882 /** 883 /**
883 * __blk_queue_free_tags - release tag maintenance info 884 * __blk_queue_free_tags - release tag maintenance info
884 * @q: the request queue for the device 885 * @q: the request queue for the device
885 * 886 *
886 * Notes: 887 * Notes:
887 * blk_cleanup_queue() will take care of calling this function, if tagging 888 * blk_cleanup_queue() will take care of calling this function, if tagging
888 * has been used. So there's no need to call this directly. 889 * has been used. So there's no need to call this directly.
889 **/ 890 **/
890 static void __blk_queue_free_tags(request_queue_t *q) 891 static void __blk_queue_free_tags(request_queue_t *q)
891 { 892 {
892 struct blk_queue_tag *bqt = q->queue_tags; 893 struct blk_queue_tag *bqt = q->queue_tags;
893 894
894 if (!bqt) 895 if (!bqt)
895 return; 896 return;
896 897
897 __blk_free_tags(bqt); 898 __blk_free_tags(bqt);
898 899
899 q->queue_tags = NULL; 900 q->queue_tags = NULL;
900 q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); 901 q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
901 } 902 }
902 903
903 904
904 /** 905 /**
905 * blk_free_tags - release a given set of tag maintenance info 906 * blk_free_tags - release a given set of tag maintenance info
906 * @bqt: the tag map to free 907 * @bqt: the tag map to free
907 * 908 *
908 * For an externally managed @bqt@, this frees the map. Callers of this 909 * For an externally managed @bqt@, this frees the map. Callers of this
909 * function must guarantee to have released all the queues that 910 * function must guarantee to have released all the queues that
910 * might have been using this tag map. 911 * might have been using this tag map.
911 */ 912 */
912 void blk_free_tags(struct blk_queue_tag *bqt) 913 void blk_free_tags(struct blk_queue_tag *bqt)
913 { 914 {
914 if (unlikely(!__blk_free_tags(bqt))) 915 if (unlikely(!__blk_free_tags(bqt)))
915 BUG(); 916 BUG();
916 } 917 }
917 EXPORT_SYMBOL(blk_free_tags); 918 EXPORT_SYMBOL(blk_free_tags);
918 919
919 /** 920 /**
920 * blk_queue_free_tags - release tag maintenance info 921 * blk_queue_free_tags - release tag maintenance info
921 * @q: the request queue for the device 922 * @q: the request queue for the device
922 * 923 *
923 * Notes: 924 * Notes:
924 * This is used to disable tagged queuing on a device, yet leave 925 * This is used to disable tagged queuing on a device, yet leave
925 * the queue functional. 926 * the queue functional.
926 **/ 927 **/
927 void blk_queue_free_tags(request_queue_t *q) 928 void blk_queue_free_tags(request_queue_t *q)
928 { 929 {
929 clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); 930 clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
930 } 931 }
931 932
932 EXPORT_SYMBOL(blk_queue_free_tags); 933 EXPORT_SYMBOL(blk_queue_free_tags);
933 934
934 static int 935 static int
935 init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) 936 init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
936 { 937 {
937 struct request **tag_index; 938 struct request **tag_index;
938 unsigned long *tag_map; 939 unsigned long *tag_map;
939 int nr_ulongs; 940 int nr_ulongs;
940 941
941 if (q && depth > q->nr_requests * 2) { 942 if (q && depth > q->nr_requests * 2) {
942 depth = q->nr_requests * 2; 943 depth = q->nr_requests * 2;
943 printk(KERN_ERR "%s: adjusted depth to %d\n", 944 printk(KERN_ERR "%s: adjusted depth to %d\n",
944 __FUNCTION__, depth); 945 __FUNCTION__, depth);
945 } 946 }
946 947
947 tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC); 948 tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC);
948 if (!tag_index) 949 if (!tag_index)
949 goto fail; 950 goto fail;
950 951
951 nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; 952 nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;
952 tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); 953 tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC);
953 if (!tag_map) 954 if (!tag_map)
954 goto fail; 955 goto fail;
955 956
956 tags->real_max_depth = depth; 957 tags->real_max_depth = depth;
957 tags->max_depth = depth; 958 tags->max_depth = depth;
958 tags->tag_index = tag_index; 959 tags->tag_index = tag_index;
959 tags->tag_map = tag_map; 960 tags->tag_map = tag_map;
960 961
961 return 0; 962 return 0;
962 fail: 963 fail:
963 kfree(tag_index); 964 kfree(tag_index);
964 return -ENOMEM; 965 return -ENOMEM;
965 } 966 }
966 967
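The tag bitmap is sized in whole unsigned longs. A user-space sketch of the same ALIGN arithmetic (ALIGN is redefined locally to match the kernel's rounding):

#include <stdio.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	int depth;

	for (depth = 1; depth <= 256; depth *= 4) {
		unsigned long nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;

		printf("depth %3d -> %lu unsigned long(s) of tag bitmap\n",
		       depth, nr_ulongs);
	}
	return 0;
}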
967 static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, 968 static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
968 int depth) 969 int depth)
969 { 970 {
970 struct blk_queue_tag *tags; 971 struct blk_queue_tag *tags;
971 972
972 tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); 973 tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
973 if (!tags) 974 if (!tags)
974 goto fail; 975 goto fail;
975 976
976 if (init_tag_map(q, tags, depth)) 977 if (init_tag_map(q, tags, depth))
977 goto fail; 978 goto fail;
978 979
979 INIT_LIST_HEAD(&tags->busy_list); 980 INIT_LIST_HEAD(&tags->busy_list);
980 tags->busy = 0; 981 tags->busy = 0;
981 atomic_set(&tags->refcnt, 1); 982 atomic_set(&tags->refcnt, 1);
982 return tags; 983 return tags;
983 fail: 984 fail:
984 kfree(tags); 985 kfree(tags);
985 return NULL; 986 return NULL;
986 } 987 }
987 988
988 /** 989 /**
989 * blk_init_tags - initialize the tag info for an external tag map 990 * blk_init_tags - initialize the tag info for an external tag map
990 * @depth: the maximum queue depth supported 991 * @depth: the maximum queue depth supported
991 * @tags: the tag to use 992 * @tags: the tag to use
992 **/ 993 **/
993 struct blk_queue_tag *blk_init_tags(int depth) 994 struct blk_queue_tag *blk_init_tags(int depth)
994 { 995 {
995 return __blk_queue_init_tags(NULL, depth); 996 return __blk_queue_init_tags(NULL, depth);
996 } 997 }
997 EXPORT_SYMBOL(blk_init_tags); 998 EXPORT_SYMBOL(blk_init_tags);
998 999
999 /** 1000 /**
1000 * blk_queue_init_tags - initialize the queue tag info 1001 * blk_queue_init_tags - initialize the queue tag info
1001 * @q: the request queue for the device 1002 * @q: the request queue for the device
1002 * @depth: the maximum queue depth supported 1003 * @depth: the maximum queue depth supported
1003 * @tags: the tag to use 1004 * @tags: the tag to use
1004 **/ 1005 **/
1005 int blk_queue_init_tags(request_queue_t *q, int depth, 1006 int blk_queue_init_tags(request_queue_t *q, int depth,
1006 struct blk_queue_tag *tags) 1007 struct blk_queue_tag *tags)
1007 { 1008 {
1008 int rc; 1009 int rc;
1009 1010
1010 BUG_ON(tags && q->queue_tags && tags != q->queue_tags); 1011 BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
1011 1012
1012 if (!tags && !q->queue_tags) { 1013 if (!tags && !q->queue_tags) {
1013 tags = __blk_queue_init_tags(q, depth); 1014 tags = __blk_queue_init_tags(q, depth);
1014 1015
1015 if (!tags) 1016 if (!tags)
1016 goto fail; 1017 goto fail;
1017 } else if (q->queue_tags) { 1018 } else if (q->queue_tags) {
1018 if ((rc = blk_queue_resize_tags(q, depth))) 1019 if ((rc = blk_queue_resize_tags(q, depth)))
1019 return rc; 1020 return rc;
1020 set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); 1021 set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
1021 return 0; 1022 return 0;
1022 } else 1023 } else
1023 atomic_inc(&tags->refcnt); 1024 atomic_inc(&tags->refcnt);
1024 1025
1025 /* 1026 /*
1026 * assign it, all done 1027 * assign it, all done
1027 */ 1028 */
1028 q->queue_tags = tags; 1029 q->queue_tags = tags;
1029 q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); 1030 q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);
1030 return 0; 1031 return 0;
1031 fail: 1032 fail:
1032 kfree(tags); 1033 kfree(tags);
1033 return -ENOMEM; 1034 return -ENOMEM;
1034 } 1035 }
1035 1036
1036 EXPORT_SYMBOL(blk_queue_init_tags); 1037 EXPORT_SYMBOL(blk_queue_init_tags);
1037 1038
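/*
 * Example (illustrative sketch only): a driver that shares one tag map across
 * several queues, e.g. one queue per LUN behind a single controller, can
 * allocate the map once with blk_init_tags() and pass it to
 * blk_queue_init_tags() for every queue. struct my_hba, ->shared_tags,
 * ->queue[] and the depth value are hypothetical names; error unwinding of
 * already-initialized queues is omitted for brevity.
 */
static int my_hba_enable_tagging(struct my_hba *hba, int depth, int nr_queues)
{
	int i, ret;

	hba->shared_tags = blk_init_tags(depth);
	if (!hba->shared_tags)
		return -ENOMEM;

	for (i = 0; i < nr_queues; i++) {
		/* passing an existing map just bumps its reference count */
		ret = blk_queue_init_tags(hba->queue[i], depth, hba->shared_tags);
		if (ret)
			return ret;
	}

	return 0;
}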
1038 /** 1039 /**
1039 * blk_queue_resize_tags - change the queueing depth 1040 * blk_queue_resize_tags - change the queueing depth
1040 * @q: the request queue for the device 1041 * @q: the request queue for the device
1041 * @new_depth: the new max command queueing depth 1042 * @new_depth: the new max command queueing depth
1042 * 1043 *
1043 * Notes: 1044 * Notes:
1044 * Must be called with the queue lock held. 1045 * Must be called with the queue lock held.
1045 **/ 1046 **/
1046 int blk_queue_resize_tags(request_queue_t *q, int new_depth) 1047 int blk_queue_resize_tags(request_queue_t *q, int new_depth)
1047 { 1048 {
1048 struct blk_queue_tag *bqt = q->queue_tags; 1049 struct blk_queue_tag *bqt = q->queue_tags;
1049 struct request **tag_index; 1050 struct request **tag_index;
1050 unsigned long *tag_map; 1051 unsigned long *tag_map;
1051 int max_depth, nr_ulongs; 1052 int max_depth, nr_ulongs;
1052 1053
1053 if (!bqt) 1054 if (!bqt)
1054 return -ENXIO; 1055 return -ENXIO;
1055 1056
1056 /* 1057 /*
1057 * if real_max_depth is already large enough, just 1058 * if real_max_depth is already large enough, just
1058 * adjust max_depth. *NOTE* as requests with tag values 1059 * adjust max_depth. *NOTE* as requests with tag values
1059 * between new_depth and real_max_depth can be in-flight, the 1060 * between new_depth and real_max_depth can be in-flight, the
1060 * tag map cannot be shrunk blindly here. 1061 * tag map cannot be shrunk blindly here.
1061 */ 1062 */
1062 if (new_depth <= bqt->real_max_depth) { 1063 if (new_depth <= bqt->real_max_depth) {
1063 bqt->max_depth = new_depth; 1064 bqt->max_depth = new_depth;
1064 return 0; 1065 return 0;
1065 } 1066 }
1066 1067
1067 /* 1068 /*
1068 * Currently cannot replace a shared tag map with a new 1069 * Currently cannot replace a shared tag map with a new
1069 * one, so error out if this is the case 1070 * one, so error out if this is the case
1070 */ 1071 */
1071 if (atomic_read(&bqt->refcnt) != 1) 1072 if (atomic_read(&bqt->refcnt) != 1)
1072 return -EBUSY; 1073 return -EBUSY;
1073 1074
1074 /* 1075 /*
1075 * save the old state info, so we can copy it back 1076 * save the old state info, so we can copy it back
1076 */ 1077 */
1077 tag_index = bqt->tag_index; 1078 tag_index = bqt->tag_index;
1078 tag_map = bqt->tag_map; 1079 tag_map = bqt->tag_map;
1079 max_depth = bqt->real_max_depth; 1080 max_depth = bqt->real_max_depth;
1080 1081
1081 if (init_tag_map(q, bqt, new_depth)) 1082 if (init_tag_map(q, bqt, new_depth))
1082 return -ENOMEM; 1083 return -ENOMEM;
1083 1084
1084 memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); 1085 memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *));
1085 nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; 1086 nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG;
1086 memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); 1087 memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long));
1087 1088
1088 kfree(tag_index); 1089 kfree(tag_index);
1089 kfree(tag_map); 1090 kfree(tag_map);
1090 return 0; 1091 return 0;
1091 } 1092 }
1092 1093
1093 EXPORT_SYMBOL(blk_queue_resize_tags); 1094 EXPORT_SYMBOL(blk_queue_resize_tags);
1094 1095
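/*
 * Example (illustrative sketch only): per the notes above, the resize must
 * happen under the queue lock. A driver that wants to throttle the usable
 * depth at runtime could wrap the call like this; when shrinking below
 * real_max_depth only ->max_depth is touched, so no reallocation occurs.
 */
static int my_set_queue_depth(request_queue_t *q, int new_depth)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(q->queue_lock, flags);
	ret = blk_queue_resize_tags(q, new_depth);
	spin_unlock_irqrestore(q->queue_lock, flags);

	return ret;
}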
1095 /** 1096 /**
1096 * blk_queue_end_tag - end tag operations for a request 1097 * blk_queue_end_tag - end tag operations for a request
1097 * @q: the request queue for the device 1098 * @q: the request queue for the device
1098 * @rq: the request that has completed 1099 * @rq: the request that has completed
1099 * 1100 *
1100 * Description: 1101 * Description:
1101 * Typically called when end_that_request_first() returns 0, meaning 1102 * Typically called when end_that_request_first() returns 0, meaning
1102 * all transfers have been done for a request. It's important to call 1103 * all transfers have been done for a request. It's important to call
1103 * this function before end_that_request_last(), as that will put the 1104 * this function before end_that_request_last(), as that will put the
1104 * request back on the free list thus corrupting the internal tag list. 1105 * request back on the free list thus corrupting the internal tag list.
1105 * 1106 *
1106 * Notes: 1107 * Notes:
1107 * queue lock must be held. 1108 * queue lock must be held.
1108 **/ 1109 **/
1109 void blk_queue_end_tag(request_queue_t *q, struct request *rq) 1110 void blk_queue_end_tag(request_queue_t *q, struct request *rq)
1110 { 1111 {
1111 struct blk_queue_tag *bqt = q->queue_tags; 1112 struct blk_queue_tag *bqt = q->queue_tags;
1112 int tag = rq->tag; 1113 int tag = rq->tag;
1113 1114
1114 BUG_ON(tag == -1); 1115 BUG_ON(tag == -1);
1115 1116
1116 if (unlikely(tag >= bqt->real_max_depth)) 1117 if (unlikely(tag >= bqt->real_max_depth))
1117 /* 1118 /*
1118 * This can happen after tag depth has been reduced. 1119 * This can happen after tag depth has been reduced.
1119 * FIXME: how about a warning or info message here? 1120 * FIXME: how about a warning or info message here?
1120 */ 1121 */
1121 return; 1122 return;
1122 1123
1123 if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) { 1124 if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) {
1124 printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", 1125 printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
1125 __FUNCTION__, tag); 1126 __FUNCTION__, tag);
1126 return; 1127 return;
1127 } 1128 }
1128 1129
1129 list_del_init(&rq->queuelist); 1130 list_del_init(&rq->queuelist);
1130 rq->cmd_flags &= ~REQ_QUEUED; 1131 rq->cmd_flags &= ~REQ_QUEUED;
1131 rq->tag = -1; 1132 rq->tag = -1;
1132 1133
1133 if (unlikely(bqt->tag_index[tag] == NULL)) 1134 if (unlikely(bqt->tag_index[tag] == NULL))
1134 printk(KERN_ERR "%s: tag %d is missing\n", 1135 printk(KERN_ERR "%s: tag %d is missing\n",
1135 __FUNCTION__, tag); 1136 __FUNCTION__, tag);
1136 1137
1137 bqt->tag_index[tag] = NULL; 1138 bqt->tag_index[tag] = NULL;
1138 bqt->busy--; 1139 bqt->busy--;
1139 } 1140 }
1140 1141
1141 EXPORT_SYMBOL(blk_queue_end_tag); 1142 EXPORT_SYMBOL(blk_queue_end_tag);
1142 1143
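/*
 * Example (illustrative sketch only): a completion path following the
 * ordering described above - release the tag after end_that_request_first()
 * reports the transfer fully done, and before end_that_request_last() gives
 * the request back to the free list. Assumed to run with the queue lock
 * held; my_complete_rq() and the uptodate handling are hypothetical.
 */
static void my_complete_rq(request_queue_t *q, struct request *rq, int uptodate)
{
	if (end_that_request_first(rq, uptodate, rq->hard_nr_sectors))
		return;	/* partial completion, more segments pending */

	blk_queue_end_tag(q, rq);		/* free the tag first ... */
	end_that_request_last(rq, uptodate);	/* ... then retire the request */
}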
1143 /** 1144 /**
1144 * blk_queue_start_tag - find a free tag and assign it 1145 * blk_queue_start_tag - find a free tag and assign it
1145 * @q: the request queue for the device 1146 * @q: the request queue for the device
1146 * @rq: the block request that needs tagging 1147 * @rq: the block request that needs tagging
1147 * 1148 *
1148 * Description: 1149 * Description:
1149 * This can either be used as a stand-alone helper, or possibly be 1150 * This can either be used as a stand-alone helper, or possibly be
1150 * assigned as the queue &prep_rq_fn (in which case &struct request 1151 * assigned as the queue &prep_rq_fn (in which case &struct request
1151 * automagically gets a tag assigned). Note that this function 1152 * automagically gets a tag assigned). Note that this function
1152 * assumes that any type of request can be queued! If this is not 1153 * assumes that any type of request can be queued! If this is not
1153 * true for your device, you must check the request type before 1154 * true for your device, you must check the request type before
1154 * calling this function. The request will also be removed from 1155 * calling this function. The request will also be removed from
1155 * the request queue, so it's the driver's responsibility to re-add 1156 * the request queue, so it's the driver's responsibility to re-add
1156 * it if it should need to be restarted for some reason. 1157 * it if it should need to be restarted for some reason.
1157 * 1158 *
1158 * Notes: 1159 * Notes:
1159 * queue lock must be held. 1160 * queue lock must be held.
1160 **/ 1161 **/
1161 int blk_queue_start_tag(request_queue_t *q, struct request *rq) 1162 int blk_queue_start_tag(request_queue_t *q, struct request *rq)
1162 { 1163 {
1163 struct blk_queue_tag *bqt = q->queue_tags; 1164 struct blk_queue_tag *bqt = q->queue_tags;
1164 int tag; 1165 int tag;
1165 1166
1166 if (unlikely((rq->cmd_flags & REQ_QUEUED))) { 1167 if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
1167 printk(KERN_ERR 1168 printk(KERN_ERR
1168 "%s: request %p for device [%s] already tagged %d", 1169 "%s: request %p for device [%s] already tagged %d",
1169 __FUNCTION__, rq, 1170 __FUNCTION__, rq,
1170 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); 1171 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
1171 BUG(); 1172 BUG();
1172 } 1173 }
1173 1174
1174 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); 1175 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
1175 if (tag >= bqt->max_depth) 1176 if (tag >= bqt->max_depth)
1176 return 1; 1177 return 1;
1177 1178
1178 __set_bit(tag, bqt->tag_map); 1179 __set_bit(tag, bqt->tag_map);
1179 1180
1180 rq->cmd_flags |= REQ_QUEUED; 1181 rq->cmd_flags |= REQ_QUEUED;
1181 rq->tag = tag; 1182 rq->tag = tag;
1182 bqt->tag_index[tag] = rq; 1183 bqt->tag_index[tag] = rq;
1183 blkdev_dequeue_request(rq); 1184 blkdev_dequeue_request(rq);
1184 list_add(&rq->queuelist, &bqt->busy_list); 1185 list_add(&rq->queuelist, &bqt->busy_list);
1185 bqt->busy++; 1186 bqt->busy++;
1186 return 0; 1187 return 0;
1187 } 1188 }
1188 1189
1189 EXPORT_SYMBOL(blk_queue_start_tag); 1190 EXPORT_SYMBOL(blk_queue_start_tag);
1190 1191
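/*
 * Example (illustrative sketch only): a request_fn that tags everything it
 * issues. blk_queue_start_tag() dequeues the request on success, so the
 * driver only hands it to the hardware; a non-zero return means the tag map
 * is exhausted and the request stays on the queue until a tag is freed.
 * my_queue_to_hw() is a hypothetical driver hook; rq->tag identifies the
 * command to the hardware.
 */
static void my_request_fn(request_queue_t *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		if (blk_queue_start_tag(q, rq))
			break;	/* out of tags, retry from the completion path */

		my_queue_to_hw(q->queuedata, rq);
	}
}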
1191 /** 1192 /**
1192 * blk_queue_invalidate_tags - invalidate all pending tags 1193 * blk_queue_invalidate_tags - invalidate all pending tags
1193 * @q: the request queue for the device 1194 * @q: the request queue for the device
1194 * 1195 *
1195 * Description: 1196 * Description:
1196 * Hardware conditions may dictate a need to stop all pending requests. 1197 * Hardware conditions may dictate a need to stop all pending requests.
1197 * In this case, we will safely clear the block side of the tag queue and 1198 * In this case, we will safely clear the block side of the tag queue and
1198 * re-add all requests to the request queue in the right order. 1199 * re-add all requests to the request queue in the right order.
1199 * 1200 *
1200 * Notes: 1201 * Notes:
1201 * queue lock must be held. 1202 * queue lock must be held.
1202 **/ 1203 **/
1203 void blk_queue_invalidate_tags(request_queue_t *q) 1204 void blk_queue_invalidate_tags(request_queue_t *q)
1204 { 1205 {
1205 struct blk_queue_tag *bqt = q->queue_tags; 1206 struct blk_queue_tag *bqt = q->queue_tags;
1206 struct list_head *tmp, *n; 1207 struct list_head *tmp, *n;
1207 struct request *rq; 1208 struct request *rq;
1208 1209
1209 list_for_each_safe(tmp, n, &bqt->busy_list) { 1210 list_for_each_safe(tmp, n, &bqt->busy_list) {
1210 rq = list_entry_rq(tmp); 1211 rq = list_entry_rq(tmp);
1211 1212
1212 if (rq->tag == -1) { 1213 if (rq->tag == -1) {
1213 printk(KERN_ERR 1214 printk(KERN_ERR
1214 "%s: bad tag found on list\n", __FUNCTION__); 1215 "%s: bad tag found on list\n", __FUNCTION__);
1215 list_del_init(&rq->queuelist); 1216 list_del_init(&rq->queuelist);
1216 rq->cmd_flags &= ~REQ_QUEUED; 1217 rq->cmd_flags &= ~REQ_QUEUED;
1217 } else 1218 } else
1218 blk_queue_end_tag(q, rq); 1219 blk_queue_end_tag(q, rq);
1219 1220
1220 rq->cmd_flags &= ~REQ_STARTED; 1221 rq->cmd_flags &= ~REQ_STARTED;
1221 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); 1222 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1222 } 1223 }
1223 } 1224 }
1224 1225
1225 EXPORT_SYMBOL(blk_queue_invalidate_tags); 1226 EXPORT_SYMBOL(blk_queue_invalidate_tags);
1226 1227
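/*
 * Example (illustrative sketch only): a host reset handler can use this to
 * push every outstanding tagged request back onto the request queue for
 * re-issue once the hardware has forgotten its tags. The queue lock must be
 * held, as noted above; my_hard_reset() is a hypothetical driver hook.
 */
static void my_reset_and_requeue(request_queue_t *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	my_hard_reset(q->queuedata);
	blk_queue_invalidate_tags(q);	/* re-adds the requests in order */
	spin_unlock_irqrestore(q->queue_lock, flags);
}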
1227 void blk_dump_rq_flags(struct request *rq, char *msg) 1228 void blk_dump_rq_flags(struct request *rq, char *msg)
1228 { 1229 {
1229 int bit; 1230 int bit;
1230 1231
1231 printk("%s: dev %s: type=%x, flags=%x\n", msg, 1232 printk("%s: dev %s: type=%x, flags=%x\n", msg,
1232 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, 1233 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
1233 rq->cmd_flags); 1234 rq->cmd_flags);
1234 1235
1235 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, 1236 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
1236 rq->nr_sectors, 1237 rq->nr_sectors,
1237 rq->current_nr_sectors); 1238 rq->current_nr_sectors);
1238 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); 1239 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
1239 1240
1240 if (blk_pc_request(rq)) { 1241 if (blk_pc_request(rq)) {
1241 printk("cdb: "); 1242 printk("cdb: ");
1242 for (bit = 0; bit < sizeof(rq->cmd); bit++) 1243 for (bit = 0; bit < sizeof(rq->cmd); bit++)
1243 printk("%02x ", rq->cmd[bit]); 1244 printk("%02x ", rq->cmd[bit]);
1244 printk("\n"); 1245 printk("\n");
1245 } 1246 }
1246 } 1247 }
1247 1248
1248 EXPORT_SYMBOL(blk_dump_rq_flags); 1249 EXPORT_SYMBOL(blk_dump_rq_flags);
1249 1250
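/*
 * Example (illustrative sketch only): blk_dump_rq_flags() is a debugging aid;
 * the string is a free-form prefix for the dump. "my_drv" is arbitrary.
 */
static void my_report_bad_rq(struct request *rq)
{
	blk_dump_rq_flags(rq, "my_drv: rejected request");
}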
1250 void blk_recount_segments(request_queue_t *q, struct bio *bio) 1251 void blk_recount_segments(request_queue_t *q, struct bio *bio)
1251 { 1252 {
1252 struct bio_vec *bv, *bvprv = NULL; 1253 struct bio_vec *bv, *bvprv = NULL;
1253 int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster; 1254 int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster;
1254 int high, highprv = 1; 1255 int high, highprv = 1;
1255 1256
1256 if (unlikely(!bio->bi_io_vec)) 1257 if (unlikely(!bio->bi_io_vec))
1257 return; 1258 return;
1258 1259
1259 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); 1260 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
1260 hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0; 1261 hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0;
1261 bio_for_each_segment(bv, bio, i) { 1262 bio_for_each_segment(bv, bio, i) {
1262 /* 1263 /*
1263 * the trick here is making sure that a high page is never 1264 * the trick here is making sure that a high page is never
1264 * considered part of another segment, since that might 1265 * considered part of another segment, since that might
1265 * change with the bounce page. 1266 * change with the bounce page.
1266 */ 1267 */
1267 high = page_to_pfn(bv->bv_page) >= q->bounce_pfn; 1268 high = page_to_pfn(bv->bv_page) >= q->bounce_pfn;
1268 if (high || highprv) 1269 if (high || highprv)
1269 goto new_hw_segment; 1270 goto new_hw_segment;
1270 if (cluster) { 1271 if (cluster) {
1271 if (seg_size + bv->bv_len > q->max_segment_size) 1272 if (seg_size + bv->bv_len > q->max_segment_size)
1272 goto new_segment; 1273 goto new_segment;
1273 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) 1274 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
1274 goto new_segment; 1275 goto new_segment;
1275 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) 1276 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
1276 goto new_segment; 1277 goto new_segment;
1277 if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) 1278 if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
1278 goto new_hw_segment; 1279 goto new_hw_segment;
1279 1280
1280 seg_size += bv->bv_len; 1281 seg_size += bv->bv_len;
1281 hw_seg_size += bv->bv_len; 1282 hw_seg_size += bv->bv_len;
1282 bvprv = bv; 1283 bvprv = bv;
1283 continue; 1284 continue;
1284 } 1285 }
1285 new_segment: 1286 new_segment:
1286 if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && 1287 if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
1287 !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) { 1288 !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
1288 hw_seg_size += bv->bv_len; 1289 hw_seg_size += bv->bv_len;
1289 } else { 1290 } else {
1290 new_hw_segment: 1291 new_hw_segment:
1291 if (hw_seg_size > bio->bi_hw_front_size) 1292 if (hw_seg_size > bio->bi_hw_front_size)
1292 bio->bi_hw_front_size = hw_seg_size; 1293 bio->bi_hw_front_size = hw_seg_size;
1293 hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; 1294 hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
1294 nr_hw_segs++; 1295 nr_hw_segs++;
1295 } 1296 }
1296 1297
1297 nr_phys_segs++; 1298 nr_phys_segs++;
1298 bvprv = bv; 1299 bvprv = bv;
1299 seg_size = bv->bv_len; 1300 seg_size = bv->bv_len;
1300 highprv = high; 1301 highprv = high;
1301 } 1302 }
1302 if (hw_seg_size > bio->bi_hw_back_size) 1303 if (hw_seg_size > bio->bi_hw_back_size)
1303 bio->bi_hw_back_size = hw_seg_size; 1304 bio->bi_hw_back_size = hw_seg_size;
1304 if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size) 1305 if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size)
1305 bio->bi_hw_front_size = hw_seg_size; 1306 bio->bi_hw_front_size = hw_seg_size;
1306 bio->bi_phys_segments = nr_phys_segs; 1307 bio->bi_phys_segments = nr_phys_segs;
1307 bio->bi_hw_segments = nr_hw_segs; 1308 bio->bi_hw_segments = nr_hw_segs;
1308 bio->bi_flags |= (1 << BIO_SEG_VALID); 1309 bio->bi_flags |= (1 << BIO_SEG_VALID);
1309 } 1310 }
1310 1311
1311 1312
1312 static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio, 1313 static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio,
1313 struct bio *nxt) 1314 struct bio *nxt)
1314 { 1315 {
1315 if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) 1316 if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
1316 return 0; 1317 return 0;
1317 1318
1318 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) 1319 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
1319 return 0; 1320 return 0;
1320 if (bio->bi_size + nxt->bi_size > q->max_segment_size) 1321 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
1321 return 0; 1322 return 0;
1322 1323
1323 /* 1324 /*
1324 * bio and nxt are contiguous in memory, check if the queue allows 1325 * bio and nxt are contiguous in memory, check if the queue allows
1325 * these two to be merged into one 1326 * these two to be merged into one
1326 */ 1327 */
1327 if (BIO_SEG_BOUNDARY(q, bio, nxt)) 1328 if (BIO_SEG_BOUNDARY(q, bio, nxt))
1328 return 1; 1329 return 1;
1329 1330
1330 return 0; 1331 return 0;
1331 } 1332 }
1332 1333
1333 static int blk_hw_contig_segment(request_queue_t *q, struct bio *bio, 1334 static int blk_hw_contig_segment(request_queue_t *q, struct bio *bio,
1334 struct bio *nxt) 1335 struct bio *nxt)
1335 { 1336 {
1336 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) 1337 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1337 blk_recount_segments(q, bio); 1338 blk_recount_segments(q, bio);
1338 if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID))) 1339 if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID)))
1339 blk_recount_segments(q, nxt); 1340 blk_recount_segments(q, nxt);
1340 if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || 1341 if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
1341 BIOVEC_VIRT_OVERSIZE(bio->bi_hw_front_size + bio->bi_hw_back_size)) 1342 BIOVEC_VIRT_OVERSIZE(bio->bi_hw_front_size + bio->bi_hw_back_size))
1342 return 0; 1343 return 0;
1343 if (bio->bi_size + nxt->bi_size > q->max_segment_size) 1344 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
1344 return 0; 1345 return 0;
1345 1346
1346 return 1; 1347 return 1;
1347 } 1348 }
1348 1349
1349 /* 1350 /*
1350 * map a request to scatterlist, return number of sg entries setup. Caller 1351 * map a request to scatterlist, return number of sg entries setup. Caller
1351 * must make sure sg can hold rq->nr_phys_segments entries 1352 * must make sure sg can hold rq->nr_phys_segments entries
1352 */ 1353 */
1353 int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg) 1354 int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg)
1354 { 1355 {
1355 struct bio_vec *bvec, *bvprv; 1356 struct bio_vec *bvec, *bvprv;
1356 struct bio *bio; 1357 struct bio *bio;
1357 int nsegs, i, cluster; 1358 int nsegs, i, cluster;
1358 1359
1359 nsegs = 0; 1360 nsegs = 0;
1360 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); 1361 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
1361 1362
1362 /* 1363 /*
1363 * for each bio in rq 1364 * for each bio in rq
1364 */ 1365 */
1365 bvprv = NULL; 1366 bvprv = NULL;
1366 rq_for_each_bio(bio, rq) { 1367 rq_for_each_bio(bio, rq) {
1367 /* 1368 /*
1368 * for each segment in bio 1369 * for each segment in bio
1369 */ 1370 */
1370 bio_for_each_segment(bvec, bio, i) { 1371 bio_for_each_segment(bvec, bio, i) {
1371 int nbytes = bvec->bv_len; 1372 int nbytes = bvec->bv_len;
1372 1373
1373 if (bvprv && cluster) { 1374 if (bvprv && cluster) {
1374 if (sg[nsegs - 1].length + nbytes > q->max_segment_size) 1375 if (sg[nsegs - 1].length + nbytes > q->max_segment_size)
1375 goto new_segment; 1376 goto new_segment;
1376 1377
1377 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) 1378 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
1378 goto new_segment; 1379 goto new_segment;
1379 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) 1380 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
1380 goto new_segment; 1381 goto new_segment;
1381 1382
1382 sg[nsegs - 1].length += nbytes; 1383 sg[nsegs - 1].length += nbytes;
1383 } else { 1384 } else {
1384 new_segment: 1385 new_segment:
1385 memset(&sg[nsegs], 0, sizeof(struct scatterlist)); 1386 memset(&sg[nsegs], 0, sizeof(struct scatterlist));
1386 sg[nsegs].page = bvec->bv_page; 1387 sg[nsegs].page = bvec->bv_page;
1387 sg[nsegs].length = nbytes; 1388 sg[nsegs].length = nbytes;
1388 sg[nsegs].offset = bvec->bv_offset; 1389 sg[nsegs].offset = bvec->bv_offset;
1389 1390
1390 nsegs++; 1391 nsegs++;
1391 } 1392 }
1392 bvprv = bvec; 1393 bvprv = bvec;
1393 } /* segments in bio */ 1394 } /* segments in bio */
1394 } /* bios in rq */ 1395 } /* bios in rq */
1395 1396
1396 return nsegs; 1397 return nsegs;
1397 } 1398 }
1398 1399
1399 EXPORT_SYMBOL(blk_rq_map_sg); 1400 EXPORT_SYMBOL(blk_rq_map_sg);
1400 1401
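/*
 * Example (illustrative sketch only): as the comment above blk_rq_map_sg()
 * says, the caller provides room for rq->nr_phys_segments entries; the
 * returned count is what actually gets programmed into the controller.
 * struct my_cmd, ->sg_table and MY_MAX_SG are hypothetical, with MY_MAX_SG
 * matching the limit set via blk_queue_max_phys_segments().
 */
static int my_map_rq(request_queue_t *q, struct my_cmd *cmd, struct request *rq)
{
	if (rq->nr_phys_segments > MY_MAX_SG)
		return -EIO;	/* should not happen if the limits are set up */

	cmd->sg_count = blk_rq_map_sg(q, rq, cmd->sg_table);
	return 0;
}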
1401 /* 1402 /*
1402 * the standard queue merge functions, can be overridden with device 1403 * the standard queue merge functions, can be overridden with device
1403 * specific ones if so desired 1404 * specific ones if so desired
1404 */ 1405 */
1405 1406
1406 static inline int ll_new_mergeable(request_queue_t *q, 1407 static inline int ll_new_mergeable(request_queue_t *q,
1407 struct request *req, 1408 struct request *req,
1408 struct bio *bio) 1409 struct bio *bio)
1409 { 1410 {
1410 int nr_phys_segs = bio_phys_segments(q, bio); 1411 int nr_phys_segs = bio_phys_segments(q, bio);
1411 1412
1412 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 1413 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1413 req->cmd_flags |= REQ_NOMERGE; 1414 req->cmd_flags |= REQ_NOMERGE;
1414 if (req == q->last_merge) 1415 if (req == q->last_merge)
1415 q->last_merge = NULL; 1416 q->last_merge = NULL;
1416 return 0; 1417 return 0;
1417 } 1418 }
1418 1419
1419 /* 1420 /*
1420 * A hw segment is just getting larger, bump just the phys 1421 * A hw segment is just getting larger, bump just the phys
1421 * counter. 1422 * counter.
1422 */ 1423 */
1423 req->nr_phys_segments += nr_phys_segs; 1424 req->nr_phys_segments += nr_phys_segs;
1424 return 1; 1425 return 1;
1425 } 1426 }
1426 1427
1427 static inline int ll_new_hw_segment(request_queue_t *q, 1428 static inline int ll_new_hw_segment(request_queue_t *q,
1428 struct request *req, 1429 struct request *req,
1429 struct bio *bio) 1430 struct bio *bio)
1430 { 1431 {
1431 int nr_hw_segs = bio_hw_segments(q, bio); 1432 int nr_hw_segs = bio_hw_segments(q, bio);
1432 int nr_phys_segs = bio_phys_segments(q, bio); 1433 int nr_phys_segs = bio_phys_segments(q, bio);
1433 1434
1434 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments 1435 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
1435 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 1436 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1436 req->cmd_flags |= REQ_NOMERGE; 1437 req->cmd_flags |= REQ_NOMERGE;
1437 if (req == q->last_merge) 1438 if (req == q->last_merge)
1438 q->last_merge = NULL; 1439 q->last_merge = NULL;
1439 return 0; 1440 return 0;
1440 } 1441 }
1441 1442
1442 /* 1443 /*
1443 * This will form the start of a new hw segment. Bump both 1444 * This will form the start of a new hw segment. Bump both
1444 * counters. 1445 * counters.
1445 */ 1446 */
1446 req->nr_hw_segments += nr_hw_segs; 1447 req->nr_hw_segments += nr_hw_segs;
1447 req->nr_phys_segments += nr_phys_segs; 1448 req->nr_phys_segments += nr_phys_segs;
1448 return 1; 1449 return 1;
1449 } 1450 }
1450 1451
1451 static int ll_back_merge_fn(request_queue_t *q, struct request *req, 1452 static int ll_back_merge_fn(request_queue_t *q, struct request *req,
1452 struct bio *bio) 1453 struct bio *bio)
1453 { 1454 {
1454 unsigned short max_sectors; 1455 unsigned short max_sectors;
1455 int len; 1456 int len;
1456 1457
1457 if (unlikely(blk_pc_request(req))) 1458 if (unlikely(blk_pc_request(req)))
1458 max_sectors = q->max_hw_sectors; 1459 max_sectors = q->max_hw_sectors;
1459 else 1460 else
1460 max_sectors = q->max_sectors; 1461 max_sectors = q->max_sectors;
1461 1462
1462 if (req->nr_sectors + bio_sectors(bio) > max_sectors) { 1463 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1463 req->cmd_flags |= REQ_NOMERGE; 1464 req->cmd_flags |= REQ_NOMERGE;
1464 if (req == q->last_merge) 1465 if (req == q->last_merge)
1465 q->last_merge = NULL; 1466 q->last_merge = NULL;
1466 return 0; 1467 return 0;
1467 } 1468 }
1468 if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID))) 1469 if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
1469 blk_recount_segments(q, req->biotail); 1470 blk_recount_segments(q, req->biotail);
1470 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) 1471 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1471 blk_recount_segments(q, bio); 1472 blk_recount_segments(q, bio);
1472 len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; 1473 len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
1473 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) && 1474 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) &&
1474 !BIOVEC_VIRT_OVERSIZE(len)) { 1475 !BIOVEC_VIRT_OVERSIZE(len)) {
1475 int mergeable = ll_new_mergeable(q, req, bio); 1476 int mergeable = ll_new_mergeable(q, req, bio);
1476 1477
1477 if (mergeable) { 1478 if (mergeable) {
1478 if (req->nr_hw_segments == 1) 1479 if (req->nr_hw_segments == 1)
1479 req->bio->bi_hw_front_size = len; 1480 req->bio->bi_hw_front_size = len;
1480 if (bio->bi_hw_segments == 1) 1481 if (bio->bi_hw_segments == 1)
1481 bio->bi_hw_back_size = len; 1482 bio->bi_hw_back_size = len;
1482 } 1483 }
1483 return mergeable; 1484 return mergeable;
1484 } 1485 }
1485 1486
1486 return ll_new_hw_segment(q, req, bio); 1487 return ll_new_hw_segment(q, req, bio);
1487 } 1488 }
1488 1489
1489 static int ll_front_merge_fn(request_queue_t *q, struct request *req, 1490 static int ll_front_merge_fn(request_queue_t *q, struct request *req,
1490 struct bio *bio) 1491 struct bio *bio)
1491 { 1492 {
1492 unsigned short max_sectors; 1493 unsigned short max_sectors;
1493 int len; 1494 int len;
1494 1495
1495 if (unlikely(blk_pc_request(req))) 1496 if (unlikely(blk_pc_request(req)))
1496 max_sectors = q->max_hw_sectors; 1497 max_sectors = q->max_hw_sectors;
1497 else 1498 else
1498 max_sectors = q->max_sectors; 1499 max_sectors = q->max_sectors;
1499 1500
1500 1501
1501 if (req->nr_sectors + bio_sectors(bio) > max_sectors) { 1502 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1502 req->cmd_flags |= REQ_NOMERGE; 1503 req->cmd_flags |= REQ_NOMERGE;
1503 if (req == q->last_merge) 1504 if (req == q->last_merge)
1504 q->last_merge = NULL; 1505 q->last_merge = NULL;
1505 return 0; 1506 return 0;
1506 } 1507 }
1507 len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; 1508 len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
1508 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) 1509 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1509 blk_recount_segments(q, bio); 1510 blk_recount_segments(q, bio);
1510 if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID))) 1511 if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID)))
1511 blk_recount_segments(q, req->bio); 1512 blk_recount_segments(q, req->bio);
1512 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && 1513 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
1513 !BIOVEC_VIRT_OVERSIZE(len)) { 1514 !BIOVEC_VIRT_OVERSIZE(len)) {
1514 int mergeable = ll_new_mergeable(q, req, bio); 1515 int mergeable = ll_new_mergeable(q, req, bio);
1515 1516
1516 if (mergeable) { 1517 if (mergeable) {
1517 if (bio->bi_hw_segments == 1) 1518 if (bio->bi_hw_segments == 1)
1518 bio->bi_hw_front_size = len; 1519 bio->bi_hw_front_size = len;
1519 if (req->nr_hw_segments == 1) 1520 if (req->nr_hw_segments == 1)
1520 req->biotail->bi_hw_back_size = len; 1521 req->biotail->bi_hw_back_size = len;
1521 } 1522 }
1522 return mergeable; 1523 return mergeable;
1523 } 1524 }
1524 1525
1525 return ll_new_hw_segment(q, req, bio); 1526 return ll_new_hw_segment(q, req, bio);
1526 } 1527 }
1527 1528
1528 static int ll_merge_requests_fn(request_queue_t *q, struct request *req, 1529 static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
1529 struct request *next) 1530 struct request *next)
1530 { 1531 {
1531 int total_phys_segments; 1532 int total_phys_segments;
1532 int total_hw_segments; 1533 int total_hw_segments;
1533 1534
1534 /* 1535 /*
1535 * First check whether either of the requests is a re-queued 1536 * First check whether either of the requests is a re-queued
1536 * request. Can't merge them if they are. 1537 * request. Can't merge them if they are.
1537 */ 1538 */
1538 if (req->special || next->special) 1539 if (req->special || next->special)
1539 return 0; 1540 return 0;
1540 1541
1541 /* 1542 /*
1542 * Will it become too large? 1543 * Will it become too large?
1543 */ 1544 */
1544 if ((req->nr_sectors + next->nr_sectors) > q->max_sectors) 1545 if ((req->nr_sectors + next->nr_sectors) > q->max_sectors)
1545 return 0; 1546 return 0;
1546 1547
1547 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; 1548 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
1548 if (blk_phys_contig_segment(q, req->biotail, next->bio)) 1549 if (blk_phys_contig_segment(q, req->biotail, next->bio))
1549 total_phys_segments--; 1550 total_phys_segments--;
1550 1551
1551 if (total_phys_segments > q->max_phys_segments) 1552 if (total_phys_segments > q->max_phys_segments)
1552 return 0; 1553 return 0;
1553 1554
1554 total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; 1555 total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
1555 if (blk_hw_contig_segment(q, req->biotail, next->bio)) { 1556 if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
1556 int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size; 1557 int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size;
1557 /* 1558 /*
1558 * propagate the combined length to the end of the requests 1559 * propagate the combined length to the end of the requests
1559 */ 1560 */
1560 if (req->nr_hw_segments == 1) 1561 if (req->nr_hw_segments == 1)
1561 req->bio->bi_hw_front_size = len; 1562 req->bio->bi_hw_front_size = len;
1562 if (next->nr_hw_segments == 1) 1563 if (next->nr_hw_segments == 1)
1563 next->biotail->bi_hw_back_size = len; 1564 next->biotail->bi_hw_back_size = len;
1564 total_hw_segments--; 1565 total_hw_segments--;
1565 } 1566 }
1566 1567
1567 if (total_hw_segments > q->max_hw_segments) 1568 if (total_hw_segments > q->max_hw_segments)
1568 return 0; 1569 return 0;
1569 1570
1570 /* Merge is OK... */ 1571 /* Merge is OK... */
1571 req->nr_phys_segments = total_phys_segments; 1572 req->nr_phys_segments = total_phys_segments;
1572 req->nr_hw_segments = total_hw_segments; 1573 req->nr_hw_segments = total_hw_segments;
1573 return 1; 1574 return 1;
1574 } 1575 }
1575 1576
1576 /* 1577 /*
1577 * "plug" the device if there are no outstanding requests: this will 1578 * "plug" the device if there are no outstanding requests: this will
1578 * force the transfer to start only after we have put all the requests 1579 * force the transfer to start only after we have put all the requests
1579 * on the list. 1580 * on the list.
1580 * 1581 *
1581 * This is called with interrupts off and no requests on the queue and 1582 * This is called with interrupts off and no requests on the queue and
1582 * with the queue lock held. 1583 * with the queue lock held.
1583 */ 1584 */
1584 void blk_plug_device(request_queue_t *q) 1585 void blk_plug_device(request_queue_t *q)
1585 { 1586 {
1586 WARN_ON(!irqs_disabled()); 1587 WARN_ON(!irqs_disabled());
1587 1588
1588 /* 1589 /*
1589 * don't plug a stopped queue, it must be paired with blk_start_queue() 1590 * don't plug a stopped queue, it must be paired with blk_start_queue()
1590 * which will restart the queueing 1591 * which will restart the queueing
1591 */ 1592 */
1592 if (blk_queue_stopped(q)) 1593 if (blk_queue_stopped(q))
1593 return; 1594 return;
1594 1595
1595 if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { 1596 if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
1596 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); 1597 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
1597 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); 1598 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
1598 } 1599 }
1599 } 1600 }
1600 1601
1601 EXPORT_SYMBOL(blk_plug_device); 1602 EXPORT_SYMBOL(blk_plug_device);
1602 1603
1603 /* 1604 /*
1604 * remove the queue from the plugged list, if present. called with 1605 * remove the queue from the plugged list, if present. called with
1605 * queue lock held and interrupts disabled. 1606 * queue lock held and interrupts disabled.
1606 */ 1607 */
1607 int blk_remove_plug(request_queue_t *q) 1608 int blk_remove_plug(request_queue_t *q)
1608 { 1609 {
1609 WARN_ON(!irqs_disabled()); 1610 WARN_ON(!irqs_disabled());
1610 1611
1611 if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) 1612 if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1612 return 0; 1613 return 0;
1613 1614
1614 del_timer(&q->unplug_timer); 1615 del_timer(&q->unplug_timer);
1615 return 1; 1616 return 1;
1616 } 1617 }
1617 1618
1618 EXPORT_SYMBOL(blk_remove_plug); 1619 EXPORT_SYMBOL(blk_remove_plug);
1619 1620
1620 /* 1621 /*
1621 * remove the plug and let it rip.. 1622 * remove the plug and let it rip..
1622 */ 1623 */
1623 void __generic_unplug_device(request_queue_t *q) 1624 void __generic_unplug_device(request_queue_t *q)
1624 { 1625 {
1625 if (unlikely(blk_queue_stopped(q))) 1626 if (unlikely(blk_queue_stopped(q)))
1626 return; 1627 return;
1627 1628
1628 if (!blk_remove_plug(q)) 1629 if (!blk_remove_plug(q))
1629 return; 1630 return;
1630 1631
1631 q->request_fn(q); 1632 q->request_fn(q);
1632 } 1633 }
1633 EXPORT_SYMBOL(__generic_unplug_device); 1634 EXPORT_SYMBOL(__generic_unplug_device);
1634 1635
1635 /** 1636 /**
1636 * generic_unplug_device - fire a request queue 1637 * generic_unplug_device - fire a request queue
1637 * @q: The &request_queue_t in question 1638 * @q: The &request_queue_t in question
1638 * 1639 *
1639 * Description: 1640 * Description:
1640 * Linux uses plugging to build bigger request queues before letting 1641 * Linux uses plugging to build bigger request queues before letting
1641 * the device have at them. If a queue is plugged, the I/O scheduler 1642 * the device have at them. If a queue is plugged, the I/O scheduler
1642 * is still adding and merging requests on the queue. Once the queue 1643 * is still adding and merging requests on the queue. Once the queue
1643 * gets unplugged, the request_fn defined for the queue is invoked and 1644 * gets unplugged, the request_fn defined for the queue is invoked and
1644 * transfers started. 1645 * transfers started.
1645 **/ 1646 **/
1646 void generic_unplug_device(request_queue_t *q) 1647 void generic_unplug_device(request_queue_t *q)
1647 { 1648 {
1648 spin_lock_irq(q->queue_lock); 1649 spin_lock_irq(q->queue_lock);
1649 __generic_unplug_device(q); 1650 __generic_unplug_device(q);
1650 spin_unlock_irq(q->queue_lock); 1651 spin_unlock_irq(q->queue_lock);
1651 } 1652 }
1652 EXPORT_SYMBOL(generic_unplug_device); 1653 EXPORT_SYMBOL(generic_unplug_device);
1653 1654
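/*
 * Example (illustrative sketch only): code that has just submitted I/O and is
 * about to sleep on it can force the plug out instead of waiting for the
 * unplug timer. generic_unplug_device() takes the queue lock itself, so it is
 * called without the lock held; the completion here is a hypothetical example
 * of something signalled from the I/O completion path.
 */
static void my_flush_and_wait(request_queue_t *q, struct completion *done)
{
	generic_unplug_device(q);
	wait_for_completion(done);
}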
1654 static void blk_backing_dev_unplug(struct backing_dev_info *bdi, 1655 static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
1655 struct page *page) 1656 struct page *page)
1656 { 1657 {
1657 request_queue_t *q = bdi->unplug_io_data; 1658 request_queue_t *q = bdi->unplug_io_data;
1658 1659
1659 /* 1660 /*
1660 * devices don't necessarily have an ->unplug_fn defined 1661 * devices don't necessarily have an ->unplug_fn defined
1661 */ 1662 */
1662 if (q->unplug_fn) { 1663 if (q->unplug_fn) {
1663 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, 1664 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
1664 q->rq.count[READ] + q->rq.count[WRITE]); 1665 q->rq.count[READ] + q->rq.count[WRITE]);
1665 1666
1666 q->unplug_fn(q); 1667 q->unplug_fn(q);
1667 } 1668 }
1668 } 1669 }
1669 1670
1670 static void blk_unplug_work(void *data) 1671 static void blk_unplug_work(void *data)
1671 { 1672 {
1672 request_queue_t *q = data; 1673 request_queue_t *q = data;
1673 1674
1674 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, 1675 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
1675 q->rq.count[READ] + q->rq.count[WRITE]); 1676 q->rq.count[READ] + q->rq.count[WRITE]);
1676 1677
1677 q->unplug_fn(q); 1678 q->unplug_fn(q);
1678 } 1679 }
1679 1680
1680 static void blk_unplug_timeout(unsigned long data) 1681 static void blk_unplug_timeout(unsigned long data)
1681 { 1682 {
1682 request_queue_t *q = (request_queue_t *)data; 1683 request_queue_t *q = (request_queue_t *)data;
1683 1684
1684 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, 1685 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
1685 q->rq.count[READ] + q->rq.count[WRITE]); 1686 q->rq.count[READ] + q->rq.count[WRITE]);
1686 1687
1687 kblockd_schedule_work(&q->unplug_work); 1688 kblockd_schedule_work(&q->unplug_work);
1688 } 1689 }
1689 1690
1690 /** 1691 /**
1691 * blk_start_queue - restart a previously stopped queue 1692 * blk_start_queue - restart a previously stopped queue
1692 * @q: The &request_queue_t in question 1693 * @q: The &request_queue_t in question
1693 * 1694 *
1694 * Description: 1695 * Description:
1695 * blk_start_queue() will clear the stop flag on the queue, and call 1696 * blk_start_queue() will clear the stop flag on the queue, and call
1696 * the request_fn for the queue if it was in a stopped state when 1697 * the request_fn for the queue if it was in a stopped state when
1697 * entered. Also see blk_stop_queue(). Queue lock must be held. 1698 * entered. Also see blk_stop_queue(). Queue lock must be held.
1698 **/ 1699 **/
1699 void blk_start_queue(request_queue_t *q) 1700 void blk_start_queue(request_queue_t *q)
1700 { 1701 {
1701 WARN_ON(!irqs_disabled()); 1702 WARN_ON(!irqs_disabled());
1702 1703
1703 clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); 1704 clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1704 1705
1705 /* 1706 /*
1706 * one level of recursion is ok and is much faster than kicking 1707 * one level of recursion is ok and is much faster than kicking
1707 * the unplug handling 1708 * the unplug handling
1708 */ 1709 */
1709 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { 1710 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
1710 q->request_fn(q); 1711 q->request_fn(q);
1711 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); 1712 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
1712 } else { 1713 } else {
1713 blk_plug_device(q); 1714 blk_plug_device(q);
1714 kblockd_schedule_work(&q->unplug_work); 1715 kblockd_schedule_work(&q->unplug_work);
1715 } 1716 }
1716 } 1717 }
1717 1718
1718 EXPORT_SYMBOL(blk_start_queue); 1719 EXPORT_SYMBOL(blk_start_queue);
1719 1720
1720 /** 1721 /**
1721 * blk_stop_queue - stop a queue 1722 * blk_stop_queue - stop a queue
1722 * @q: The &request_queue_t in question 1723 * @q: The &request_queue_t in question
1723 * 1724 *
1724 * Description: 1725 * Description:
1725 * The Linux block layer assumes that a block driver will consume all 1726 * The Linux block layer assumes that a block driver will consume all
1726 * entries on the request queue when the request_fn strategy is called. 1727 * entries on the request queue when the request_fn strategy is called.
1727 * Often this will not happen, because of hardware limitations (queue 1728 * Often this will not happen, because of hardware limitations (queue
1728 * depth settings). If a device driver gets a 'queue full' response, 1729 * depth settings). If a device driver gets a 'queue full' response,
1729 * or if it simply chooses not to queue more I/O at one point, it can 1730 * or if it simply chooses not to queue more I/O at one point, it can
1730 * call this function to prevent the request_fn from being called until 1731 * call this function to prevent the request_fn from being called until
1731 * the driver has signalled it's ready to go again. This happens by calling 1732 * the driver has signalled it's ready to go again. This happens by calling
1732 * blk_start_queue() to restart queue operations. Queue lock must be held. 1733 * blk_start_queue() to restart queue operations. Queue lock must be held.
1733 **/ 1734 **/
1734 void blk_stop_queue(request_queue_t *q) 1735 void blk_stop_queue(request_queue_t *q)
1735 { 1736 {
1736 blk_remove_plug(q); 1737 blk_remove_plug(q);
1737 set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); 1738 set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1738 } 1739 }
1739 EXPORT_SYMBOL(blk_stop_queue); 1740 EXPORT_SYMBOL(blk_stop_queue);
1740 1741
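/*
 * Example (illustrative sketch only): the stop/start pairing described above.
 * The request_fn stops the queue when the hardware is full and leaves the
 * current request where it is; the completion path restarts the queue once
 * there is room again. Both run under the queue lock. my_hw_full() and
 * my_hw_has_room() are hypothetical driver hooks.
 */
static void my_throttled_request_fn(request_queue_t *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		if (my_hw_full(q->queuedata)) {
			blk_stop_queue(q);	/* rq stays on the queue */
			return;
		}
		blkdev_dequeue_request(rq);
		/* ... issue rq to the hardware ... */
	}
}

static void my_completion(request_queue_t *q)
{
	if (my_hw_has_room(q->queuedata) && blk_queue_stopped(q))
		blk_start_queue(q);
}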
1741 /** 1742 /**
1742 * blk_sync_queue - cancel any pending callbacks on a queue 1743 * blk_sync_queue - cancel any pending callbacks on a queue
1743 * @q: the queue 1744 * @q: the queue
1744 * 1745 *
1745 * Description: 1746 * Description:
1746 * The block layer may perform asynchronous callback activity 1747 * The block layer may perform asynchronous callback activity
1747 * on a queue, such as calling the unplug function after a timeout. 1748 * on a queue, such as calling the unplug function after a timeout.
1748 * A block device may call blk_sync_queue to ensure that any 1749 * A block device may call blk_sync_queue to ensure that any
1749 * such activity is cancelled, thus allowing it to release resources 1750 * such activity is cancelled, thus allowing it to release resources
1750 * that the callbacks might use. The caller must already have made sure 1751 * that the callbacks might use. The caller must already have made sure
1751 * that its ->make_request_fn will not re-add plugging prior to calling 1752 * that its ->make_request_fn will not re-add plugging prior to calling
1752 * this function. 1753 * this function.
1753 * 1754 *
1754 */ 1755 */
1755 void blk_sync_queue(struct request_queue *q) 1756 void blk_sync_queue(struct request_queue *q)
1756 { 1757 {
1757 del_timer_sync(&q->unplug_timer); 1758 del_timer_sync(&q->unplug_timer);
1758 kblockd_flush(); 1759 kblockd_flush();
1759 } 1760 }
1760 EXPORT_SYMBOL(blk_sync_queue); 1761 EXPORT_SYMBOL(blk_sync_queue);
1761 1762
1762 /** 1763 /**
1763 * blk_run_queue - run a single device queue 1764 * blk_run_queue - run a single device queue
1764 * @q: The queue to run 1765 * @q: The queue to run
1765 */ 1766 */
1766 void blk_run_queue(struct request_queue *q) 1767 void blk_run_queue(struct request_queue *q)
1767 { 1768 {
1768 unsigned long flags; 1769 unsigned long flags;
1769 1770
1770 spin_lock_irqsave(q->queue_lock, flags); 1771 spin_lock_irqsave(q->queue_lock, flags);
1771 blk_remove_plug(q); 1772 blk_remove_plug(q);
1772 1773
1773 /* 1774 /*
1774 * Only recurse once to avoid overrunning the stack, let the unplug 1775 * Only recurse once to avoid overrunning the stack, let the unplug
1775 * handling reinvoke the handler shortly if we already got there. 1776 * handling reinvoke the handler shortly if we already got there.
1776 */ 1777 */
1777 if (!elv_queue_empty(q)) { 1778 if (!elv_queue_empty(q)) {
1778 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { 1779 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
1779 q->request_fn(q); 1780 q->request_fn(q);
1780 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); 1781 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
1781 } else { 1782 } else {
1782 blk_plug_device(q); 1783 blk_plug_device(q);
1783 kblockd_schedule_work(&q->unplug_work); 1784 kblockd_schedule_work(&q->unplug_work);
1784 } 1785 }
1785 } 1786 }
1786 1787
1787 spin_unlock_irqrestore(q->queue_lock, flags); 1788 spin_unlock_irqrestore(q->queue_lock, flags);
1788 } 1789 }
1789 EXPORT_SYMBOL(blk_run_queue); 1790 EXPORT_SYMBOL(blk_run_queue);
1790 1791
1791 /** 1792 /**
1792 * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed 1793 * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
1793 * @kobj: the kobj belonging to the request queue to be released 1794 * @kobj: the kobj belonging to the request queue to be released
1794 * 1795 *
1795 * Description: 1796 * Description:
1796 * blk_cleanup_queue is the pair to blk_init_queue() or 1797 * blk_cleanup_queue is the pair to blk_init_queue() or
1797 * blk_queue_make_request(). It should be called when a request queue is 1798 * blk_queue_make_request(). It should be called when a request queue is
1798 * being released; typically when a block device is being de-registered. 1799 * being released; typically when a block device is being de-registered.
1799 * Currently, its primary task is to free all the &struct request 1800 * Currently, its primary task is to free all the &struct request
1800 * structures that were allocated to the queue and the queue itself. 1801 * structures that were allocated to the queue and the queue itself.
1801 * 1802 *
1802 * Caveat: 1803 * Caveat:
1803 * Hopefully the low level driver will have finished any 1804 * Hopefully the low level driver will have finished any
1804 * outstanding requests first... 1805 * outstanding requests first...
1805 **/ 1806 **/
1806 static void blk_release_queue(struct kobject *kobj) 1807 static void blk_release_queue(struct kobject *kobj)
1807 { 1808 {
1808 request_queue_t *q = container_of(kobj, struct request_queue, kobj); 1809 request_queue_t *q = container_of(kobj, struct request_queue, kobj);
1809 struct request_list *rl = &q->rq; 1810 struct request_list *rl = &q->rq;
1810 1811
1811 blk_sync_queue(q); 1812 blk_sync_queue(q);
1812 1813
1813 if (rl->rq_pool) 1814 if (rl->rq_pool)
1814 mempool_destroy(rl->rq_pool); 1815 mempool_destroy(rl->rq_pool);
1815 1816
1816 if (q->queue_tags) 1817 if (q->queue_tags)
1817 __blk_queue_free_tags(q); 1818 __blk_queue_free_tags(q);
1818 1819
1819 blk_trace_shutdown(q); 1820 blk_trace_shutdown(q);
1820 1821
1821 kmem_cache_free(requestq_cachep, q); 1822 kmem_cache_free(requestq_cachep, q);
1822 } 1823 }
1823 1824
1824 void blk_put_queue(request_queue_t *q) 1825 void blk_put_queue(request_queue_t *q)
1825 { 1826 {
1826 kobject_put(&q->kobj); 1827 kobject_put(&q->kobj);
1827 } 1828 }
1828 EXPORT_SYMBOL(blk_put_queue); 1829 EXPORT_SYMBOL(blk_put_queue);
1829 1830
1830 void blk_cleanup_queue(request_queue_t * q) 1831 void blk_cleanup_queue(request_queue_t * q)
1831 { 1832 {
1832 mutex_lock(&q->sysfs_lock); 1833 mutex_lock(&q->sysfs_lock);
1833 set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); 1834 set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
1834 mutex_unlock(&q->sysfs_lock); 1835 mutex_unlock(&q->sysfs_lock);
1835 1836
1836 if (q->elevator) 1837 if (q->elevator)
1837 elevator_exit(q->elevator); 1838 elevator_exit(q->elevator);
1838 1839
1839 blk_put_queue(q); 1840 blk_put_queue(q);
1840 } 1841 }
1841 1842
1842 EXPORT_SYMBOL(blk_cleanup_queue); 1843 EXPORT_SYMBOL(blk_cleanup_queue);
1843 1844
1844 static int blk_init_free_list(request_queue_t *q) 1845 static int blk_init_free_list(request_queue_t *q)
1845 { 1846 {
1846 struct request_list *rl = &q->rq; 1847 struct request_list *rl = &q->rq;
1847 1848
1848 rl->count[READ] = rl->count[WRITE] = 0; 1849 rl->count[READ] = rl->count[WRITE] = 0;
1849 rl->starved[READ] = rl->starved[WRITE] = 0; 1850 rl->starved[READ] = rl->starved[WRITE] = 0;
1850 rl->elvpriv = 0; 1851 rl->elvpriv = 0;
1851 init_waitqueue_head(&rl->wait[READ]); 1852 init_waitqueue_head(&rl->wait[READ]);
1852 init_waitqueue_head(&rl->wait[WRITE]); 1853 init_waitqueue_head(&rl->wait[WRITE]);
1853 1854
1854 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 1855 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
1855 mempool_free_slab, request_cachep, q->node); 1856 mempool_free_slab, request_cachep, q->node);
1856 1857
1857 if (!rl->rq_pool) 1858 if (!rl->rq_pool)
1858 return -ENOMEM; 1859 return -ENOMEM;
1859 1860
1860 return 0; 1861 return 0;
1861 } 1862 }
1862 1863
1863 request_queue_t *blk_alloc_queue(gfp_t gfp_mask) 1864 request_queue_t *blk_alloc_queue(gfp_t gfp_mask)
1864 { 1865 {
1865 return blk_alloc_queue_node(gfp_mask, -1); 1866 return blk_alloc_queue_node(gfp_mask, -1);
1866 } 1867 }
1867 EXPORT_SYMBOL(blk_alloc_queue); 1868 EXPORT_SYMBOL(blk_alloc_queue);
1868 1869
1869 static struct kobj_type queue_ktype; 1870 static struct kobj_type queue_ktype;
1870 1871
1871 request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) 1872 request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
1872 { 1873 {
1873 request_queue_t *q; 1874 request_queue_t *q;
1874 1875
1875 q = kmem_cache_alloc_node(requestq_cachep, gfp_mask, node_id); 1876 q = kmem_cache_alloc_node(requestq_cachep, gfp_mask, node_id);
1876 if (!q) 1877 if (!q)
1877 return NULL; 1878 return NULL;
1878 1879
1879 memset(q, 0, sizeof(*q)); 1880 memset(q, 0, sizeof(*q));
1880 init_timer(&q->unplug_timer); 1881 init_timer(&q->unplug_timer);
1881 1882
1882 snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); 1883 snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue");
1883 q->kobj.ktype = &queue_ktype; 1884 q->kobj.ktype = &queue_ktype;
1884 kobject_init(&q->kobj); 1885 kobject_init(&q->kobj);
1885 1886
1886 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; 1887 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
1887 q->backing_dev_info.unplug_io_data = q; 1888 q->backing_dev_info.unplug_io_data = q;
1888 1889
1889 mutex_init(&q->sysfs_lock); 1890 mutex_init(&q->sysfs_lock);
1890 1891
1891 return q; 1892 return q;
1892 } 1893 }
1893 EXPORT_SYMBOL(blk_alloc_queue_node); 1894 EXPORT_SYMBOL(blk_alloc_queue_node);
1894 1895
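/*
 * Example (illustrative sketch only): the bio-based alternative to
 * blk_init_queue(). A driver that handles bios directly allocates a bare
 * queue and installs its own make_request_fn, bypassing the request and
 * elevator machinery. The actual bio handling is elided; bio_endio() in this
 * tree takes (bio, bytes_done, error).
 */
static int my_make_request(request_queue_t *q, struct bio *bio)
{
	/* ... transfer the data described by bio ... */
	bio_endio(bio, bio->bi_size, 0);
	return 0;
}

static request_queue_t *my_create_bio_queue(void)
{
	request_queue_t *q = blk_alloc_queue(GFP_KERNEL);

	if (q)
		blk_queue_make_request(q, my_make_request);

	return q;
}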
1895 /** 1896 /**
1896 * blk_init_queue - prepare a request queue for use with a block device 1897 * blk_init_queue - prepare a request queue for use with a block device
1897 * @rfn: The function to be called to process requests that have been 1898 * @rfn: The function to be called to process requests that have been
1898 * placed on the queue. 1899 * placed on the queue.
1899 * @lock: Request queue spin lock 1900 * @lock: Request queue spin lock
1900 * 1901 *
1901 * Description: 1902 * Description:
1902 * If a block device wishes to use the standard request handling procedures, 1903 * If a block device wishes to use the standard request handling procedures,
1903 * which sorts requests and coalesces adjacent requests, then it must 1904 * which sorts requests and coalesces adjacent requests, then it must
1904 * call blk_init_queue(). The function @rfn will be called when there 1905 * call blk_init_queue(). The function @rfn will be called when there
1905 * are requests on the queue that need to be processed. If the device 1906 * are requests on the queue that need to be processed. If the device
1906 * supports plugging, then @rfn may not be called immediately when requests 1907 * supports plugging, then @rfn may not be called immediately when requests
1907 * are available on the queue, but may be called at some time later instead. 1908 * are available on the queue, but may be called at some time later instead.
1908 * Plugged queues are generally unplugged when a buffer belonging to one 1909 * Plugged queues are generally unplugged when a buffer belonging to one
1909 * of the requests on the queue is needed, or due to memory pressure. 1910 * of the requests on the queue is needed, or due to memory pressure.
1910 * 1911 *
1911 * @rfn is not required, or even expected, to remove all requests off the 1912 * @rfn is not required, or even expected, to remove all requests off the
1912 * queue, but only as many as it can handle at a time. If it does leave 1913 * queue, but only as many as it can handle at a time. If it does leave
1913 * requests on the queue, it is responsible for arranging that the requests 1914 * requests on the queue, it is responsible for arranging that the requests
1914 * get dealt with eventually. 1915 * get dealt with eventually.
1915 * 1916 *
1916 * The queue spin lock must be held while manipulating the requests on the 1917 * The queue spin lock must be held while manipulating the requests on the
1917 * request queue; this lock will be taken also from interrupt context, so irq 1918 * request queue; this lock will be taken also from interrupt context, so irq
1918 * disabling is needed for it. 1919 * disabling is needed for it.
1919 * 1920 *
1920 * Function returns a pointer to the initialized request queue, or NULL if 1921 * Function returns a pointer to the initialized request queue, or NULL if
1921 * it didn't succeed. 1922 * it didn't succeed.
1922 * 1923 *
1923 * Note: 1924 * Note:
1924 * blk_init_queue() must be paired with a blk_cleanup_queue() call 1925 * blk_init_queue() must be paired with a blk_cleanup_queue() call
1925 * when the block device is deactivated (such as at module unload). 1926 * when the block device is deactivated (such as at module unload).
1926 **/ 1927 **/
1927 1928
1928 request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) 1929 request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
1929 { 1930 {
1930 return blk_init_queue_node(rfn, lock, -1); 1931 return blk_init_queue_node(rfn, lock, -1);
1931 } 1932 }
1932 EXPORT_SYMBOL(blk_init_queue); 1933 EXPORT_SYMBOL(blk_init_queue);
1933 1934
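/*
 * Example (illustrative sketch only): the pairing required by the note
 * above - a queue created with blk_init_queue() at module init is torn down
 * with blk_cleanup_queue() at module unload. my_request_fn is the
 * hypothetical handler sketched earlier (any request_fn_proc will do), and
 * MY_MAX_SG is an arbitrary segment limit.
 */
static request_queue_t *my_queue;
static DEFINE_SPINLOCK(my_lock);

static int __init my_init(void)
{
	my_queue = blk_init_queue(my_request_fn, &my_lock);
	if (!my_queue)
		return -ENOMEM;

	blk_queue_max_phys_segments(my_queue, MY_MAX_SG);
	return 0;
}

static void __exit my_exit(void)
{
	blk_cleanup_queue(my_queue);
}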
1934 request_queue_t * 1935 request_queue_t *
1935 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) 1936 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
1936 { 1937 {
1937 request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id); 1938 request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id);
1938 1939
1939 if (!q) 1940 if (!q)
1940 return NULL; 1941 return NULL;
1941 1942
1942 q->node = node_id; 1943 q->node = node_id;
1943 if (blk_init_free_list(q)) { 1944 if (blk_init_free_list(q)) {
1944 kmem_cache_free(requestq_cachep, q); 1945 kmem_cache_free(requestq_cachep, q);
1945 return NULL; 1946 return NULL;
1946 } 1947 }
1947 1948
1948 /* 1949 /*
1949 * if caller didn't supply a lock, they get per-queue locking with 1950 * if caller didn't supply a lock, they get per-queue locking with
1950 * our embedded lock 1951 * our embedded lock
1951 */ 1952 */
1952 if (!lock) { 1953 if (!lock) {
1953 spin_lock_init(&q->__queue_lock); 1954 spin_lock_init(&q->__queue_lock);
1954 lock = &q->__queue_lock; 1955 lock = &q->__queue_lock;
1955 } 1956 }
1956 1957
1957 q->request_fn = rfn; 1958 q->request_fn = rfn;
1958 q->back_merge_fn = ll_back_merge_fn; 1959 q->back_merge_fn = ll_back_merge_fn;
1959 q->front_merge_fn = ll_front_merge_fn; 1960 q->front_merge_fn = ll_front_merge_fn;
1960 q->merge_requests_fn = ll_merge_requests_fn; 1961 q->merge_requests_fn = ll_merge_requests_fn;
1961 q->prep_rq_fn = NULL; 1962 q->prep_rq_fn = NULL;
1962 q->unplug_fn = generic_unplug_device; 1963 q->unplug_fn = generic_unplug_device;
1963 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); 1964 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
1964 q->queue_lock = lock; 1965 q->queue_lock = lock;
1965 1966
1966 blk_queue_segment_boundary(q, 0xffffffff); 1967 blk_queue_segment_boundary(q, 0xffffffff);
1967 1968
1968 blk_queue_make_request(q, __make_request); 1969 blk_queue_make_request(q, __make_request);
1969 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); 1970 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
1970 1971
1971 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); 1972 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
1972 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); 1973 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
1973 1974
1974 /* 1975 /*
1975 * all done 1976 * all done
1976 */ 1977 */
1977 if (!elevator_init(q, NULL)) { 1978 if (!elevator_init(q, NULL)) {
1978 blk_queue_congestion_threshold(q); 1979 blk_queue_congestion_threshold(q);
1979 return q; 1980 return q;
1980 } 1981 }
1981 1982
1982 blk_put_queue(q); 1983 blk_put_queue(q);
1983 return NULL; 1984 return NULL;
1984 } 1985 }
1985 EXPORT_SYMBOL(blk_init_queue_node); 1986 EXPORT_SYMBOL(blk_init_queue_node);
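To make the contract in the blk_init_queue() comment above concrete, here is a minimal sketch of how a driver might set up and later tear down a queue; the mydev_* names are hypothetical and error handling is trimmed. Passing a NULL lock instead would make the queue fall back to its embedded per-queue lock, as the code above shows.

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/spinlock.h>

static request_queue_t *mydev_q;
static DEFINE_SPINLOCK(mydev_lock);

/* invoked with mydev_lock held, possibly from interrupt context */
static void mydev_request_fn(request_queue_t *q)
{
        /* pull requests off q and hand them to the hardware */
}

static int __init mydev_init(void)
{
        mydev_q = blk_init_queue(mydev_request_fn, &mydev_lock);
        if (!mydev_q)
                return -ENOMEM;
        return 0;
}

static void __exit mydev_exit(void)
{
        blk_cleanup_queue(mydev_q);     /* pairs with blk_init_queue() */
}

module_init(mydev_init);
module_exit(mydev_exit);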
1986 1987
1987 int blk_get_queue(request_queue_t *q) 1988 int blk_get_queue(request_queue_t *q)
1988 { 1989 {
1989 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { 1990 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
1990 kobject_get(&q->kobj); 1991 kobject_get(&q->kobj);
1991 return 0; 1992 return 0;
1992 } 1993 }
1993 1994
1994 return 1; 1995 return 1;
1995 } 1996 }
1996 1997
1997 EXPORT_SYMBOL(blk_get_queue); 1998 EXPORT_SYMBOL(blk_get_queue);
1998 1999
1999 static inline void blk_free_request(request_queue_t *q, struct request *rq) 2000 static inline void blk_free_request(request_queue_t *q, struct request *rq)
2000 { 2001 {
2001 if (rq->cmd_flags & REQ_ELVPRIV) 2002 if (rq->cmd_flags & REQ_ELVPRIV)
2002 elv_put_request(q, rq); 2003 elv_put_request(q, rq);
2003 mempool_free(rq, q->rq.rq_pool); 2004 mempool_free(rq, q->rq.rq_pool);
2004 } 2005 }
2005 2006
2006 static inline struct request * 2007 static inline struct request *
2007 blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, 2008 blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
2008 int priv, gfp_t gfp_mask) 2009 int priv, gfp_t gfp_mask)
2009 { 2010 {
2010 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); 2011 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
2011 2012
2012 if (!rq) 2013 if (!rq)
2013 return NULL; 2014 return NULL;
2014 2015
2015 /* 2016 /*
2016 * first three bits are identical in rq->cmd_flags and bio->bi_rw, 2017 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
2017 * see bio.h and blkdev.h 2018 * see bio.h and blkdev.h
2018 */ 2019 */
2019 rq->cmd_flags = rw; 2020 rq->cmd_flags = rw;
2020 2021
2021 if (priv) { 2022 if (priv) {
2022 if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { 2023 if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
2023 mempool_free(rq, q->rq.rq_pool); 2024 mempool_free(rq, q->rq.rq_pool);
2024 return NULL; 2025 return NULL;
2025 } 2026 }
2026 rq->cmd_flags |= REQ_ELVPRIV; 2027 rq->cmd_flags |= REQ_ELVPRIV;
2027 } 2028 }
2028 2029
2029 return rq; 2030 return rq;
2030 } 2031 }
2031 2032
2032 /* 2033 /*
2033 * ioc_batching returns true if the ioc is a valid batching request and 2034 * ioc_batching returns true if the ioc is a valid batching request and
2034 * should be given priority access to a request. 2035 * should be given priority access to a request.
2035 */ 2036 */
2036 static inline int ioc_batching(request_queue_t *q, struct io_context *ioc) 2037 static inline int ioc_batching(request_queue_t *q, struct io_context *ioc)
2037 { 2038 {
2038 if (!ioc) 2039 if (!ioc)
2039 return 0; 2040 return 0;
2040 2041
2041 /* 2042 /*
2042 * Make sure the process is able to allocate at least 1 request 2043 * Make sure the process is able to allocate at least 1 request
2043 * even if the batch times out, otherwise we could theoretically 2044 * even if the batch times out, otherwise we could theoretically
2044 * lose wakeups. 2045 * lose wakeups.
2045 */ 2046 */
2046 return ioc->nr_batch_requests == q->nr_batching || 2047 return ioc->nr_batch_requests == q->nr_batching ||
2047 (ioc->nr_batch_requests > 0 2048 (ioc->nr_batch_requests > 0
2048 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); 2049 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
2049 } 2050 }
2050 2051
2051 /* 2052 /*
2052 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This 2053 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
2053 * will cause the process to be a "batcher" on all queues in the system. This 2054 * will cause the process to be a "batcher" on all queues in the system. This
2054 * is the behaviour we want though - once it gets a wakeup it should be given 2055 * is the behaviour we want though - once it gets a wakeup it should be given
2055 * a nice run. 2056 * a nice run.
2056 */ 2057 */
2057 static void ioc_set_batching(request_queue_t *q, struct io_context *ioc) 2058 static void ioc_set_batching(request_queue_t *q, struct io_context *ioc)
2058 { 2059 {
2059 if (!ioc || ioc_batching(q, ioc)) 2060 if (!ioc || ioc_batching(q, ioc))
2060 return; 2061 return;
2061 2062
2062 ioc->nr_batch_requests = q->nr_batching; 2063 ioc->nr_batch_requests = q->nr_batching;
2063 ioc->last_waited = jiffies; 2064 ioc->last_waited = jiffies;
2064 } 2065 }
2065 2066
2066 static void __freed_request(request_queue_t *q, int rw) 2067 static void __freed_request(request_queue_t *q, int rw)
2067 { 2068 {
2068 struct request_list *rl = &q->rq; 2069 struct request_list *rl = &q->rq;
2069 2070
2070 if (rl->count[rw] < queue_congestion_off_threshold(q)) 2071 if (rl->count[rw] < queue_congestion_off_threshold(q))
2071 clear_queue_congested(q, rw); 2072 clear_queue_congested(q, rw);
2072 2073
2073 if (rl->count[rw] + 1 <= q->nr_requests) { 2074 if (rl->count[rw] + 1 <= q->nr_requests) {
2074 if (waitqueue_active(&rl->wait[rw])) 2075 if (waitqueue_active(&rl->wait[rw]))
2075 wake_up(&rl->wait[rw]); 2076 wake_up(&rl->wait[rw]);
2076 2077
2077 blk_clear_queue_full(q, rw); 2078 blk_clear_queue_full(q, rw);
2078 } 2079 }
2079 } 2080 }
2080 2081
2081 /* 2082 /*
2082 * A request has just been released. Account for it, update the full and 2083 * A request has just been released. Account for it, update the full and
2083 * congestion status, wake up any waiters. Called under q->queue_lock. 2084 * congestion status, wake up any waiters. Called under q->queue_lock.
2084 */ 2085 */
2085 static void freed_request(request_queue_t *q, int rw, int priv) 2086 static void freed_request(request_queue_t *q, int rw, int priv)
2086 { 2087 {
2087 struct request_list *rl = &q->rq; 2088 struct request_list *rl = &q->rq;
2088 2089
2089 rl->count[rw]--; 2090 rl->count[rw]--;
2090 if (priv) 2091 if (priv)
2091 rl->elvpriv--; 2092 rl->elvpriv--;
2092 2093
2093 __freed_request(q, rw); 2094 __freed_request(q, rw);
2094 2095
2095 if (unlikely(rl->starved[rw ^ 1])) 2096 if (unlikely(rl->starved[rw ^ 1]))
2096 __freed_request(q, rw ^ 1); 2097 __freed_request(q, rw ^ 1);
2097 } 2098 }
2098 2099
2099 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) 2100 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
2100 /* 2101 /*
2101 * Get a free request, queue_lock must be held. 2102 * Get a free request, queue_lock must be held.
2102 * Returns NULL on failure, with queue_lock held. 2103 * Returns NULL on failure, with queue_lock held.
2103 * Returns !NULL on success, with queue_lock *not held*. 2104 * Returns !NULL on success, with queue_lock *not held*.
2104 */ 2105 */
2105 static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, 2106 static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
2106 gfp_t gfp_mask) 2107 gfp_t gfp_mask)
2107 { 2108 {
2108 struct request *rq = NULL; 2109 struct request *rq = NULL;
2109 struct request_list *rl = &q->rq; 2110 struct request_list *rl = &q->rq;
2110 struct io_context *ioc = NULL; 2111 struct io_context *ioc = NULL;
2111 int may_queue, priv; 2112 int may_queue, priv;
2112 2113
2113 may_queue = elv_may_queue(q, rw, bio); 2114 may_queue = elv_may_queue(q, rw, bio);
2114 if (may_queue == ELV_MQUEUE_NO) 2115 if (may_queue == ELV_MQUEUE_NO)
2115 goto rq_starved; 2116 goto rq_starved;
2116 2117
2117 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { 2118 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
2118 if (rl->count[rw]+1 >= q->nr_requests) { 2119 if (rl->count[rw]+1 >= q->nr_requests) {
2119 ioc = current_io_context(GFP_ATOMIC); 2120 ioc = current_io_context(GFP_ATOMIC);
2120 /* 2121 /*
2121 * The queue will fill after this allocation, so set 2122 * The queue will fill after this allocation, so set
2122 * it as full, and mark this process as "batching". 2123 * it as full, and mark this process as "batching".
2123 * This process will be allowed to complete a batch of 2124 * This process will be allowed to complete a batch of
2124 * requests, others will be blocked. 2125 * requests, others will be blocked.
2125 */ 2126 */
2126 if (!blk_queue_full(q, rw)) { 2127 if (!blk_queue_full(q, rw)) {
2127 ioc_set_batching(q, ioc); 2128 ioc_set_batching(q, ioc);
2128 blk_set_queue_full(q, rw); 2129 blk_set_queue_full(q, rw);
2129 } else { 2130 } else {
2130 if (may_queue != ELV_MQUEUE_MUST 2131 if (may_queue != ELV_MQUEUE_MUST
2131 && !ioc_batching(q, ioc)) { 2132 && !ioc_batching(q, ioc)) {
2132 /* 2133 /*
2133 * The queue is full and the allocating 2134 * The queue is full and the allocating
2134 * process is not a "batcher", and not 2135 * process is not a "batcher", and not
2135 * exempted by the IO scheduler 2136 * exempted by the IO scheduler
2136 */ 2137 */
2137 goto out; 2138 goto out;
2138 } 2139 }
2139 } 2140 }
2140 } 2141 }
2141 set_queue_congested(q, rw); 2142 set_queue_congested(q, rw);
2142 } 2143 }
2143 2144
2144 /* 2145 /*
2145 * Only allow batching queuers to allocate up to 50% over the defined 2146 * Only allow batching queuers to allocate up to 50% over the defined
2146 * limit of requests, otherwise we could have thousands of requests 2147 * limit of requests, otherwise we could have thousands of requests
2147 * allocated with any setting of ->nr_requests 2148 * allocated with any setting of ->nr_requests
2148 */ 2149 */
2149 if (rl->count[rw] >= (3 * q->nr_requests / 2)) 2150 if (rl->count[rw] >= (3 * q->nr_requests / 2))
2150 goto out; 2151 goto out;
2151 2152
2152 rl->count[rw]++; 2153 rl->count[rw]++;
2153 rl->starved[rw] = 0; 2154 rl->starved[rw] = 0;
2154 2155
2155 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 2156 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
2156 if (priv) 2157 if (priv)
2157 rl->elvpriv++; 2158 rl->elvpriv++;
2158 2159
2159 spin_unlock_irq(q->queue_lock); 2160 spin_unlock_irq(q->queue_lock);
2160 2161
2161 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); 2162 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
2162 if (unlikely(!rq)) { 2163 if (unlikely(!rq)) {
2163 /* 2164 /*
2164 * Allocation failed presumably due to memory. Undo anything 2165 * Allocation failed presumably due to memory. Undo anything
2165 * we might have messed up. 2166 * we might have messed up.
2166 * 2167 *
2167 * Allocating task should really be put onto the front of the 2168 * Allocating task should really be put onto the front of the
2168 * wait queue, but this is pretty rare. 2169 * wait queue, but this is pretty rare.
2169 */ 2170 */
2170 spin_lock_irq(q->queue_lock); 2171 spin_lock_irq(q->queue_lock);
2171 freed_request(q, rw, priv); 2172 freed_request(q, rw, priv);
2172 2173
2173 /* 2174 /*
2174 * in the very unlikely event that allocation failed and no 2175 * in the very unlikely event that allocation failed and no
2175 * requests for this direction were pending, mark us starved 2176 * requests for this direction were pending, mark us starved
2176 * so that freeing of a request in the other direction will 2177 * so that freeing of a request in the other direction will
2177 * notice us. another possible fix would be to split the 2178 * notice us. another possible fix would be to split the
2178 * rq mempool into READ and WRITE 2179 * rq mempool into READ and WRITE
2179 */ 2180 */
2180 rq_starved: 2181 rq_starved:
2181 if (unlikely(rl->count[rw] == 0)) 2182 if (unlikely(rl->count[rw] == 0))
2182 rl->starved[rw] = 1; 2183 rl->starved[rw] = 1;
2183 2184
2184 goto out; 2185 goto out;
2185 } 2186 }
2186 2187
2187 /* 2188 /*
2188 * ioc may be NULL here, and ioc_batching will be false. That's 2189 * ioc may be NULL here, and ioc_batching will be false. That's
2189 * OK, if the queue is under the request limit then requests need 2190 * OK, if the queue is under the request limit then requests need
2190 * not count toward the nr_batch_requests limit. There will always 2191 * not count toward the nr_batch_requests limit. There will always
2191 * be some limit enforced by BLK_BATCH_TIME. 2192 * be some limit enforced by BLK_BATCH_TIME.
2192 */ 2193 */
2193 if (ioc_batching(q, ioc)) 2194 if (ioc_batching(q, ioc))
2194 ioc->nr_batch_requests--; 2195 ioc->nr_batch_requests--;
2195 2196
2196 rq_init(q, rq); 2197 rq_init(q, rq);
2197 rq->rl = rl; 2198 rq->rl = rl;
2198 2199
2199 blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); 2200 blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
2200 out: 2201 out:
2201 return rq; 2202 return rq;
2202 } 2203 }
2203 2204
2204 /* 2205 /*
2205 * No available requests for this queue, unplug the device and wait for some 2206 * No available requests for this queue, unplug the device and wait for some
2206 * requests to become available. 2207 * requests to become available.
2207 * 2208 *
2208 * Called with q->queue_lock held, and returns with it unlocked. 2209 * Called with q->queue_lock held, and returns with it unlocked.
2209 */ 2210 */
2210 static struct request *get_request_wait(request_queue_t *q, int rw, 2211 static struct request *get_request_wait(request_queue_t *q, int rw,
2211 struct bio *bio) 2212 struct bio *bio)
2212 { 2213 {
2213 struct request *rq; 2214 struct request *rq;
2214 2215
2215 rq = get_request(q, rw, bio, GFP_NOIO); 2216 rq = get_request(q, rw, bio, GFP_NOIO);
2216 while (!rq) { 2217 while (!rq) {
2217 DEFINE_WAIT(wait); 2218 DEFINE_WAIT(wait);
2218 struct request_list *rl = &q->rq; 2219 struct request_list *rl = &q->rq;
2219 2220
2220 prepare_to_wait_exclusive(&rl->wait[rw], &wait, 2221 prepare_to_wait_exclusive(&rl->wait[rw], &wait,
2221 TASK_UNINTERRUPTIBLE); 2222 TASK_UNINTERRUPTIBLE);
2222 2223
2223 rq = get_request(q, rw, bio, GFP_NOIO); 2224 rq = get_request(q, rw, bio, GFP_NOIO);
2224 2225
2225 if (!rq) { 2226 if (!rq) {
2226 struct io_context *ioc; 2227 struct io_context *ioc;
2227 2228
2228 blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); 2229 blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
2229 2230
2230 __generic_unplug_device(q); 2231 __generic_unplug_device(q);
2231 spin_unlock_irq(q->queue_lock); 2232 spin_unlock_irq(q->queue_lock);
2232 io_schedule(); 2233 io_schedule();
2233 2234
2234 /* 2235 /*
2235 * After sleeping, we become a "batching" process and 2236 * After sleeping, we become a "batching" process and
2236 * will be able to allocate at least one request, and 2237 * will be able to allocate at least one request, and
2237 * up to a big batch of them for a small period of time. 2238 * up to a big batch of them for a small period of time.
2238 * See ioc_batching, ioc_set_batching 2239 * See ioc_batching, ioc_set_batching
2239 */ 2240 */
2240 ioc = current_io_context(GFP_NOIO); 2241 ioc = current_io_context(GFP_NOIO);
2241 ioc_set_batching(q, ioc); 2242 ioc_set_batching(q, ioc);
2242 2243
2243 spin_lock_irq(q->queue_lock); 2244 spin_lock_irq(q->queue_lock);
2244 } 2245 }
2245 finish_wait(&rl->wait[rw], &wait); 2246 finish_wait(&rl->wait[rw], &wait);
2246 } 2247 }
2247 2248
2248 return rq; 2249 return rq;
2249 } 2250 }
2250 2251
2251 struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask) 2252 struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
2252 { 2253 {
2253 struct request *rq; 2254 struct request *rq;
2254 2255
2255 BUG_ON(rw != READ && rw != WRITE); 2256 BUG_ON(rw != READ && rw != WRITE);
2256 2257
2257 spin_lock_irq(q->queue_lock); 2258 spin_lock_irq(q->queue_lock);
2258 if (gfp_mask & __GFP_WAIT) { 2259 if (gfp_mask & __GFP_WAIT) {
2259 rq = get_request_wait(q, rw, NULL); 2260 rq = get_request_wait(q, rw, NULL);
2260 } else { 2261 } else {
2261 rq = get_request(q, rw, NULL, gfp_mask); 2262 rq = get_request(q, rw, NULL, gfp_mask);
2262 if (!rq) 2263 if (!rq)
2263 spin_unlock_irq(q->queue_lock); 2264 spin_unlock_irq(q->queue_lock);
2264 } 2265 }
2265 /* q->queue_lock is unlocked at this point */ 2266 /* q->queue_lock is unlocked at this point */
2266 2267
2267 return rq; 2268 return rq;
2268 } 2269 }
2269 EXPORT_SYMBOL(blk_get_request); 2270 EXPORT_SYMBOL(blk_get_request);
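As a hedged sketch of the allocation contract above (q is assumed to be an already-initialized queue): with __GFP_WAIT in the mask the call blocks in get_request_wait() and does not return NULL, so only atomic callers need the failure path. The request is released again with blk_put_request(), defined further down.

struct request *rq;

rq = blk_get_request(q, READ, GFP_KERNEL);
if (!rq)        /* only possible for callers without __GFP_WAIT */
        return -ENOMEM;

/* ... fill in the request and submit it ... */

blk_put_request(rq);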
2270 2271
2271 /** 2272 /**
2272 * blk_requeue_request - put a request back on queue 2273 * blk_requeue_request - put a request back on queue
2273 * @q: request queue where request should be inserted 2274 * @q: request queue where request should be inserted
2274 * @rq: request to be inserted 2275 * @rq: request to be inserted
2275 * 2276 *
2276 * Description: 2277 * Description:
2277 * Drivers often keep queueing requests until the hardware cannot accept 2278 * Drivers often keep queueing requests until the hardware cannot accept
2278 * more, when that condition happens we need to put the request back 2279 * more, when that condition happens we need to put the request back
2279 * on the queue. Must be called with queue lock held. 2280 * on the queue. Must be called with queue lock held.
2280 */ 2281 */
2281 void blk_requeue_request(request_queue_t *q, struct request *rq) 2282 void blk_requeue_request(request_queue_t *q, struct request *rq)
2282 { 2283 {
2283 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 2284 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
2284 2285
2285 if (blk_rq_tagged(rq)) 2286 if (blk_rq_tagged(rq))
2286 blk_queue_end_tag(q, rq); 2287 blk_queue_end_tag(q, rq);
2287 2288
2288 elv_requeue_request(q, rq); 2289 elv_requeue_request(q, rq);
2289 } 2290 }
2290 2291
2291 EXPORT_SYMBOL(blk_requeue_request); 2292 EXPORT_SYMBOL(blk_requeue_request);
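A rough sketch of the driver pattern described above, inside a request_fn (queue lock held). elv_next_request(), blkdev_dequeue_request() and blk_stop_queue() are existing block-layer helpers of this era rather than part of this patch, and mydev_send() is hypothetical.

struct request *rq = elv_next_request(q);

if (rq) {
        blkdev_dequeue_request(rq);
        if (mydev_send(rq) == -EBUSY) {
                /* hardware cannot take it now: put it back, back off */
                blk_requeue_request(q, rq);
                blk_stop_queue(q);
        }
}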
2292 2293
2293 /** 2294 /**
2294 * blk_insert_request - insert a special request in to a request queue 2295 * blk_insert_request - insert a special request in to a request queue
2295 * @q: request queue where request should be inserted 2296 * @q: request queue where request should be inserted
2296 * @rq: request to be inserted 2297 * @rq: request to be inserted
2297 * @at_head: insert request at head or tail of queue 2298 * @at_head: insert request at head or tail of queue
2298 * @data: private data 2299 * @data: private data
2299 * 2300 *
2300 * Description: 2301 * Description:
2301 * Many block devices need to execute commands asynchronously, so they don't 2302 * Many block devices need to execute commands asynchronously, so they don't
2302 * block the whole kernel from preemption during request execution. This is 2303 * block the whole kernel from preemption during request execution. This is
2303 * accomplished normally by inserting artificial requests tagged as 2304 * accomplished normally by inserting artificial requests tagged as
2304 * REQ_SPECIAL in to the corresponding request queue, and letting them be 2305 * REQ_SPECIAL in to the corresponding request queue, and letting them be
2305 * scheduled for actual execution by the request queue. 2306 * scheduled for actual execution by the request queue.
2306 * 2307 *
2307 * We have the option of inserting at the head or the tail of the queue. 2308 * We have the option of inserting at the head or the tail of the queue.
2308 * Typically we use the tail for new ioctls and so forth. We use the head 2309 * Typically we use the tail for new ioctls and so forth. We use the head
2309 * of the queue for things like a QUEUE_FULL message from a device, or a 2310 * of the queue for things like a QUEUE_FULL message from a device, or a
2310 * host that is unable to accept a particular command. 2311 * host that is unable to accept a particular command.
2311 */ 2312 */
2312 void blk_insert_request(request_queue_t *q, struct request *rq, 2313 void blk_insert_request(request_queue_t *q, struct request *rq,
2313 int at_head, void *data) 2314 int at_head, void *data)
2314 { 2315 {
2315 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 2316 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2316 unsigned long flags; 2317 unsigned long flags;
2317 2318
2318 /* 2319 /*
2319 * tell I/O scheduler that this isn't a regular read/write (ie it 2320 * tell I/O scheduler that this isn't a regular read/write (ie it
2320 * must not attempt merges on this) and that it acts as a soft 2321 * must not attempt merges on this) and that it acts as a soft
2321 * barrier 2322 * barrier
2322 */ 2323 */
2323 rq->cmd_type = REQ_TYPE_SPECIAL; 2324 rq->cmd_type = REQ_TYPE_SPECIAL;
2324 rq->cmd_flags |= REQ_SOFTBARRIER; 2325 rq->cmd_flags |= REQ_SOFTBARRIER;
2325 2326
2326 rq->special = data; 2327 rq->special = data;
2327 2328
2328 spin_lock_irqsave(q->queue_lock, flags); 2329 spin_lock_irqsave(q->queue_lock, flags);
2329 2330
2330 /* 2331 /*
2331 * If command is tagged, release the tag 2332 * If command is tagged, release the tag
2332 */ 2333 */
2333 if (blk_rq_tagged(rq)) 2334 if (blk_rq_tagged(rq))
2334 blk_queue_end_tag(q, rq); 2335 blk_queue_end_tag(q, rq);
2335 2336
2336 drive_stat_acct(rq, rq->nr_sectors, 1); 2337 drive_stat_acct(rq, rq->nr_sectors, 1);
2337 __elv_add_request(q, rq, where, 0); 2338 __elv_add_request(q, rq, where, 0);
2338 2339
2339 if (blk_queue_plugged(q)) 2340 if (blk_queue_plugged(q))
2340 __generic_unplug_device(q); 2341 __generic_unplug_device(q);
2341 else 2342 else
2342 q->request_fn(q); 2343 q->request_fn(q);
2343 spin_unlock_irqrestore(q->queue_lock, flags); 2344 spin_unlock_irqrestore(q->queue_lock, flags);
2344 } 2345 }
2345 2346
2346 EXPORT_SYMBOL(blk_insert_request); 2347 EXPORT_SYMBOL(blk_insert_request);
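A sketch of the head-insertion case mentioned above, e.g. in response to a QUEUE_FULL condition; mydisk and unjam_cmd are hypothetical. Note that blk_insert_request() takes the queue lock itself, so the caller must not hold it.

struct request *rq = blk_get_request(q, WRITE, GFP_ATOMIC);

if (rq) {
        rq->rq_disk = mydisk;
        /* goes to the head, marked REQ_TYPE_SPECIAL + REQ_SOFTBARRIER */
        blk_insert_request(q, rq, 1, &unjam_cmd);
}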
2347 2348
2348 /** 2349 /**
2349 * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage 2350 * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
2350 * @q: request queue where request should be inserted 2351 * @q: request queue where request should be inserted
2351 * @rq: request structure to fill 2352 * @rq: request structure to fill
2352 * @ubuf: the user buffer 2353 * @ubuf: the user buffer
2353 * @len: length of user data 2354 * @len: length of user data
2354 * 2355 *
2355 * Description: 2356 * Description:
2356 * Data will be mapped directly for zero copy io, if possible. Otherwise 2357 * Data will be mapped directly for zero copy io, if possible. Otherwise
2357 * a kernel bounce buffer is used. 2358 * a kernel bounce buffer is used.
2358 * 2359 *
2359 * A matching blk_rq_unmap_user() must be issued at the end of io, while 2360 * A matching blk_rq_unmap_user() must be issued at the end of io, while
2360 * still in process context. 2361 * still in process context.
2361 * 2362 *
2362 * Note: The mapped bio may need to be bounced through blk_queue_bounce() 2363 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
2363 * before being submitted to the device, as pages mapped may be out of 2364 * before being submitted to the device, as pages mapped may be out of
2364 * reach. It's the caller's responsibility to make sure this happens. The 2365 * reach. It's the caller's responsibility to make sure this happens. The
2365 * original bio must be passed back in to blk_rq_unmap_user() for proper 2366 * original bio must be passed back in to blk_rq_unmap_user() for proper
2366 * unmapping. 2367 * unmapping.
2367 */ 2368 */
2368 int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, 2369 int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,
2369 unsigned int len) 2370 unsigned int len)
2370 { 2371 {
2371 unsigned long uaddr; 2372 unsigned long uaddr;
2372 struct bio *bio; 2373 struct bio *bio;
2373 int reading; 2374 int reading;
2374 2375
2375 if (len > (q->max_hw_sectors << 9)) 2376 if (len > (q->max_hw_sectors << 9))
2376 return -EINVAL; 2377 return -EINVAL;
2377 if (!len || !ubuf) 2378 if (!len || !ubuf)
2378 return -EINVAL; 2379 return -EINVAL;
2379 2380
2380 reading = rq_data_dir(rq) == READ; 2381 reading = rq_data_dir(rq) == READ;
2381 2382
2382 /* 2383 /*
2383 * if alignment requirement is satisfied, map in user pages for 2384 * if alignment requirement is satisfied, map in user pages for
2384 * direct dma. else, set up kernel bounce buffers 2385 * direct dma. else, set up kernel bounce buffers
2385 */ 2386 */
2386 uaddr = (unsigned long) ubuf; 2387 uaddr = (unsigned long) ubuf;
2387 if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q))) 2388 if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
2388 bio = bio_map_user(q, NULL, uaddr, len, reading); 2389 bio = bio_map_user(q, NULL, uaddr, len, reading);
2389 else 2390 else
2390 bio = bio_copy_user(q, uaddr, len, reading); 2391 bio = bio_copy_user(q, uaddr, len, reading);
2391 2392
2392 if (!IS_ERR(bio)) { 2393 if (!IS_ERR(bio)) {
2393 rq->bio = rq->biotail = bio; 2394 rq->bio = rq->biotail = bio;
2394 blk_rq_bio_prep(q, rq, bio); 2395 blk_rq_bio_prep(q, rq, bio);
2395 2396
2396 rq->buffer = rq->data = NULL; 2397 rq->buffer = rq->data = NULL;
2397 rq->data_len = len; 2398 rq->data_len = len;
2398 return 0; 2399 return 0;
2399 } 2400 }
2400 2401
2401 /* 2402 /*
2402 * bio is the err-ptr 2403 * bio is the err-ptr
2403 */ 2404 */
2404 return PTR_ERR(bio); 2405 return PTR_ERR(bio);
2405 } 2406 }
2406 2407
2407 EXPORT_SYMBOL(blk_rq_map_user); 2408 EXPORT_SYMBOL(blk_rq_map_user);
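The map/unmap pairing required above, sketched for a simple passthrough path; q, rw, ubuf, len and bd_disk are assumed to come from the surrounding ioctl handler, and error handling is abbreviated. The bio pointer is saved before execution because blk_rq_unmap_user() needs the original bio back.

struct request *rq = blk_get_request(q, rw, GFP_KERNEL);
struct bio *bio;
int ret;

ret = blk_rq_map_user(q, rq, ubuf, len);
if (ret) {
        blk_put_request(rq);
        return ret;
}
bio = rq->bio;                  /* kept for the unmap below */

ret = blk_execute_rq(q, bd_disk, rq, 0);

blk_rq_unmap_user(bio, len);    /* still in process context */
blk_put_request(rq);
return ret;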
2408 2409
2409 /** 2410 /**
2410 * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage 2411 * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage
2411 * @q: request queue where request should be inserted 2412 * @q: request queue where request should be inserted
2412 * @rq: request to map data to 2413 * @rq: request to map data to
2413 * @iov: pointer to the iovec 2414 * @iov: pointer to the iovec
2414 * @iov_count: number of elements in the iovec 2415 * @iov_count: number of elements in the iovec
2415 * 2416 *
2416 * Description: 2417 * Description:
2417 * Data will be mapped directly for zero copy io, if possible. Otherwise 2418 * Data will be mapped directly for zero copy io, if possible. Otherwise
2418 * a kernel bounce buffer is used. 2419 * a kernel bounce buffer is used.
2419 * 2420 *
2420 * A matching blk_rq_unmap_user() must be issued at the end of io, while 2421 * A matching blk_rq_unmap_user() must be issued at the end of io, while
2421 * still in process context. 2422 * still in process context.
2422 * 2423 *
2423 * Note: The mapped bio may need to be bounced through blk_queue_bounce() 2424 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
2424 * before being submitted to the device, as pages mapped may be out of 2425 * before being submitted to the device, as pages mapped may be out of
2425 * reach. It's the caller's responsibility to make sure this happens. The 2426 * reach. It's the caller's responsibility to make sure this happens. The
2426 * original bio must be passed back in to blk_rq_unmap_user() for proper 2427 * original bio must be passed back in to blk_rq_unmap_user() for proper
2427 * unmapping. 2428 * unmapping.
2428 */ 2429 */
2429 int blk_rq_map_user_iov(request_queue_t *q, struct request *rq, 2430 int blk_rq_map_user_iov(request_queue_t *q, struct request *rq,
2430 struct sg_iovec *iov, int iov_count) 2431 struct sg_iovec *iov, int iov_count)
2431 { 2432 {
2432 struct bio *bio; 2433 struct bio *bio;
2433 2434
2434 if (!iov || iov_count <= 0) 2435 if (!iov || iov_count <= 0)
2435 return -EINVAL; 2436 return -EINVAL;
2436 2437
2437 /* we don't allow misaligned data like bio_map_user() does. If the 2438 /* we don't allow misaligned data like bio_map_user() does. If the
2438 * user is using sg, they're expected to know the alignment constraints 2439 * user is using sg, they're expected to know the alignment constraints
2439 * and respect them accordingly */ 2440 * and respect them accordingly */
2440 bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); 2441 bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ);
2441 if (IS_ERR(bio)) 2442 if (IS_ERR(bio))
2442 return PTR_ERR(bio); 2443 return PTR_ERR(bio);
2443 2444
2444 rq->bio = rq->biotail = bio; 2445 rq->bio = rq->biotail = bio;
2445 blk_rq_bio_prep(q, rq, bio); 2446 blk_rq_bio_prep(q, rq, bio);
2446 rq->buffer = rq->data = NULL; 2447 rq->buffer = rq->data = NULL;
2447 rq->data_len = bio->bi_size; 2448 rq->data_len = bio->bi_size;
2448 return 0; 2449 return 0;
2449 } 2450 }
2450 2451
2451 EXPORT_SYMBOL(blk_rq_map_user_iov); 2452 EXPORT_SYMBOL(blk_rq_map_user_iov);
2452 2453
2453 /** 2454 /**
2454 * blk_rq_unmap_user - unmap a request with user data 2455 * blk_rq_unmap_user - unmap a request with user data
2455 * @bio: bio to be unmapped 2456 * @bio: bio to be unmapped
2456 * @ulen: length of user buffer 2457 * @ulen: length of user buffer
2457 * 2458 *
2458 * Description: 2459 * Description:
2459 * Unmap a bio previously mapped by blk_rq_map_user(). 2460 * Unmap a bio previously mapped by blk_rq_map_user().
2460 */ 2461 */
2461 int blk_rq_unmap_user(struct bio *bio, unsigned int ulen) 2462 int blk_rq_unmap_user(struct bio *bio, unsigned int ulen)
2462 { 2463 {
2463 int ret = 0; 2464 int ret = 0;
2464 2465
2465 if (bio) { 2466 if (bio) {
2466 if (bio_flagged(bio, BIO_USER_MAPPED)) 2467 if (bio_flagged(bio, BIO_USER_MAPPED))
2467 bio_unmap_user(bio); 2468 bio_unmap_user(bio);
2468 else 2469 else
2469 ret = bio_uncopy_user(bio); 2470 ret = bio_uncopy_user(bio);
2470 } 2471 }
2471 2472
2472 return ret; 2473 return ret;
2473 } 2474 }
2474 2475
2475 EXPORT_SYMBOL(blk_rq_unmap_user); 2476 EXPORT_SYMBOL(blk_rq_unmap_user);
2476 2477
2477 /** 2478 /**
2478 * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage 2479 * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage
2479 * @q: request queue where request should be inserted 2480 * @q: request queue where request should be inserted
2480 * @rq: request to fill 2481 * @rq: request to fill
2481 * @kbuf: the kernel buffer 2482 * @kbuf: the kernel buffer
2482 * @len: length of user data 2483 * @len: length of user data
2483 * @gfp_mask: memory allocation flags 2484 * @gfp_mask: memory allocation flags
2484 */ 2485 */
2485 int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, 2486 int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,
2486 unsigned int len, gfp_t gfp_mask) 2487 unsigned int len, gfp_t gfp_mask)
2487 { 2488 {
2488 struct bio *bio; 2489 struct bio *bio;
2489 2490
2490 if (len > (q->max_hw_sectors << 9)) 2491 if (len > (q->max_hw_sectors << 9))
2491 return -EINVAL; 2492 return -EINVAL;
2492 if (!len || !kbuf) 2493 if (!len || !kbuf)
2493 return -EINVAL; 2494 return -EINVAL;
2494 2495
2495 bio = bio_map_kern(q, kbuf, len, gfp_mask); 2496 bio = bio_map_kern(q, kbuf, len, gfp_mask);
2496 if (IS_ERR(bio)) 2497 if (IS_ERR(bio))
2497 return PTR_ERR(bio); 2498 return PTR_ERR(bio);
2498 2499
2499 if (rq_data_dir(rq) == WRITE) 2500 if (rq_data_dir(rq) == WRITE)
2500 bio->bi_rw |= (1 << BIO_RW); 2501 bio->bi_rw |= (1 << BIO_RW);
2501 2502
2502 rq->bio = rq->biotail = bio; 2503 rq->bio = rq->biotail = bio;
2503 blk_rq_bio_prep(q, rq, bio); 2504 blk_rq_bio_prep(q, rq, bio);
2504 2505
2505 rq->buffer = rq->data = NULL; 2506 rq->buffer = rq->data = NULL;
2506 rq->data_len = len; 2507 rq->data_len = len;
2507 return 0; 2508 return 0;
2508 } 2509 }
2509 2510
2510 EXPORT_SYMBOL(blk_rq_map_kern); 2511 EXPORT_SYMBOL(blk_rq_map_kern);
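The kernel-buffer variant follows the same shape; a minimal sketch, assuming buf/buf_len describe a driver-owned buffer:

struct request *rq = blk_get_request(q, READ, GFP_KERNEL);
int ret = blk_rq_map_kern(q, rq, buf, buf_len, GFP_KERNEL);

if (ret) {
        blk_put_request(rq);
        return ret;
}
/* rq->bio now covers the kernel buffer; execute the request as usual */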
2511 2512
2512 /** 2513 /**
2513 * blk_execute_rq_nowait - insert a request into queue for execution 2514 * blk_execute_rq_nowait - insert a request into queue for execution
2514 * @q: queue to insert the request in 2515 * @q: queue to insert the request in
2515 * @bd_disk: matching gendisk 2516 * @bd_disk: matching gendisk
2516 * @rq: request to insert 2517 * @rq: request to insert
2517 * @at_head: insert request at head or tail of queue 2518 * @at_head: insert request at head or tail of queue
2518 * @done: I/O completion handler 2519 * @done: I/O completion handler
2519 * 2520 *
2520 * Description: 2521 * Description:
2521 * Insert a fully prepared request at the back of the io scheduler queue 2522 * Insert a fully prepared request at the back of the io scheduler queue
2522 * for execution. Don't wait for completion. 2523 * for execution. Don't wait for completion.
2523 */ 2524 */
2524 void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, 2525 void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
2525 struct request *rq, int at_head, 2526 struct request *rq, int at_head,
2526 rq_end_io_fn *done) 2527 rq_end_io_fn *done)
2527 { 2528 {
2528 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 2529 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2529 2530
2530 rq->rq_disk = bd_disk; 2531 rq->rq_disk = bd_disk;
2531 rq->cmd_flags |= REQ_NOMERGE; 2532 rq->cmd_flags |= REQ_NOMERGE;
2532 rq->end_io = done; 2533 rq->end_io = done;
2533 WARN_ON(irqs_disabled()); 2534 WARN_ON(irqs_disabled());
2534 spin_lock_irq(q->queue_lock); 2535 spin_lock_irq(q->queue_lock);
2535 __elv_add_request(q, rq, where, 1); 2536 __elv_add_request(q, rq, where, 1);
2536 __generic_unplug_device(q); 2537 __generic_unplug_device(q);
2537 spin_unlock_irq(q->queue_lock); 2538 spin_unlock_irq(q->queue_lock);
2538 } 2539 }
2539 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); 2540 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
2540 2541
2541 /** 2542 /**
2542 * blk_execute_rq - insert a request into queue for execution 2543 * blk_execute_rq - insert a request into queue for execution
2543 * @q: queue to insert the request in 2544 * @q: queue to insert the request in
2544 * @bd_disk: matching gendisk 2545 * @bd_disk: matching gendisk
2545 * @rq: request to insert 2546 * @rq: request to insert
2546 * @at_head: insert request at head or tail of queue 2547 * @at_head: insert request at head or tail of queue
2547 * 2548 *
2548 * Description: 2549 * Description:
2549 * Insert a fully prepared request at the back of the io scheduler queue 2550 * Insert a fully prepared request at the back of the io scheduler queue
2550 * for execution and wait for completion. 2551 * for execution and wait for completion.
2551 */ 2552 */
2552 int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, 2553 int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
2553 struct request *rq, int at_head) 2554 struct request *rq, int at_head)
2554 { 2555 {
2555 DECLARE_COMPLETION_ONSTACK(wait); 2556 DECLARE_COMPLETION_ONSTACK(wait);
2556 char sense[SCSI_SENSE_BUFFERSIZE]; 2557 char sense[SCSI_SENSE_BUFFERSIZE];
2557 int err = 0; 2558 int err = 0;
2558 2559
2559 /* 2560 /*
2560 * we need an extra reference to the request, so we can look at 2561 * we need an extra reference to the request, so we can look at
2561 * it after io completion 2562 * it after io completion
2562 */ 2563 */
2563 rq->ref_count++; 2564 rq->ref_count++;
2564 2565
2565 if (!rq->sense) { 2566 if (!rq->sense) {
2566 memset(sense, 0, sizeof(sense)); 2567 memset(sense, 0, sizeof(sense));
2567 rq->sense = sense; 2568 rq->sense = sense;
2568 rq->sense_len = 0; 2569 rq->sense_len = 0;
2569 } 2570 }
2570 2571
2571 rq->waiting = &wait; 2572 rq->waiting = &wait;
2572 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); 2573 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
2573 wait_for_completion(&wait); 2574 wait_for_completion(&wait);
2574 rq->waiting = NULL; 2575 rq->waiting = NULL;
2575 2576
2576 if (rq->errors) 2577 if (rq->errors)
2577 err = -EIO; 2578 err = -EIO;
2578 2579
2579 return err; 2580 return err;
2580 } 2581 }
2581 2582
2582 EXPORT_SYMBOL(blk_execute_rq); 2583 EXPORT_SYMBOL(blk_execute_rq);
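For the asynchronous variant, the done callback has the same signature as blk_end_sync_rq() above and runs when the request completes; a hedged sketch in which the driver simply drops its reference the way blk_end_sync_rq() does:

static void mydev_pt_done(struct request *rq, int error)
{
        /* completion context: inspect rq->errors / rq->sense, then drop */
        __blk_put_request(rq->q, rq);
}

static void mydev_fire(request_queue_t *q, struct gendisk *disk,
                       struct request *rq)
{
        blk_execute_rq_nowait(q, disk, rq, 0, mydev_pt_done);
}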
2583 2584
2584 /** 2585 /**
2585 * blkdev_issue_flush - queue a flush 2586 * blkdev_issue_flush - queue a flush
2586 * @bdev: blockdev to issue flush for 2587 * @bdev: blockdev to issue flush for
2587 * @error_sector: error sector 2588 * @error_sector: error sector
2588 * 2589 *
2589 * Description: 2590 * Description:
2590 * Issue a flush for the block device in question. Caller can supply 2591 * Issue a flush for the block device in question. Caller can supply
2591 * room for storing the error offset in case of a flush error, if they 2592 * room for storing the error offset in case of a flush error, if they
2592 * wish to. Caller must run wait_for_completion() on its own. 2593 * wish to. Caller must run wait_for_completion() on its own.
2593 */ 2594 */
2594 int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) 2595 int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
2595 { 2596 {
2596 request_queue_t *q; 2597 request_queue_t *q;
2597 2598
2598 if (bdev->bd_disk == NULL) 2599 if (bdev->bd_disk == NULL)
2599 return -ENXIO; 2600 return -ENXIO;
2600 2601
2601 q = bdev_get_queue(bdev); 2602 q = bdev_get_queue(bdev);
2602 if (!q) 2603 if (!q)
2603 return -ENXIO; 2604 return -ENXIO;
2604 if (!q->issue_flush_fn) 2605 if (!q->issue_flush_fn)
2605 return -EOPNOTSUPP; 2606 return -EOPNOTSUPP;
2606 2607
2607 return q->issue_flush_fn(q, bdev->bd_disk, error_sector); 2608 return q->issue_flush_fn(q, bdev->bd_disk, error_sector);
2608 } 2609 }
2609 2610
2610 EXPORT_SYMBOL(blkdev_issue_flush); 2611 EXPORT_SYMBOL(blkdev_issue_flush);
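Callers mostly just forward the return value; a short sketch (whether -EOPNOTSUPP is fatal is the caller's policy):

sector_t err_sect;
int ret = blkdev_issue_flush(bdev, &err_sect);

if (ret == -EOPNOTSUPP)
        ret = 0;        /* no issue_flush_fn: many callers treat this as harmless */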
2611 2612
2612 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) 2613 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
2613 { 2614 {
2614 int rw = rq_data_dir(rq); 2615 int rw = rq_data_dir(rq);
2615 2616
2616 if (!blk_fs_request(rq) || !rq->rq_disk) 2617 if (!blk_fs_request(rq) || !rq->rq_disk)
2617 return; 2618 return;
2618 2619
2619 if (!new_io) { 2620 if (!new_io) {
2620 __disk_stat_inc(rq->rq_disk, merges[rw]); 2621 __disk_stat_inc(rq->rq_disk, merges[rw]);
2621 } else { 2622 } else {
2622 disk_round_stats(rq->rq_disk); 2623 disk_round_stats(rq->rq_disk);
2623 rq->rq_disk->in_flight++; 2624 rq->rq_disk->in_flight++;
2624 } 2625 }
2625 } 2626 }
2626 2627
2627 /* 2628 /*
2628 * add-request adds a request to the linked list. 2629 * add-request adds a request to the linked list.
2629 * queue lock is held and interrupts disabled, as we muck with the 2630 * queue lock is held and interrupts disabled, as we muck with the
2630 * request queue list. 2631 * request queue list.
2631 */ 2632 */
2632 static inline void add_request(request_queue_t * q, struct request * req) 2633 static inline void add_request(request_queue_t * q, struct request * req)
2633 { 2634 {
2634 drive_stat_acct(req, req->nr_sectors, 1); 2635 drive_stat_acct(req, req->nr_sectors, 1);
2635 2636
2636 if (q->activity_fn) 2637 if (q->activity_fn)
2637 q->activity_fn(q->activity_data, rq_data_dir(req)); 2638 q->activity_fn(q->activity_data, rq_data_dir(req));
2638 2639
2639 /* 2640 /*
2640 * elevator indicated where it wants this request to be 2641 * elevator indicated where it wants this request to be
2641 * inserted at elevator_merge time 2642 * inserted at elevator_merge time
2642 */ 2643 */
2643 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); 2644 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
2644 } 2645 }
2645 2646
2646 /* 2647 /*
2647 * disk_round_stats() - Round off the performance stats on a struct 2648 * disk_round_stats() - Round off the performance stats on a struct
2648 * disk_stats. 2649 * disk_stats.
2649 * 2650 *
2650 * The average IO queue length and utilisation statistics are maintained 2651 * The average IO queue length and utilisation statistics are maintained
2651 * by observing the current state of the queue length and the amount of 2652 * by observing the current state of the queue length and the amount of
2652 * time it has been in this state for. 2653 * time it has been in this state for.
2653 * 2654 *
2654 * Normally, that accounting is done on IO completion, but that can result 2655 * Normally, that accounting is done on IO completion, but that can result
2655 * in more than a second's worth of IO being accounted for within any one 2656 * in more than a second's worth of IO being accounted for within any one
2656 * second, leading to >100% utilisation. To deal with that, we call this 2657 * second, leading to >100% utilisation. To deal with that, we call this
2657 * function to do a round-off before returning the results when reading 2658 * function to do a round-off before returning the results when reading
2658 * /proc/diskstats. This accounts immediately for all queue usage up to 2659 * /proc/diskstats. This accounts immediately for all queue usage up to
2659 * the current jiffies and restarts the counters again. 2660 * the current jiffies and restarts the counters again.
2660 */ 2661 */
2661 void disk_round_stats(struct gendisk *disk) 2662 void disk_round_stats(struct gendisk *disk)
2662 { 2663 {
2663 unsigned long now = jiffies; 2664 unsigned long now = jiffies;
2664 2665
2665 if (now == disk->stamp) 2666 if (now == disk->stamp)
2666 return; 2667 return;
2667 2668
2668 if (disk->in_flight) { 2669 if (disk->in_flight) {
2669 __disk_stat_add(disk, time_in_queue, 2670 __disk_stat_add(disk, time_in_queue,
2670 disk->in_flight * (now - disk->stamp)); 2671 disk->in_flight * (now - disk->stamp));
2671 __disk_stat_add(disk, io_ticks, (now - disk->stamp)); 2672 __disk_stat_add(disk, io_ticks, (now - disk->stamp));
2672 } 2673 }
2673 disk->stamp = now; 2674 disk->stamp = now;
2674 } 2675 }
2675 2676
2676 EXPORT_SYMBOL_GPL(disk_round_stats); 2677 EXPORT_SYMBOL_GPL(disk_round_stats);
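A small worked example of the round-off above, with purely illustrative numbers:

/*
 * With disk->in_flight == 4 and 250 jiffies elapsed since disk->stamp,
 * disk_round_stats() adds
 *
 *      time_in_queue += 4 * 250 = 1000
 *      io_ticks      += 250
 *
 * and resets disk->stamp, so the completion path later only accounts
 * for time past this point.
 */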
2677 2678
2678 /* 2679 /*
2679 * queue lock must be held 2680 * queue lock must be held
2680 */ 2681 */
2681 void __blk_put_request(request_queue_t *q, struct request *req) 2682 void __blk_put_request(request_queue_t *q, struct request *req)
2682 { 2683 {
2683 struct request_list *rl = req->rl; 2684 struct request_list *rl = req->rl;
2684 2685
2685 if (unlikely(!q)) 2686 if (unlikely(!q))
2686 return; 2687 return;
2687 if (unlikely(--req->ref_count)) 2688 if (unlikely(--req->ref_count))
2688 return; 2689 return;
2689 2690
2690 elv_completed_request(q, req); 2691 elv_completed_request(q, req);
2691 2692
2692 req->rq_status = RQ_INACTIVE; 2693 req->rq_status = RQ_INACTIVE;
2693 req->rl = NULL; 2694 req->rl = NULL;
2694 2695
2695 /* 2696 /*
2696 * Request may not have originated from ll_rw_blk. if not, 2697 * Request may not have originated from ll_rw_blk. if not,
2697 * it didn't come out of our reserved rq pools 2698 * it didn't come out of our reserved rq pools
2698 */ 2699 */
2699 if (rl) { 2700 if (rl) {
2700 int rw = rq_data_dir(req); 2701 int rw = rq_data_dir(req);
2701 int priv = req->cmd_flags & REQ_ELVPRIV; 2702 int priv = req->cmd_flags & REQ_ELVPRIV;
2702 2703
2703 BUG_ON(!list_empty(&req->queuelist)); 2704 BUG_ON(!list_empty(&req->queuelist));
2704 BUG_ON(!hlist_unhashed(&req->hash)); 2705 BUG_ON(!hlist_unhashed(&req->hash));
2705 2706
2706 blk_free_request(q, req); 2707 blk_free_request(q, req);
2707 freed_request(q, rw, priv); 2708 freed_request(q, rw, priv);
2708 } 2709 }
2709 } 2710 }
2710 2711
2711 EXPORT_SYMBOL_GPL(__blk_put_request); 2712 EXPORT_SYMBOL_GPL(__blk_put_request);
2712 2713
2713 void blk_put_request(struct request *req) 2714 void blk_put_request(struct request *req)
2714 { 2715 {
2715 unsigned long flags; 2716 unsigned long flags;
2716 request_queue_t *q = req->q; 2717 request_queue_t *q = req->q;
2717 2718
2718 /* 2719 /*
2719 * Gee, IDE calls in w/ NULL q. Fix IDE and remove the 2720 * Gee, IDE calls in w/ NULL q. Fix IDE and remove the
2720 * following if (q) test. 2721 * following if (q) test.
2721 */ 2722 */
2722 if (q) { 2723 if (q) {
2723 spin_lock_irqsave(q->queue_lock, flags); 2724 spin_lock_irqsave(q->queue_lock, flags);
2724 __blk_put_request(q, req); 2725 __blk_put_request(q, req);
2725 spin_unlock_irqrestore(q->queue_lock, flags); 2726 spin_unlock_irqrestore(q->queue_lock, flags);
2726 } 2727 }
2727 } 2728 }
2728 2729
2729 EXPORT_SYMBOL(blk_put_request); 2730 EXPORT_SYMBOL(blk_put_request);
2730 2731
2731 /** 2732 /**
2732 * blk_end_sync_rq - executes a completion event on a request 2733 * blk_end_sync_rq - executes a completion event on a request
2733 * @rq: request to complete 2734 * @rq: request to complete
2734 * @error: end io status of the request 2735 * @error: end io status of the request
2735 */ 2736 */
2736 void blk_end_sync_rq(struct request *rq, int error) 2737 void blk_end_sync_rq(struct request *rq, int error)
2737 { 2738 {
2738 struct completion *waiting = rq->waiting; 2739 struct completion *waiting = rq->waiting;
2739 2740
2740 rq->waiting = NULL; 2741 rq->waiting = NULL;
2741 __blk_put_request(rq->q, rq); 2742 __blk_put_request(rq->q, rq);
2742 2743
2743 /* 2744 /*
2744 * complete last, if this is a stack request the process (and thus 2745 * complete last, if this is a stack request the process (and thus
2745 * the rq pointer) could be invalid right after this complete() 2746 * the rq pointer) could be invalid right after this complete()
2746 */ 2747 */
2747 complete(waiting); 2748 complete(waiting);
2748 } 2749 }
2749 EXPORT_SYMBOL(blk_end_sync_rq); 2750 EXPORT_SYMBOL(blk_end_sync_rq);
2750 2751
2751 /** 2752 /**
2752 * blk_congestion_wait - wait for a queue to become uncongested 2753 * blk_congestion_wait - wait for a queue to become uncongested
2753 * @rw: READ or WRITE 2754 * @rw: READ or WRITE
2754 * @timeout: timeout in jiffies 2755 * @timeout: timeout in jiffies
2755 * 2756 *
2756 * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion. 2757 * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion.
2757 * If no queues are congested then just wait for the next request to be 2758 * If no queues are congested then just wait for the next request to be
2758 * returned. 2759 * returned.
2759 */ 2760 */
2760 long blk_congestion_wait(int rw, long timeout) 2761 long blk_congestion_wait(int rw, long timeout)
2761 { 2762 {
2762 long ret; 2763 long ret;
2763 DEFINE_WAIT(wait); 2764 DEFINE_WAIT(wait);
2764 wait_queue_head_t *wqh = &congestion_wqh[rw]; 2765 wait_queue_head_t *wqh = &congestion_wqh[rw];
2765 2766
2766 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); 2767 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
2767 ret = io_schedule_timeout(timeout); 2768 ret = io_schedule_timeout(timeout);
2768 finish_wait(wqh, &wait); 2769 finish_wait(wqh, &wait);
2769 return ret; 2770 return ret;
2770 } 2771 }
2771 2772
2772 EXPORT_SYMBOL(blk_congestion_wait); 2773 EXPORT_SYMBOL(blk_congestion_wait);
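Typical use is a writeback-style throttle; a one-line sketch:

/* back off for up to ~100ms, or until some queue leaves congestion */
blk_congestion_wait(WRITE, HZ / 10);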
2773 2774
2774 /** 2775 /**
2775 * blk_congestion_end - wake up sleepers on a congestion queue 2776 * blk_congestion_end - wake up sleepers on a congestion queue
2776 * @rw: READ or WRITE 2777 * @rw: READ or WRITE
2777 */ 2778 */
2778 void blk_congestion_end(int rw) 2779 void blk_congestion_end(int rw)
2779 { 2780 {
2780 wait_queue_head_t *wqh = &congestion_wqh[rw]; 2781 wait_queue_head_t *wqh = &congestion_wqh[rw];
2781 2782
2782 if (waitqueue_active(wqh)) 2783 if (waitqueue_active(wqh))
2783 wake_up(wqh); 2784 wake_up(wqh);
2784 } 2785 }
2785 2786
2786 /* 2787 /*
2787 * Has to be called with the request spinlock acquired 2788 * Has to be called with the request spinlock acquired
2788 */ 2789 */
2789 static int attempt_merge(request_queue_t *q, struct request *req, 2790 static int attempt_merge(request_queue_t *q, struct request *req,
2790 struct request *next) 2791 struct request *next)
2791 { 2792 {
2792 if (!rq_mergeable(req) || !rq_mergeable(next)) 2793 if (!rq_mergeable(req) || !rq_mergeable(next))
2793 return 0; 2794 return 0;
2794 2795
2795 /* 2796 /*
2796 * not contiguous 2797 * not contiguous
2797 */ 2798 */
2798 if (req->sector + req->nr_sectors != next->sector) 2799 if (req->sector + req->nr_sectors != next->sector)
2799 return 0; 2800 return 0;
2800 2801
2801 if (rq_data_dir(req) != rq_data_dir(next) 2802 if (rq_data_dir(req) != rq_data_dir(next)
2802 || req->rq_disk != next->rq_disk 2803 || req->rq_disk != next->rq_disk
2803 || next->waiting || next->special) 2804 || next->waiting || next->special)
2804 return 0; 2805 return 0;
2805 2806
2806 /* 2807 /*
2807 * If we are allowed to merge, then append bio list 2808 * If we are allowed to merge, then append bio list
2808 * from next to rq and release next. merge_requests_fn 2809 * from next to rq and release next. merge_requests_fn
2809 * will have updated segment counts, update sector 2810 * will have updated segment counts, update sector
2810 * counts here. 2811 * counts here.
2811 */ 2812 */
2812 if (!q->merge_requests_fn(q, req, next)) 2813 if (!q->merge_requests_fn(q, req, next))
2813 return 0; 2814 return 0;
2814 2815
2815 /* 2816 /*
2816 * At this point we have either done a back merge 2817 * At this point we have either done a back merge
2817 * or front merge. We need the smaller start_time of 2818 * or front merge. We need the smaller start_time of
2818 * the merged requests to be the current request 2819 * the merged requests to be the current request
2819 * for accounting purposes. 2820 * for accounting purposes.
2820 */ 2821 */
2821 if (time_after(req->start_time, next->start_time)) 2822 if (time_after(req->start_time, next->start_time))
2822 req->start_time = next->start_time; 2823 req->start_time = next->start_time;
2823 2824
2824 req->biotail->bi_next = next->bio; 2825 req->biotail->bi_next = next->bio;
2825 req->biotail = next->biotail; 2826 req->biotail = next->biotail;
2826 2827
2827 req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; 2828 req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
2828 2829
2829 elv_merge_requests(q, req, next); 2830 elv_merge_requests(q, req, next);
2830 2831
2831 if (req->rq_disk) { 2832 if (req->rq_disk) {
2832 disk_round_stats(req->rq_disk); 2833 disk_round_stats(req->rq_disk);
2833 req->rq_disk->in_flight--; 2834 req->rq_disk->in_flight--;
2834 } 2835 }
2835 2836
2836 req->ioprio = ioprio_best(req->ioprio, next->ioprio); 2837 req->ioprio = ioprio_best(req->ioprio, next->ioprio);
2837 2838
2838 __blk_put_request(q, next); 2839 __blk_put_request(q, next);
2839 return 1; 2840 return 1;
2840 } 2841 }
2841 2842
2842 static inline int attempt_back_merge(request_queue_t *q, struct request *rq) 2843 static inline int attempt_back_merge(request_queue_t *q, struct request *rq)
2843 { 2844 {
2844 struct request *next = elv_latter_request(q, rq); 2845 struct request *next = elv_latter_request(q, rq);
2845 2846
2846 if (next) 2847 if (next)
2847 return attempt_merge(q, rq, next); 2848 return attempt_merge(q, rq, next);
2848 2849
2849 return 0; 2850 return 0;
2850 } 2851 }
2851 2852
2852 static inline int attempt_front_merge(request_queue_t *q, struct request *rq) 2853 static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
2853 { 2854 {
2854 struct request *prev = elv_former_request(q, rq); 2855 struct request *prev = elv_former_request(q, rq);
2855 2856
2856 if (prev) 2857 if (prev)
2857 return attempt_merge(q, prev, rq); 2858 return attempt_merge(q, prev, rq);
2858 2859
2859 return 0; 2860 return 0;
2860 } 2861 }
2861 2862
2862 static void init_request_from_bio(struct request *req, struct bio *bio) 2863 static void init_request_from_bio(struct request *req, struct bio *bio)
2863 { 2864 {
2864 req->cmd_type = REQ_TYPE_FS; 2865 req->cmd_type = REQ_TYPE_FS;
2865 2866
2866 /* 2867 /*
2867 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) 2868 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
2868 */ 2869 */
2869 if (bio_rw_ahead(bio) || bio_failfast(bio)) 2870 if (bio_rw_ahead(bio) || bio_failfast(bio))
2870 req->cmd_flags |= REQ_FAILFAST; 2871 req->cmd_flags |= REQ_FAILFAST;
2871 2872
2872 /* 2873 /*
2873 * REQ_BARRIER implies no merging, but lets make it explicit 2874 * REQ_BARRIER implies no merging, but lets make it explicit
2874 */ 2875 */
2875 if (unlikely(bio_barrier(bio))) 2876 if (unlikely(bio_barrier(bio)))
2876 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); 2877 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2877 2878
2878 if (bio_sync(bio)) 2879 if (bio_sync(bio))
2879 req->cmd_flags |= REQ_RW_SYNC; 2880 req->cmd_flags |= REQ_RW_SYNC;
2880 2881
2881 req->errors = 0; 2882 req->errors = 0;
2882 req->hard_sector = req->sector = bio->bi_sector; 2883 req->hard_sector = req->sector = bio->bi_sector;
2883 req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio); 2884 req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio);
2884 req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio); 2885 req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio);
2885 req->nr_phys_segments = bio_phys_segments(req->q, bio); 2886 req->nr_phys_segments = bio_phys_segments(req->q, bio);
2886 req->nr_hw_segments = bio_hw_segments(req->q, bio); 2887 req->nr_hw_segments = bio_hw_segments(req->q, bio);
2887 req->buffer = bio_data(bio); /* see ->buffer comment above */ 2888 req->buffer = bio_data(bio); /* see ->buffer comment above */
2888 req->waiting = NULL; 2889 req->waiting = NULL;
2889 req->bio = req->biotail = bio; 2890 req->bio = req->biotail = bio;
2890 req->ioprio = bio_prio(bio); 2891 req->ioprio = bio_prio(bio);
2891 req->rq_disk = bio->bi_bdev->bd_disk; 2892 req->rq_disk = bio->bi_bdev->bd_disk;
2892 req->start_time = jiffies; 2893 req->start_time = jiffies;
2893 } 2894 }
2894 2895
2895 static int __make_request(request_queue_t *q, struct bio *bio) 2896 static int __make_request(request_queue_t *q, struct bio *bio)
2896 { 2897 {
2897 struct request *req; 2898 struct request *req;
2898 int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync; 2899 int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
2899 unsigned short prio; 2900 unsigned short prio;
2900 sector_t sector; 2901 sector_t sector;
2901 2902
2902 sector = bio->bi_sector; 2903 sector = bio->bi_sector;
2903 nr_sectors = bio_sectors(bio); 2904 nr_sectors = bio_sectors(bio);
2904 cur_nr_sectors = bio_cur_sectors(bio); 2905 cur_nr_sectors = bio_cur_sectors(bio);
2905 prio = bio_prio(bio); 2906 prio = bio_prio(bio);
2906 2907
2907 rw = bio_data_dir(bio); 2908 rw = bio_data_dir(bio);
2908 sync = bio_sync(bio); 2909 sync = bio_sync(bio);
2909 2910
2910 /* 2911 /*
2911 * low level driver can indicate that it wants pages above a 2912 * low level driver can indicate that it wants pages above a
2912 * certain limit bounced to low memory (ie for highmem, or even 2913 * certain limit bounced to low memory (ie for highmem, or even
2913 * ISA dma in theory) 2914 * ISA dma in theory)
2914 */ 2915 */
2915 blk_queue_bounce(q, &bio); 2916 blk_queue_bounce(q, &bio);
2916 2917
2917 spin_lock_prefetch(q->queue_lock); 2918 spin_lock_prefetch(q->queue_lock);
2918 2919
2919 barrier = bio_barrier(bio); 2920 barrier = bio_barrier(bio);
2920 if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { 2921 if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
2921 err = -EOPNOTSUPP; 2922 err = -EOPNOTSUPP;
2922 goto end_io; 2923 goto end_io;
2923 } 2924 }
2924 2925
2925 spin_lock_irq(q->queue_lock); 2926 spin_lock_irq(q->queue_lock);
2926 2927
2927 if (unlikely(barrier) || elv_queue_empty(q)) 2928 if (unlikely(barrier) || elv_queue_empty(q))
2928 goto get_rq; 2929 goto get_rq;
2929 2930
2930 el_ret = elv_merge(q, &req, bio); 2931 el_ret = elv_merge(q, &req, bio);
2931 switch (el_ret) { 2932 switch (el_ret) {
2932 case ELEVATOR_BACK_MERGE: 2933 case ELEVATOR_BACK_MERGE:
2933 BUG_ON(!rq_mergeable(req)); 2934 BUG_ON(!rq_mergeable(req));
2934 2935
2935 if (!q->back_merge_fn(q, req, bio)) 2936 if (!q->back_merge_fn(q, req, bio))
2936 break; 2937 break;
2937 2938
2938 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 2939 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
2939 2940
2940 req->biotail->bi_next = bio; 2941 req->biotail->bi_next = bio;
2941 req->biotail = bio; 2942 req->biotail = bio;
2942 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 2943 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
2943 req->ioprio = ioprio_best(req->ioprio, prio); 2944 req->ioprio = ioprio_best(req->ioprio, prio);
2944 drive_stat_acct(req, nr_sectors, 0); 2945 drive_stat_acct(req, nr_sectors, 0);
2945 if (!attempt_back_merge(q, req)) 2946 if (!attempt_back_merge(q, req))
2946 elv_merged_request(q, req); 2947 elv_merged_request(q, req, el_ret);
2947 goto out; 2948 goto out;
2948 2949
2949 case ELEVATOR_FRONT_MERGE: 2950 case ELEVATOR_FRONT_MERGE:
2950 BUG_ON(!rq_mergeable(req)); 2951 BUG_ON(!rq_mergeable(req));
2951 2952
2952 if (!q->front_merge_fn(q, req, bio)) 2953 if (!q->front_merge_fn(q, req, bio))
2953 break; 2954 break;
2954 2955
2955 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 2956 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
2956 2957
2957 bio->bi_next = req->bio; 2958 bio->bi_next = req->bio;
2958 req->bio = bio; 2959 req->bio = bio;
2959 2960
2960 /* 2961 /*
2961 * req->buffer may not be valid. If the low level driver said 2962 * req->buffer may not be valid. If the low level driver said
2962 * it didn't need a bounce buffer then it had better 2963 * it didn't need a bounce buffer then it had better
2963 * not touch req->buffer either... 2964 * not touch req->buffer either...
2964 */ 2965 */
2965 req->buffer = bio_data(bio); 2966 req->buffer = bio_data(bio);
2966 req->current_nr_sectors = cur_nr_sectors; 2967 req->current_nr_sectors = cur_nr_sectors;
2967 req->hard_cur_sectors = cur_nr_sectors; 2968 req->hard_cur_sectors = cur_nr_sectors;
2968 req->sector = req->hard_sector = sector; 2969 req->sector = req->hard_sector = sector;
2969 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 2970 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
2970 req->ioprio = ioprio_best(req->ioprio, prio); 2971 req->ioprio = ioprio_best(req->ioprio, prio);
2971 drive_stat_acct(req, nr_sectors, 0); 2972 drive_stat_acct(req, nr_sectors, 0);
2972 if (!attempt_front_merge(q, req)) 2973 if (!attempt_front_merge(q, req))
2973 elv_merged_request(q, req); 2974 elv_merged_request(q, req, el_ret);
2974 goto out; 2975 goto out;
2975 2976
2976 /* ELEVATOR_NO_MERGE: elevator says don't/can't merge. */ 2977 /* ELEVATOR_NO_MERGE: elevator says don't/can't merge. */
2977 default: 2978 default:
2978 ; 2979 ;
2979 } 2980 }
2980 2981
2981 get_rq: 2982 get_rq:
2982 /* 2983 /*
2983 * Grab a free request. This may sleep but cannot fail. 2984 * Grab a free request. This may sleep but cannot fail.
2984 * Returns with the queue unlocked. 2985 * Returns with the queue unlocked.
2985 */ 2986 */
2986 req = get_request_wait(q, rw, bio); 2987 req = get_request_wait(q, rw, bio);
2987 2988
2988 /* 2989 /*
2989 * After dropping the lock and possibly sleeping here, our request 2990 * After dropping the lock and possibly sleeping here, our request
2990 * may now be mergeable after it had proven unmergeable (above). 2991 * may now be mergeable after it had proven unmergeable (above).
2991 * We don't worry about that case for efficiency. It won't happen 2992 * We don't worry about that case for efficiency. It won't happen
2992 * often, and the elevators are able to handle it. 2993 * often, and the elevators are able to handle it.
2993 */ 2994 */
2994 init_request_from_bio(req, bio); 2995 init_request_from_bio(req, bio);
2995 2996
2996 spin_lock_irq(q->queue_lock); 2997 spin_lock_irq(q->queue_lock);
2997 if (elv_queue_empty(q)) 2998 if (elv_queue_empty(q))
2998 blk_plug_device(q); 2999 blk_plug_device(q);
2999 add_request(q, req); 3000 add_request(q, req);
3000 out: 3001 out:
3001 if (sync) 3002 if (sync)
3002 __generic_unplug_device(q); 3003 __generic_unplug_device(q);
3003 3004
3004 spin_unlock_irq(q->queue_lock); 3005 spin_unlock_irq(q->queue_lock);
3005 return 0; 3006 return 0;
3006 3007
3007 end_io: 3008 end_io:
3008 bio_endio(bio, nr_sectors << 9, err); 3009 bio_endio(bio, nr_sectors << 9, err);
3009 return 0; 3010 return 0;
3010 } 3011 }
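
The only functional change in this hunk is that __make_request() now forwards the merge type returned by elv_merge() (el_ret, i.e. ELEVATOR_BACK_MERGE or ELEVATOR_FRONT_MERGE) into elv_merged_request(), so the elevator core and the individual schedulers can tell which end of the request grew. A minimal sketch of a scheduler-side merged hook using that type; my_merged_request and my_reposition_in_sort_tree are illustrative names, not part of this patch:

static void my_merged_request(request_queue_t *q, struct request *rq, int type)
{
        /*
         * A front merge moves rq->sector, which is what a sector-sorted
         * rbtree is keyed on, so the request has to be repositioned in the
         * scheduler's sort tree. A back merge only grows the tail of the
         * request, and the merge hash kept by the elevator core is
         * repositioned there, so nothing is needed here for that case.
         */
        if (type == ELEVATOR_FRONT_MERGE)
                my_reposition_in_sort_tree(q->elevator->elevator_data, rq);
}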
3011 3012
3012 /* 3013 /*
3013 * If bio->bi_dev is a partition, remap the location 3014 * If bio->bi_dev is a partition, remap the location
3014 */ 3015 */
3015 static inline void blk_partition_remap(struct bio *bio) 3016 static inline void blk_partition_remap(struct bio *bio)
3016 { 3017 {
3017 struct block_device *bdev = bio->bi_bdev; 3018 struct block_device *bdev = bio->bi_bdev;
3018 3019
3019 if (bdev != bdev->bd_contains) { 3020 if (bdev != bdev->bd_contains) {
3020 struct hd_struct *p = bdev->bd_part; 3021 struct hd_struct *p = bdev->bd_part;
3021 const int rw = bio_data_dir(bio); 3022 const int rw = bio_data_dir(bio);
3022 3023
3023 p->sectors[rw] += bio_sectors(bio); 3024 p->sectors[rw] += bio_sectors(bio);
3024 p->ios[rw]++; 3025 p->ios[rw]++;
3025 3026
3026 bio->bi_sector += p->start_sect; 3027 bio->bi_sector += p->start_sect;
3027 bio->bi_bdev = bdev->bd_contains; 3028 bio->bi_bdev = bdev->bd_contains;
3028 } 3029 }
3029 } 3030 }
3030 3031
3031 static void handle_bad_sector(struct bio *bio) 3032 static void handle_bad_sector(struct bio *bio)
3032 { 3033 {
3033 char b[BDEVNAME_SIZE]; 3034 char b[BDEVNAME_SIZE];
3034 3035
3035 printk(KERN_INFO "attempt to access beyond end of device\n"); 3036 printk(KERN_INFO "attempt to access beyond end of device\n");
3036 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", 3037 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
3037 bdevname(bio->bi_bdev, b), 3038 bdevname(bio->bi_bdev, b),
3038 bio->bi_rw, 3039 bio->bi_rw,
3039 (unsigned long long)bio->bi_sector + bio_sectors(bio), 3040 (unsigned long long)bio->bi_sector + bio_sectors(bio),
3040 (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); 3041 (long long)(bio->bi_bdev->bd_inode->i_size >> 9));
3041 3042
3042 set_bit(BIO_EOF, &bio->bi_flags); 3043 set_bit(BIO_EOF, &bio->bi_flags);
3043 } 3044 }
3044 3045
3045 /** 3046 /**
3046 * generic_make_request: hand a buffer to its device driver for I/O 3047 * generic_make_request: hand a buffer to its device driver for I/O
3047 * @bio: The bio describing the location in memory and on the device. 3048 * @bio: The bio describing the location in memory and on the device.
3048 * 3049 *
3049 * generic_make_request() is used to make I/O requests of block 3050 * generic_make_request() is used to make I/O requests of block
3050 * devices. It is passed a &struct bio, which describes the I/O that needs 3051 * devices. It is passed a &struct bio, which describes the I/O that needs
3051 * to be done. 3052 * to be done.
3052 * 3053 *
3053 * generic_make_request() does not return any status. The 3054 * generic_make_request() does not return any status. The
3054 * success/failure status of the request, along with notification of 3055 * success/failure status of the request, along with notification of
3055 * completion, is delivered asynchronously through the bio->bi_end_io 3056 * completion, is delivered asynchronously through the bio->bi_end_io
3056 * function described (one day) elsewhere. 3057 * function described (one day) elsewhere.
3057 * 3058 *
3058 * The caller of generic_make_request must make sure that bi_io_vec 3059 * The caller of generic_make_request must make sure that bi_io_vec
3059 * are set to describe the memory buffer, and that bi_dev and bi_sector are 3060 * are set to describe the memory buffer, and that bi_dev and bi_sector are
3060 * set to describe the device address, and the 3061 * set to describe the device address, and the
3061 * bi_end_io and optionally bi_private are set to describe how 3062 * bi_end_io and optionally bi_private are set to describe how
3062 * completion notification should be signaled. 3063 * completion notification should be signaled.
3063 * 3064 *
3064 * generic_make_request and the drivers it calls may use bi_next if this 3065 * generic_make_request and the drivers it calls may use bi_next if this
3065 * bio happens to be merged with someone else, and may change bi_dev and 3066 * bio happens to be merged with someone else, and may change bi_dev and
3066 * bi_sector for remaps as it sees fit. So the values of these fields 3067 * bi_sector for remaps as it sees fit. So the values of these fields
3067 * should NOT be depended on after the call to generic_make_request. 3068 * should NOT be depended on after the call to generic_make_request.
3068 */ 3069 */
3069 void generic_make_request(struct bio *bio) 3070 void generic_make_request(struct bio *bio)
3070 { 3071 {
3071 request_queue_t *q; 3072 request_queue_t *q;
3072 sector_t maxsector; 3073 sector_t maxsector;
3073 int ret, nr_sectors = bio_sectors(bio); 3074 int ret, nr_sectors = bio_sectors(bio);
3074 dev_t old_dev; 3075 dev_t old_dev;
3075 3076
3076 might_sleep(); 3077 might_sleep();
3077 /* Test device or partition size, when known. */ 3078 /* Test device or partition size, when known. */
3078 maxsector = bio->bi_bdev->bd_inode->i_size >> 9; 3079 maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
3079 if (maxsector) { 3080 if (maxsector) {
3080 sector_t sector = bio->bi_sector; 3081 sector_t sector = bio->bi_sector;
3081 3082
3082 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { 3083 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
3083 /* 3084 /*
3084 * This may well happen - the kernel calls bread() 3085 * This may well happen - the kernel calls bread()
3085 * without checking the size of the device, e.g., when 3086 * without checking the size of the device, e.g., when
3086 * mounting a device. 3087 * mounting a device.
3087 */ 3088 */
3088 handle_bad_sector(bio); 3089 handle_bad_sector(bio);
3089 goto end_io; 3090 goto end_io;
3090 } 3091 }
3091 } 3092 }
3092 3093
3093 /* 3094 /*
3094 * Resolve the mapping until finished. (drivers are 3095 * Resolve the mapping until finished. (drivers are
3095 * still free to implement/resolve their own stacking 3096 * still free to implement/resolve their own stacking
3096 * by explicitly returning 0) 3097 * by explicitly returning 0)
3097 * 3098 *
3098 * NOTE: we don't repeat the blk_size check for each new device. 3099 * NOTE: we don't repeat the blk_size check for each new device.
3099 * Stacking drivers are expected to know what they are doing. 3100 * Stacking drivers are expected to know what they are doing.
3100 */ 3101 */
3101 maxsector = -1; 3102 maxsector = -1;
3102 old_dev = 0; 3103 old_dev = 0;
3103 do { 3104 do {
3104 char b[BDEVNAME_SIZE]; 3105 char b[BDEVNAME_SIZE];
3105 3106
3106 q = bdev_get_queue(bio->bi_bdev); 3107 q = bdev_get_queue(bio->bi_bdev);
3107 if (!q) { 3108 if (!q) {
3108 printk(KERN_ERR 3109 printk(KERN_ERR
3109 "generic_make_request: Trying to access " 3110 "generic_make_request: Trying to access "
3110 "nonexistent block-device %s (%Lu)\n", 3111 "nonexistent block-device %s (%Lu)\n",
3111 bdevname(bio->bi_bdev, b), 3112 bdevname(bio->bi_bdev, b),
3112 (long long) bio->bi_sector); 3113 (long long) bio->bi_sector);
3113 end_io: 3114 end_io:
3114 bio_endio(bio, bio->bi_size, -EIO); 3115 bio_endio(bio, bio->bi_size, -EIO);
3115 break; 3116 break;
3116 } 3117 }
3117 3118
3118 if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) { 3119 if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) {
3119 printk("bio too big for device %s (%u > %u)\n", 3120 printk("bio too big for device %s (%u > %u)\n",
3120 bdevname(bio->bi_bdev, b), 3121 bdevname(bio->bi_bdev, b),
3121 bio_sectors(bio), 3122 bio_sectors(bio),
3122 q->max_hw_sectors); 3123 q->max_hw_sectors);
3123 goto end_io; 3124 goto end_io;
3124 } 3125 }
3125 3126
3126 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) 3127 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
3127 goto end_io; 3128 goto end_io;
3128 3129
3129 /* 3130 /*
3130 * If this device has partitions, remap block n 3131 * If this device has partitions, remap block n
3131 * of partition p to block n+start(p) of the disk. 3132 * of partition p to block n+start(p) of the disk.
3132 */ 3133 */
3133 blk_partition_remap(bio); 3134 blk_partition_remap(bio);
3134 3135
3135 if (maxsector != -1) 3136 if (maxsector != -1)
3136 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 3137 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
3137 maxsector); 3138 maxsector);
3138 3139
3139 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 3140 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
3140 3141
3141 maxsector = bio->bi_sector; 3142 maxsector = bio->bi_sector;
3142 old_dev = bio->bi_bdev->bd_dev; 3143 old_dev = bio->bi_bdev->bd_dev;
3143 3144
3144 ret = q->make_request_fn(q, bio); 3145 ret = q->make_request_fn(q, bio);
3145 } while (ret); 3146 } while (ret);
3146 } 3147 }
3147 3148
3148 EXPORT_SYMBOL(generic_make_request); 3149 EXPORT_SYMBOL(generic_make_request);
3149 3150
3150 /** 3151 /**
3151 * submit_bio: submit a bio to the block device layer for I/O 3152 * submit_bio: submit a bio to the block device layer for I/O
3152 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) 3153 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
3153 * @bio: The &struct bio which describes the I/O 3154 * @bio: The &struct bio which describes the I/O
3154 * 3155 *
3155 * submit_bio() is very similar in purpose to generic_make_request(), and 3156 * submit_bio() is very similar in purpose to generic_make_request(), and
3156 * uses that function to do most of the work. Both are fairly rough 3157 * uses that function to do most of the work. Both are fairly rough
3157 * interfaces; @bio must be set up and ready for I/O. 3158 * interfaces; @bio must be set up and ready for I/O.
3158 * 3159 *
3159 */ 3160 */
3160 void submit_bio(int rw, struct bio *bio) 3161 void submit_bio(int rw, struct bio *bio)
3161 { 3162 {
3162 int count = bio_sectors(bio); 3163 int count = bio_sectors(bio);
3163 3164
3164 BIO_BUG_ON(!bio->bi_size); 3165 BIO_BUG_ON(!bio->bi_size);
3165 BIO_BUG_ON(!bio->bi_io_vec); 3166 BIO_BUG_ON(!bio->bi_io_vec);
3166 bio->bi_rw |= rw; 3167 bio->bi_rw |= rw;
3167 if (rw & WRITE) 3168 if (rw & WRITE)
3168 count_vm_events(PGPGOUT, count); 3169 count_vm_events(PGPGOUT, count);
3169 else 3170 else
3170 count_vm_events(PGPGIN, count); 3171 count_vm_events(PGPGIN, count);
3171 3172
3172 if (unlikely(block_dump)) { 3173 if (unlikely(block_dump)) {
3173 char b[BDEVNAME_SIZE]; 3174 char b[BDEVNAME_SIZE];
3174 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", 3175 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
3175 current->comm, current->pid, 3176 current->comm, current->pid,
3176 (rw & WRITE) ? "WRITE" : "READ", 3177 (rw & WRITE) ? "WRITE" : "READ",
3177 (unsigned long long)bio->bi_sector, 3178 (unsigned long long)bio->bi_sector,
3178 bdevname(bio->bi_bdev,b)); 3179 bdevname(bio->bi_bdev,b));
3179 } 3180 }
3180 3181
3181 generic_make_request(bio); 3182 generic_make_request(bio);
3182 } 3183 }
3183 3184
3184 EXPORT_SYMBOL(submit_bio); 3185 EXPORT_SYMBOL(submit_bio);
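
For context on the interface documented above: generic_make_request() and submit_bio() consume a fully initialised bio, exactly as the kernel-doc requires. A minimal sketch of a caller in the style this file uses (three-argument bi_end_io, byte-count driven bio_endio()); my_end_io and my_read_sector and their arguments are illustrative, and error handling is omitted:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/completion.h>

static int my_end_io(struct bio *bio, unsigned int bytes_done, int err)
{
        /* called for partial completions too; wait until the whole bio is done */
        if (bio->bi_size)
                return 1;

        complete((struct completion *) bio->bi_private);
        bio_put(bio);
        return 0;
}

static void my_read_sector(struct block_device *bdev, sector_t sector,
                           struct page *page, struct completion *done)
{
        struct bio *bio = bio_alloc(GFP_NOIO, 1);

        bio->bi_bdev = bdev;
        bio->bi_sector = sector;
        bio->bi_end_io = my_end_io;
        bio->bi_private = done;
        bio_add_page(bio, page, 512, 0);

        submit_bio(READ, bio);          /* ends up in generic_make_request() */
}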
3185 3186
3186 static void blk_recalc_rq_segments(struct request *rq) 3187 static void blk_recalc_rq_segments(struct request *rq)
3187 { 3188 {
3188 struct bio *bio, *prevbio = NULL; 3189 struct bio *bio, *prevbio = NULL;
3189 int nr_phys_segs, nr_hw_segs; 3190 int nr_phys_segs, nr_hw_segs;
3190 unsigned int phys_size, hw_size; 3191 unsigned int phys_size, hw_size;
3191 request_queue_t *q = rq->q; 3192 request_queue_t *q = rq->q;
3192 3193
3193 if (!rq->bio) 3194 if (!rq->bio)
3194 return; 3195 return;
3195 3196
3196 phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; 3197 phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
3197 rq_for_each_bio(bio, rq) { 3198 rq_for_each_bio(bio, rq) {
3198 /* Force bio hw/phys segs to be recalculated. */ 3199 /* Force bio hw/phys segs to be recalculated. */
3199 bio->bi_flags &= ~(1 << BIO_SEG_VALID); 3200 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
3200 3201
3201 nr_phys_segs += bio_phys_segments(q, bio); 3202 nr_phys_segs += bio_phys_segments(q, bio);
3202 nr_hw_segs += bio_hw_segments(q, bio); 3203 nr_hw_segs += bio_hw_segments(q, bio);
3203 if (prevbio) { 3204 if (prevbio) {
3204 int pseg = phys_size + prevbio->bi_size + bio->bi_size; 3205 int pseg = phys_size + prevbio->bi_size + bio->bi_size;
3205 int hseg = hw_size + prevbio->bi_size + bio->bi_size; 3206 int hseg = hw_size + prevbio->bi_size + bio->bi_size;
3206 3207
3207 if (blk_phys_contig_segment(q, prevbio, bio) && 3208 if (blk_phys_contig_segment(q, prevbio, bio) &&
3208 pseg <= q->max_segment_size) { 3209 pseg <= q->max_segment_size) {
3209 nr_phys_segs--; 3210 nr_phys_segs--;
3210 phys_size += prevbio->bi_size + bio->bi_size; 3211 phys_size += prevbio->bi_size + bio->bi_size;
3211 } else 3212 } else
3212 phys_size = 0; 3213 phys_size = 0;
3213 3214
3214 if (blk_hw_contig_segment(q, prevbio, bio) && 3215 if (blk_hw_contig_segment(q, prevbio, bio) &&
3215 hseg <= q->max_segment_size) { 3216 hseg <= q->max_segment_size) {
3216 nr_hw_segs--; 3217 nr_hw_segs--;
3217 hw_size += prevbio->bi_size + bio->bi_size; 3218 hw_size += prevbio->bi_size + bio->bi_size;
3218 } else 3219 } else
3219 hw_size = 0; 3220 hw_size = 0;
3220 } 3221 }
3221 prevbio = bio; 3222 prevbio = bio;
3222 } 3223 }
3223 3224
3224 rq->nr_phys_segments = nr_phys_segs; 3225 rq->nr_phys_segments = nr_phys_segs;
3225 rq->nr_hw_segments = nr_hw_segs; 3226 rq->nr_hw_segments = nr_hw_segs;
3226 } 3227 }
3227 3228
3228 static void blk_recalc_rq_sectors(struct request *rq, int nsect) 3229 static void blk_recalc_rq_sectors(struct request *rq, int nsect)
3229 { 3230 {
3230 if (blk_fs_request(rq)) { 3231 if (blk_fs_request(rq)) {
3231 rq->hard_sector += nsect; 3232 rq->hard_sector += nsect;
3232 rq->hard_nr_sectors -= nsect; 3233 rq->hard_nr_sectors -= nsect;
3233 3234
3234 /* 3235 /*
3235 * Move the I/O submission pointers ahead if required. 3236 * Move the I/O submission pointers ahead if required.
3236 */ 3237 */
3237 if ((rq->nr_sectors >= rq->hard_nr_sectors) && 3238 if ((rq->nr_sectors >= rq->hard_nr_sectors) &&
3238 (rq->sector <= rq->hard_sector)) { 3239 (rq->sector <= rq->hard_sector)) {
3239 rq->sector = rq->hard_sector; 3240 rq->sector = rq->hard_sector;
3240 rq->nr_sectors = rq->hard_nr_sectors; 3241 rq->nr_sectors = rq->hard_nr_sectors;
3241 rq->hard_cur_sectors = bio_cur_sectors(rq->bio); 3242 rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
3242 rq->current_nr_sectors = rq->hard_cur_sectors; 3243 rq->current_nr_sectors = rq->hard_cur_sectors;
3243 rq->buffer = bio_data(rq->bio); 3244 rq->buffer = bio_data(rq->bio);
3244 } 3245 }
3245 3246
3246 /* 3247 /*
3247 * if total number of sectors is less than the first segment 3248 * if total number of sectors is less than the first segment
3248 * size, something has gone terribly wrong 3249 * size, something has gone terribly wrong
3249 */ 3250 */
3250 if (rq->nr_sectors < rq->current_nr_sectors) { 3251 if (rq->nr_sectors < rq->current_nr_sectors) {
3251 printk("blk: request botched\n"); 3252 printk("blk: request botched\n");
3252 rq->nr_sectors = rq->current_nr_sectors; 3253 rq->nr_sectors = rq->current_nr_sectors;
3253 } 3254 }
3254 } 3255 }
3255 } 3256 }
3256 3257
3257 static int __end_that_request_first(struct request *req, int uptodate, 3258 static int __end_that_request_first(struct request *req, int uptodate,
3258 int nr_bytes) 3259 int nr_bytes)
3259 { 3260 {
3260 int total_bytes, bio_nbytes, error, next_idx = 0; 3261 int total_bytes, bio_nbytes, error, next_idx = 0;
3261 struct bio *bio; 3262 struct bio *bio;
3262 3263
3263 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); 3264 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
3264 3265
3265 /* 3266 /*
3266 * extend the uptodate bool so a value < 0 can carry the I/O error code directly 3267 * extend the uptodate bool so a value < 0 can carry the I/O error code directly
3267 */ 3268 */
3268 error = 0; 3269 error = 0;
3269 if (end_io_error(uptodate)) 3270 if (end_io_error(uptodate))
3270 error = !uptodate ? -EIO : uptodate; 3271 error = !uptodate ? -EIO : uptodate;
3271 3272
3272 /* 3273 /*
3273 * for a REQ_BLOCK_PC request, we want to carry any eventual 3274 * for a REQ_BLOCK_PC request, we want to carry any eventual
3274 * sense key with us all the way through 3275 * sense key with us all the way through
3275 */ 3276 */
3276 if (!blk_pc_request(req)) 3277 if (!blk_pc_request(req))
3277 req->errors = 0; 3278 req->errors = 0;
3278 3279
3279 if (!uptodate) { 3280 if (!uptodate) {
3280 if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET)) 3281 if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))
3281 printk("end_request: I/O error, dev %s, sector %llu\n", 3282 printk("end_request: I/O error, dev %s, sector %llu\n",
3282 req->rq_disk ? req->rq_disk->disk_name : "?", 3283 req->rq_disk ? req->rq_disk->disk_name : "?",
3283 (unsigned long long)req->sector); 3284 (unsigned long long)req->sector);
3284 } 3285 }
3285 3286
3286 if (blk_fs_request(req) && req->rq_disk) { 3287 if (blk_fs_request(req) && req->rq_disk) {
3287 const int rw = rq_data_dir(req); 3288 const int rw = rq_data_dir(req);
3288 3289
3289 disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9); 3290 disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
3290 } 3291 }
3291 3292
3292 total_bytes = bio_nbytes = 0; 3293 total_bytes = bio_nbytes = 0;
3293 while ((bio = req->bio) != NULL) { 3294 while ((bio = req->bio) != NULL) {
3294 int nbytes; 3295 int nbytes;
3295 3296
3296 if (nr_bytes >= bio->bi_size) { 3297 if (nr_bytes >= bio->bi_size) {
3297 req->bio = bio->bi_next; 3298 req->bio = bio->bi_next;
3298 nbytes = bio->bi_size; 3299 nbytes = bio->bi_size;
3299 if (!ordered_bio_endio(req, bio, nbytes, error)) 3300 if (!ordered_bio_endio(req, bio, nbytes, error))
3300 bio_endio(bio, nbytes, error); 3301 bio_endio(bio, nbytes, error);
3301 next_idx = 0; 3302 next_idx = 0;
3302 bio_nbytes = 0; 3303 bio_nbytes = 0;
3303 } else { 3304 } else {
3304 int idx = bio->bi_idx + next_idx; 3305 int idx = bio->bi_idx + next_idx;
3305 3306
3306 if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { 3307 if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
3307 blk_dump_rq_flags(req, "__end_that"); 3308 blk_dump_rq_flags(req, "__end_that");
3308 printk("%s: bio idx %d >= vcnt %d\n", 3309 printk("%s: bio idx %d >= vcnt %d\n",
3309 __FUNCTION__, 3310 __FUNCTION__,
3310 bio->bi_idx, bio->bi_vcnt); 3311 bio->bi_idx, bio->bi_vcnt);
3311 break; 3312 break;
3312 } 3313 }
3313 3314
3314 nbytes = bio_iovec_idx(bio, idx)->bv_len; 3315 nbytes = bio_iovec_idx(bio, idx)->bv_len;
3315 BIO_BUG_ON(nbytes > bio->bi_size); 3316 BIO_BUG_ON(nbytes > bio->bi_size);
3316 3317
3317 /* 3318 /*
3318 * not a complete bvec done 3319 * not a complete bvec done
3319 */ 3320 */
3320 if (unlikely(nbytes > nr_bytes)) { 3321 if (unlikely(nbytes > nr_bytes)) {
3321 bio_nbytes += nr_bytes; 3322 bio_nbytes += nr_bytes;
3322 total_bytes += nr_bytes; 3323 total_bytes += nr_bytes;
3323 break; 3324 break;
3324 } 3325 }
3325 3326
3326 /* 3327 /*
3327 * advance to the next vector 3328 * advance to the next vector
3328 */ 3329 */
3329 next_idx++; 3330 next_idx++;
3330 bio_nbytes += nbytes; 3331 bio_nbytes += nbytes;
3331 } 3332 }
3332 3333
3333 total_bytes += nbytes; 3334 total_bytes += nbytes;
3334 nr_bytes -= nbytes; 3335 nr_bytes -= nbytes;
3335 3336
3336 if ((bio = req->bio)) { 3337 if ((bio = req->bio)) {
3337 /* 3338 /*
3338 * end more in this run, or just return 'not-done' 3339 * end more in this run, or just return 'not-done'
3339 */ 3340 */
3340 if (unlikely(nr_bytes <= 0)) 3341 if (unlikely(nr_bytes <= 0))
3341 break; 3342 break;
3342 } 3343 }
3343 } 3344 }
3344 3345
3345 /* 3346 /*
3346 * completely done 3347 * completely done
3347 */ 3348 */
3348 if (!req->bio) 3349 if (!req->bio)
3349 return 0; 3350 return 0;
3350 3351
3351 /* 3352 /*
3352 * if the request wasn't completed, update state 3353 * if the request wasn't completed, update state
3353 */ 3354 */
3354 if (bio_nbytes) { 3355 if (bio_nbytes) {
3355 if (!ordered_bio_endio(req, bio, bio_nbytes, error)) 3356 if (!ordered_bio_endio(req, bio, bio_nbytes, error))
3356 bio_endio(bio, bio_nbytes, error); 3357 bio_endio(bio, bio_nbytes, error);
3357 bio->bi_idx += next_idx; 3358 bio->bi_idx += next_idx;
3358 bio_iovec(bio)->bv_offset += nr_bytes; 3359 bio_iovec(bio)->bv_offset += nr_bytes;
3359 bio_iovec(bio)->bv_len -= nr_bytes; 3360 bio_iovec(bio)->bv_len -= nr_bytes;
3360 } 3361 }
3361 3362
3362 blk_recalc_rq_sectors(req, total_bytes >> 9); 3363 blk_recalc_rq_sectors(req, total_bytes >> 9);
3363 blk_recalc_rq_segments(req); 3364 blk_recalc_rq_segments(req);
3364 return 1; 3365 return 1;
3365 } 3366 }
3366 3367
3367 /** 3368 /**
3368 * end_that_request_first - end I/O on a request 3369 * end_that_request_first - end I/O on a request
3369 * @req: the request being processed 3370 * @req: the request being processed
3370 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error 3371 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
3371 * @nr_sectors: number of sectors to end I/O on 3372 * @nr_sectors: number of sectors to end I/O on
3372 * 3373 *
3373 * Description: 3374 * Description:
3374 * Ends I/O on a number of sectors attached to @req, and sets it up 3375 * Ends I/O on a number of sectors attached to @req, and sets it up
3375 * for the next range of segments (if any) in the cluster. 3376 * for the next range of segments (if any) in the cluster.
3376 * 3377 *
3377 * Return: 3378 * Return:
3378 * 0 - we are done with this request, call end_that_request_last() 3379 * 0 - we are done with this request, call end_that_request_last()
3379 * 1 - still buffers pending for this request 3380 * 1 - still buffers pending for this request
3380 **/ 3381 **/
3381 int end_that_request_first(struct request *req, int uptodate, int nr_sectors) 3382 int end_that_request_first(struct request *req, int uptodate, int nr_sectors)
3382 { 3383 {
3383 return __end_that_request_first(req, uptodate, nr_sectors << 9); 3384 return __end_that_request_first(req, uptodate, nr_sectors << 9);
3384 } 3385 }
3385 3386
3386 EXPORT_SYMBOL(end_that_request_first); 3387 EXPORT_SYMBOL(end_that_request_first);
3387 3388
3388 /** 3389 /**
3389 * end_that_request_chunk - end I/O on a request 3390 * end_that_request_chunk - end I/O on a request
3390 * @req: the request being processed 3391 * @req: the request being processed
3391 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error 3392 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
3392 * @nr_bytes: number of bytes to complete 3393 * @nr_bytes: number of bytes to complete
3393 * 3394 *
3394 * Description: 3395 * Description:
3395 * Ends I/O on a number of bytes attached to @req, and sets it up 3396 * Ends I/O on a number of bytes attached to @req, and sets it up
3396 * for the next range of segments (if any). Like end_that_request_first(), 3397 * for the next range of segments (if any). Like end_that_request_first(),
3397 * but deals with bytes instead of sectors. 3398 * but deals with bytes instead of sectors.
3398 * 3399 *
3399 * Return: 3400 * Return:
3400 * 0 - we are done with this request, call end_that_request_last() 3401 * 0 - we are done with this request, call end_that_request_last()
3401 * 1 - still buffers pending for this request 3402 * 1 - still buffers pending for this request
3402 **/ 3403 **/
3403 int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) 3404 int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes)
3404 { 3405 {
3405 return __end_that_request_first(req, uptodate, nr_bytes); 3406 return __end_that_request_first(req, uptodate, nr_bytes);
3406 } 3407 }
3407 3408
3408 EXPORT_SYMBOL(end_that_request_chunk); 3409 EXPORT_SYMBOL(end_that_request_chunk);
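
end_that_request_chunk() is the byte-granular variant documented above: like end_that_request_first() it returns 1 while part of the request is still outstanding, and 0 once everything has been ended, at which point end_that_request_last() finishes the request. A sketch of that pattern from a driver's completion path, assuming the queue lock is held as end_that_request_last() requires; my_complete_bytes is an illustrative name:

static void my_complete_bytes(struct request *rq, int uptodate, unsigned int bytes)
{
        /* 'bytes' would come from the hardware's reported transfer count */
        if (end_that_request_chunk(rq, uptodate, bytes))
                return;                 /* more of the request still pending */

        blkdev_dequeue_request(rq);     /* if the driver left it on the queue */
        end_that_request_last(rq, uptodate);
}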
3409 3410
3410 /* 3411 /*
3411 * splice the completion data to a local structure and hand off to 3412 * splice the completion data to a local structure and hand off to
3412 * process_completion_queue() to complete the requests 3413 * process_completion_queue() to complete the requests
3413 */ 3414 */
3414 static void blk_done_softirq(struct softirq_action *h) 3415 static void blk_done_softirq(struct softirq_action *h)
3415 { 3416 {
3416 struct list_head *cpu_list, local_list; 3417 struct list_head *cpu_list, local_list;
3417 3418
3418 local_irq_disable(); 3419 local_irq_disable();
3419 cpu_list = &__get_cpu_var(blk_cpu_done); 3420 cpu_list = &__get_cpu_var(blk_cpu_done);
3420 list_replace_init(cpu_list, &local_list); 3421 list_replace_init(cpu_list, &local_list);
3421 local_irq_enable(); 3422 local_irq_enable();
3422 3423
3423 while (!list_empty(&local_list)) { 3424 while (!list_empty(&local_list)) {
3424 struct request *rq = list_entry(local_list.next, struct request, donelist); 3425 struct request *rq = list_entry(local_list.next, struct request, donelist);
3425 3426
3426 list_del_init(&rq->donelist); 3427 list_del_init(&rq->donelist);
3427 rq->q->softirq_done_fn(rq); 3428 rq->q->softirq_done_fn(rq);
3428 } 3429 }
3429 } 3430 }
3430 3431
3431 #ifdef CONFIG_HOTPLUG_CPU 3432 #ifdef CONFIG_HOTPLUG_CPU
3432 3433
3433 static int blk_cpu_notify(struct notifier_block *self, unsigned long action, 3434 static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
3434 void *hcpu) 3435 void *hcpu)
3435 { 3436 {
3436 /* 3437 /*
3437 * If a CPU goes away, splice its entries to the current CPU 3438 * If a CPU goes away, splice its entries to the current CPU
3438 * and trigger a run of the softirq 3439 * and trigger a run of the softirq
3439 */ 3440 */
3440 if (action == CPU_DEAD) { 3441 if (action == CPU_DEAD) {
3441 int cpu = (unsigned long) hcpu; 3442 int cpu = (unsigned long) hcpu;
3442 3443
3443 local_irq_disable(); 3444 local_irq_disable();
3444 list_splice_init(&per_cpu(blk_cpu_done, cpu), 3445 list_splice_init(&per_cpu(blk_cpu_done, cpu),
3445 &__get_cpu_var(blk_cpu_done)); 3446 &__get_cpu_var(blk_cpu_done));
3446 raise_softirq_irqoff(BLOCK_SOFTIRQ); 3447 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3447 local_irq_enable(); 3448 local_irq_enable();
3448 } 3449 }
3449 3450
3450 return NOTIFY_OK; 3451 return NOTIFY_OK;
3451 } 3452 }
3452 3453
3453 3454
3454 static struct notifier_block __devinitdata blk_cpu_notifier = { 3455 static struct notifier_block __devinitdata blk_cpu_notifier = {
3455 .notifier_call = blk_cpu_notify, 3456 .notifier_call = blk_cpu_notify,
3456 }; 3457 };
3457 3458
3458 #endif /* CONFIG_HOTPLUG_CPU */ 3459 #endif /* CONFIG_HOTPLUG_CPU */
3459 3460
3460 /** 3461 /**
3461 * blk_complete_request - end I/O on a request 3462 * blk_complete_request - end I/O on a request
3462 * @req: the request being processed 3463 * @req: the request being processed
3463 * 3464 *
3464 * Description: 3465 * Description:
3465 * Ends all I/O on a request. It does not handle partial completions, 3466 * Ends all I/O on a request. It does not handle partial completions,
3466 * unless the driver actually implements this in its completion callback 3467 * unless the driver actually implements this in its completion callback
3467 * through requeueing. The actual completion happens out-of-order, 3468 * through requeueing. The actual completion happens out-of-order,
3468 * through a softirq handler. The user must have registered a completion 3469 * through a softirq handler. The user must have registered a completion
3469 * callback through blk_queue_softirq_done(). 3470 * callback through blk_queue_softirq_done().
3470 **/ 3471 **/
3471 3472
3472 void blk_complete_request(struct request *req) 3473 void blk_complete_request(struct request *req)
3473 { 3474 {
3474 struct list_head *cpu_list; 3475 struct list_head *cpu_list;
3475 unsigned long flags; 3476 unsigned long flags;
3476 3477
3477 BUG_ON(!req->q->softirq_done_fn); 3478 BUG_ON(!req->q->softirq_done_fn);
3478 3479
3479 local_irq_save(flags); 3480 local_irq_save(flags);
3480 3481
3481 cpu_list = &__get_cpu_var(blk_cpu_done); 3482 cpu_list = &__get_cpu_var(blk_cpu_done);
3482 list_add_tail(&req->donelist, cpu_list); 3483 list_add_tail(&req->donelist, cpu_list);
3483 raise_softirq_irqoff(BLOCK_SOFTIRQ); 3484 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3484 3485
3485 local_irq_restore(flags); 3486 local_irq_restore(flags);
3486 } 3487 }
3487 3488
3488 EXPORT_SYMBOL(blk_complete_request); 3489 EXPORT_SYMBOL(blk_complete_request);
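
A sketch of the split blk_complete_request() enables: the device's hard interrupt handler only calls blk_complete_request(), while the softirq_done_fn registered at queue setup does the heavy end-of-request work in BLOCK_SOFTIRQ context. The my_* names are illustrative and error handling is omitted:

static void my_softirq_done(struct request *rq)
{
        request_queue_t *q = rq->q;
        int uptodate = !rq->errors;

        add_disk_randomness(rq->rq_disk);
        end_that_request_chunk(rq, uptodate, rq->hard_nr_sectors << 9);

        /* end_that_request_last() must be called with the queue lock held */
        spin_lock_irq(q->queue_lock);
        end_that_request_last(rq, uptodate);
        spin_unlock_irq(q->queue_lock);
}

static void my_init_queue(request_queue_t *q)
{
        /* must be registered before blk_complete_request() is ever called */
        blk_queue_softirq_done(q, my_softirq_done);
}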
3489 3490
3490 /* 3491 /*
3491 * queue lock must be held 3492 * queue lock must be held
3492 */ 3493 */
3493 void end_that_request_last(struct request *req, int uptodate) 3494 void end_that_request_last(struct request *req, int uptodate)
3494 { 3495 {
3495 struct gendisk *disk = req->rq_disk; 3496 struct gendisk *disk = req->rq_disk;
3496 int error; 3497 int error;
3497 3498
3498 /* 3499 /*
3499 * extend the uptodate bool so a value < 0 can carry the I/O error code directly 3500 * extend the uptodate bool so a value < 0 can carry the I/O error code directly
3500 */ 3501 */
3501 error = 0; 3502 error = 0;
3502 if (end_io_error(uptodate)) 3503 if (end_io_error(uptodate))
3503 error = !uptodate ? -EIO : uptodate; 3504 error = !uptodate ? -EIO : uptodate;
3504 3505
3505 if (unlikely(laptop_mode) && blk_fs_request(req)) 3506 if (unlikely(laptop_mode) && blk_fs_request(req))
3506 laptop_io_completion(); 3507 laptop_io_completion();
3507 3508
3508 /* 3509 /*
3509 * Account IO completion. bar_rq isn't accounted as a normal 3510 * Account IO completion. bar_rq isn't accounted as a normal
3510 * IO on queueing nor completion. Accounting the containing 3511 * IO on queueing nor completion. Accounting the containing
3511 * request is enough. 3512 * request is enough.
3512 */ 3513 */
3513 if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { 3514 if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
3514 unsigned long duration = jiffies - req->start_time; 3515 unsigned long duration = jiffies - req->start_time;
3515 const int rw = rq_data_dir(req); 3516 const int rw = rq_data_dir(req);
3516 3517
3517 __disk_stat_inc(disk, ios[rw]); 3518 __disk_stat_inc(disk, ios[rw]);
3518 __disk_stat_add(disk, ticks[rw], duration); 3519 __disk_stat_add(disk, ticks[rw], duration);
3519 disk_round_stats(disk); 3520 disk_round_stats(disk);
3520 disk->in_flight--; 3521 disk->in_flight--;
3521 } 3522 }
3522 if (req->end_io) 3523 if (req->end_io)
3523 req->end_io(req, error); 3524 req->end_io(req, error);
3524 else 3525 else
3525 __blk_put_request(req->q, req); 3526 __blk_put_request(req->q, req);
3526 } 3527 }
3527 3528
3528 EXPORT_SYMBOL(end_that_request_last); 3529 EXPORT_SYMBOL(end_that_request_last);
3529 3530
3530 void end_request(struct request *req, int uptodate) 3531 void end_request(struct request *req, int uptodate)
3531 { 3532 {
3532 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { 3533 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {
3533 add_disk_randomness(req->rq_disk); 3534 add_disk_randomness(req->rq_disk);
3534 blkdev_dequeue_request(req); 3535 blkdev_dequeue_request(req);
3535 end_that_request_last(req, uptodate); 3536 end_that_request_last(req, uptodate);
3536 } 3537 }
3537 } 3538 }
3538 3539
3539 EXPORT_SYMBOL(end_request); 3540 EXPORT_SYMBOL(end_request);
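
end_request() wraps the first/last pair for the common one-chunk-at-a-time case. A sketch of its classic consumer, a simple request_fn; my_request_fn and my_transfer are illustrative, and a real driver would also handle errors and barriers:

static void my_request_fn(request_queue_t *q)
{
        struct request *req;

        while ((req = elv_next_request(q)) != NULL) {
                if (!blk_fs_request(req)) {
                        end_request(req, 0);    /* fail non-fs requests */
                        continue;
                }

                /*
                 * Move req->current_nr_sectors sectors starting at
                 * req->sector to/from req->buffer, in the direction
                 * given by rq_data_dir(req).
                 */
                my_transfer(q->queuedata, req);

                end_request(req, 1);            /* chunk done: advance or finish */
        }
}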
3540 3541
3541 void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio) 3542 void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
3542 { 3543 {
3543 /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ 3544 /* first two bits are identical in rq->cmd_flags and bio->bi_rw */
3544 rq->cmd_flags |= (bio->bi_rw & 3); 3545 rq->cmd_flags |= (bio->bi_rw & 3);
3545 3546
3546 rq->nr_phys_segments = bio_phys_segments(q, bio); 3547 rq->nr_phys_segments = bio_phys_segments(q, bio);
3547 rq->nr_hw_segments = bio_hw_segments(q, bio); 3548 rq->nr_hw_segments = bio_hw_segments(q, bio);
3548 rq->current_nr_sectors = bio_cur_sectors(bio); 3549 rq->current_nr_sectors = bio_cur_sectors(bio);
3549 rq->hard_cur_sectors = rq->current_nr_sectors; 3550 rq->hard_cur_sectors = rq->current_nr_sectors;
3550 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); 3551 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
3551 rq->buffer = bio_data(bio); 3552 rq->buffer = bio_data(bio);
3552 3553
3553 rq->bio = rq->biotail = bio; 3554 rq->bio = rq->biotail = bio;
3554 } 3555 }
3555 3556
3556 EXPORT_SYMBOL(blk_rq_bio_prep); 3557 EXPORT_SYMBOL(blk_rq_bio_prep);
3557 3558
3558 int kblockd_schedule_work(struct work_struct *work) 3559 int kblockd_schedule_work(struct work_struct *work)
3559 { 3560 {
3560 return queue_work(kblockd_workqueue, work); 3561 return queue_work(kblockd_workqueue, work);
3561 } 3562 }
3562 3563
3563 EXPORT_SYMBOL(kblockd_schedule_work); 3564 EXPORT_SYMBOL(kblockd_schedule_work);
3564 3565
3565 void kblockd_flush(void) 3566 void kblockd_flush(void)
3566 { 3567 {
3567 flush_workqueue(kblockd_workqueue); 3568 flush_workqueue(kblockd_workqueue);
3568 } 3569 }
3569 EXPORT_SYMBOL(kblockd_flush); 3570 EXPORT_SYMBOL(kblockd_flush);
3570 3571
3571 int __init blk_dev_init(void) 3572 int __init blk_dev_init(void)
3572 { 3573 {
3573 int i; 3574 int i;
3574 3575
3575 kblockd_workqueue = create_workqueue("kblockd"); 3576 kblockd_workqueue = create_workqueue("kblockd");
3576 if (!kblockd_workqueue) 3577 if (!kblockd_workqueue)
3577 panic("Failed to create kblockd\n"); 3578 panic("Failed to create kblockd\n");
3578 3579
3579 request_cachep = kmem_cache_create("blkdev_requests", 3580 request_cachep = kmem_cache_create("blkdev_requests",
3580 sizeof(struct request), 0, SLAB_PANIC, NULL, NULL); 3581 sizeof(struct request), 0, SLAB_PANIC, NULL, NULL);
3581 3582
3582 requestq_cachep = kmem_cache_create("blkdev_queue", 3583 requestq_cachep = kmem_cache_create("blkdev_queue",
3583 sizeof(request_queue_t), 0, SLAB_PANIC, NULL, NULL); 3584 sizeof(request_queue_t), 0, SLAB_PANIC, NULL, NULL);
3584 3585
3585 iocontext_cachep = kmem_cache_create("blkdev_ioc", 3586 iocontext_cachep = kmem_cache_create("blkdev_ioc",
3586 sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); 3587 sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
3587 3588
3588 for_each_possible_cpu(i) 3589 for_each_possible_cpu(i)
3589 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); 3590 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
3590 3591
3591 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); 3592 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
3592 register_hotcpu_notifier(&blk_cpu_notifier); 3593 register_hotcpu_notifier(&blk_cpu_notifier);
3593 3594
3594 blk_max_low_pfn = max_low_pfn; 3595 blk_max_low_pfn = max_low_pfn;
3595 blk_max_pfn = max_pfn; 3596 blk_max_pfn = max_pfn;
3596 3597
3597 return 0; 3598 return 0;
3598 } 3599 }
3599 3600
3600 /* 3601 /*
3601 * IO Context helper functions 3602 * IO Context helper functions
3602 */ 3603 */
3603 void put_io_context(struct io_context *ioc) 3604 void put_io_context(struct io_context *ioc)
3604 { 3605 {
3605 if (ioc == NULL) 3606 if (ioc == NULL)
3606 return; 3607 return;
3607 3608
3608 BUG_ON(atomic_read(&ioc->refcount) == 0); 3609 BUG_ON(atomic_read(&ioc->refcount) == 0);
3609 3610
3610 if (atomic_dec_and_test(&ioc->refcount)) { 3611 if (atomic_dec_and_test(&ioc->refcount)) {
3611 struct cfq_io_context *cic; 3612 struct cfq_io_context *cic;
3612 3613
3613 rcu_read_lock(); 3614 rcu_read_lock();
3614 if (ioc->aic && ioc->aic->dtor) 3615 if (ioc->aic && ioc->aic->dtor)
3615 ioc->aic->dtor(ioc->aic); 3616 ioc->aic->dtor(ioc->aic);
3616 if (ioc->cic_root.rb_node != NULL) { 3617 if (ioc->cic_root.rb_node != NULL) {
3617 struct rb_node *n = rb_first(&ioc->cic_root); 3618 struct rb_node *n = rb_first(&ioc->cic_root);
3618 3619
3619 cic = rb_entry(n, struct cfq_io_context, rb_node); 3620 cic = rb_entry(n, struct cfq_io_context, rb_node);
3620 cic->dtor(ioc); 3621 cic->dtor(ioc);
3621 } 3622 }
3622 rcu_read_unlock(); 3623 rcu_read_unlock();
3623 3624
3624 kmem_cache_free(iocontext_cachep, ioc); 3625 kmem_cache_free(iocontext_cachep, ioc);
3625 } 3626 }
3626 } 3627 }
3627 EXPORT_SYMBOL(put_io_context); 3628 EXPORT_SYMBOL(put_io_context);
3628 3629
3629 /* Called by the exiting task */ 3630 /* Called by the exiting task */
3630 void exit_io_context(void) 3631 void exit_io_context(void)
3631 { 3632 {
3632 unsigned long flags; 3633 unsigned long flags;
3633 struct io_context *ioc; 3634 struct io_context *ioc;
3634 struct cfq_io_context *cic; 3635 struct cfq_io_context *cic;
3635 3636
3636 local_irq_save(flags); 3637 local_irq_save(flags);
3637 task_lock(current); 3638 task_lock(current);
3638 ioc = current->io_context; 3639 ioc = current->io_context;
3639 current->io_context = NULL; 3640 current->io_context = NULL;
3640 ioc->task = NULL; 3641 ioc->task = NULL;
3641 task_unlock(current); 3642 task_unlock(current);
3642 local_irq_restore(flags); 3643 local_irq_restore(flags);
3643 3644
3644 if (ioc->aic && ioc->aic->exit) 3645 if (ioc->aic && ioc->aic->exit)
3645 ioc->aic->exit(ioc->aic); 3646 ioc->aic->exit(ioc->aic);
3646 if (ioc->cic_root.rb_node != NULL) { 3647 if (ioc->cic_root.rb_node != NULL) {
3647 cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); 3648 cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
3648 cic->exit(ioc); 3649 cic->exit(ioc);
3649 } 3650 }
3650 3651
3651 put_io_context(ioc); 3652 put_io_context(ioc);
3652 } 3653 }
3653 3654
3654 /* 3655 /*
3655 * If the current task has no IO context then create one and initialise it. 3656 * If the current task has no IO context then create one and initialise it.
3656 * Otherwise, return its existing IO context. 3657 * Otherwise, return its existing IO context.
3657 * 3658 *
3658 * This returned IO context doesn't have a specifically elevated refcount, 3659 * This returned IO context doesn't have a specifically elevated refcount,
3659 * but since the current task itself holds a reference, the context can be 3660 * but since the current task itself holds a reference, the context can be
3660 * used in general code, so long as it stays within `current` context. 3661 * used in general code, so long as it stays within `current` context.
3661 */ 3662 */
3662 struct io_context *current_io_context(gfp_t gfp_flags) 3663 struct io_context *current_io_context(gfp_t gfp_flags)
3663 { 3664 {
3664 struct task_struct *tsk = current; 3665 struct task_struct *tsk = current;
3665 struct io_context *ret; 3666 struct io_context *ret;
3666 3667
3667 ret = tsk->io_context; 3668 ret = tsk->io_context;
3668 if (likely(ret)) 3669 if (likely(ret))
3669 return ret; 3670 return ret;
3670 3671
3671 ret = kmem_cache_alloc(iocontext_cachep, gfp_flags); 3672 ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
3672 if (ret) { 3673 if (ret) {
3673 atomic_set(&ret->refcount, 1); 3674 atomic_set(&ret->refcount, 1);
3674 ret->task = current; 3675 ret->task = current;
3675 ret->set_ioprio = NULL; 3676 ret->set_ioprio = NULL;
3676 ret->last_waited = jiffies; /* doesn't matter... */ 3677 ret->last_waited = jiffies; /* doesn't matter... */
3677 ret->nr_batch_requests = 0; /* because this is 0 */ 3678 ret->nr_batch_requests = 0; /* because this is 0 */
3678 ret->aic = NULL; 3679 ret->aic = NULL;
3679 ret->cic_root.rb_node = NULL; 3680 ret->cic_root.rb_node = NULL;
3680 /* make sure set_task_ioprio() sees the settings above */ 3681 /* make sure set_task_ioprio() sees the settings above */
3681 smp_wmb(); 3682 smp_wmb();
3682 tsk->io_context = ret; 3683 tsk->io_context = ret;
3683 } 3684 }
3684 3685
3685 return ret; 3686 return ret;
3686 } 3687 }
3687 EXPORT_SYMBOL(current_io_context); 3688 EXPORT_SYMBOL(current_io_context);
3688 3689
3689 /* 3690 /*
3690 * If the current task has no IO context then create one and initialise it. 3691 * If the current task has no IO context then create one and initialise it.
3691 * If it does have a context, take a ref on it. 3692 * If it does have a context, take a ref on it.
3692 * 3693 *
3693 * This is always called in the context of the task which submitted the I/O. 3694 * This is always called in the context of the task which submitted the I/O.
3694 */ 3695 */
3695 struct io_context *get_io_context(gfp_t gfp_flags) 3696 struct io_context *get_io_context(gfp_t gfp_flags)
3696 { 3697 {
3697 struct io_context *ret; 3698 struct io_context *ret;
3698 ret = current_io_context(gfp_flags); 3699 ret = current_io_context(gfp_flags);
3699 if (likely(ret)) 3700 if (likely(ret))
3700 atomic_inc(&ret->refcount); 3701 atomic_inc(&ret->refcount);
3701 return ret; 3702 return ret;
3702 } 3703 }
3703 EXPORT_SYMBOL(get_io_context); 3704 EXPORT_SYMBOL(get_io_context);
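
The helpers above follow a simple reference rule: current_io_context() returns the task's context without bumping the refcount, while get_io_context() does bump it and must be balanced with put_io_context() once the pointer is stashed somewhere that can outlive the submitting call chain. A minimal sketch, with my_stash_ioc purely illustrative:

static void my_stash_ioc(struct io_context **my_stash)
{
        struct io_context *ioc = get_io_context(GFP_NOIO);      /* may be NULL */

        put_io_context(*my_stash);      /* put_io_context(NULL) is a no-op */
        *my_stash = ioc;
}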
3704 3705
3705 void copy_io_context(struct io_context **pdst, struct io_context **psrc) 3706 void copy_io_context(struct io_context **pdst, struct io_context **psrc)
3706 { 3707 {
3707 struct io_context *src = *psrc; 3708 struct io_context *src = *psrc;
3708 struct io_context *dst = *pdst; 3709 struct io_context *dst = *pdst;
3709 3710
3710 if (src) { 3711 if (src) {
3711 BUG_ON(atomic_read(&src->refcount) == 0); 3712 BUG_ON(atomic_read(&src->refcount) == 0);
3712 atomic_inc(&src->refcount); 3713 atomic_inc(&src->refcount);
3713 put_io_context(dst); 3714 put_io_context(dst);
3714 *pdst = src; 3715 *pdst = src;
3715 } 3716 }
3716 } 3717 }
3717 EXPORT_SYMBOL(copy_io_context); 3718 EXPORT_SYMBOL(copy_io_context);
3718 3719
3719 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) 3720 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
3720 { 3721 {
3721 struct io_context *temp; 3722 struct io_context *temp;
3722 temp = *ioc1; 3723 temp = *ioc1;
3723 *ioc1 = *ioc2; 3724 *ioc1 = *ioc2;
3724 *ioc2 = temp; 3725 *ioc2 = temp;
3725 } 3726 }
3726 EXPORT_SYMBOL(swap_io_context); 3727 EXPORT_SYMBOL(swap_io_context);
3727 3728
3728 /* 3729 /*
3729 * sysfs parts below 3730 * sysfs parts below
3730 */ 3731 */
3731 struct queue_sysfs_entry { 3732 struct queue_sysfs_entry {
3732 struct attribute attr; 3733 struct attribute attr;
3733 ssize_t (*show)(struct request_queue *, char *); 3734 ssize_t (*show)(struct request_queue *, char *);
3734 ssize_t (*store)(struct request_queue *, const char *, size_t); 3735 ssize_t (*store)(struct request_queue *, const char *, size_t);
3735 }; 3736 };
3736 3737
3737 static ssize_t 3738 static ssize_t
3738 queue_var_show(unsigned int var, char *page) 3739 queue_var_show(unsigned int var, char *page)
3739 { 3740 {
3740 return sprintf(page, "%d\n", var); 3741 return sprintf(page, "%d\n", var);
3741 } 3742 }
3742 3743
3743 static ssize_t 3744 static ssize_t
3744 queue_var_store(unsigned long *var, const char *page, size_t count) 3745 queue_var_store(unsigned long *var, const char *page, size_t count)
3745 { 3746 {
3746 char *p = (char *) page; 3747 char *p = (char *) page;
3747 3748
3748 *var = simple_strtoul(p, &p, 10); 3749 *var = simple_strtoul(p, &p, 10);
3749 return count; 3750 return count;
3750 } 3751 }
3751 3752
3752 static ssize_t queue_requests_show(struct request_queue *q, char *page) 3753 static ssize_t queue_requests_show(struct request_queue *q, char *page)
3753 { 3754 {
3754 return queue_var_show(q->nr_requests, (page)); 3755 return queue_var_show(q->nr_requests, (page));
3755 } 3756 }
3756 3757
3757 static ssize_t 3758 static ssize_t
3758 queue_requests_store(struct request_queue *q, const char *page, size_t count) 3759 queue_requests_store(struct request_queue *q, const char *page, size_t count)
3759 { 3760 {
3760 struct request_list *rl = &q->rq; 3761 struct request_list *rl = &q->rq;
3761 unsigned long nr; 3762 unsigned long nr;
3762 int ret = queue_var_store(&nr, page, count); 3763 int ret = queue_var_store(&nr, page, count);
3763 if (nr < BLKDEV_MIN_RQ) 3764 if (nr < BLKDEV_MIN_RQ)
3764 nr = BLKDEV_MIN_RQ; 3765 nr = BLKDEV_MIN_RQ;
3765 3766
3766 spin_lock_irq(q->queue_lock); 3767 spin_lock_irq(q->queue_lock);
3767 q->nr_requests = nr; 3768 q->nr_requests = nr;
3768 blk_queue_congestion_threshold(q); 3769 blk_queue_congestion_threshold(q);
3769 3770
3770 if (rl->count[READ] >= queue_congestion_on_threshold(q)) 3771 if (rl->count[READ] >= queue_congestion_on_threshold(q))
3771 set_queue_congested(q, READ); 3772 set_queue_congested(q, READ);
3772 else if (rl->count[READ] < queue_congestion_off_threshold(q)) 3773 else if (rl->count[READ] < queue_congestion_off_threshold(q))
3773 clear_queue_congested(q, READ); 3774 clear_queue_congested(q, READ);
3774 3775
3775 if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) 3776 if (rl->count[WRITE] >= queue_congestion_on_threshold(q))
3776 set_queue_congested(q, WRITE); 3777 set_queue_congested(q, WRITE);
3777 else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) 3778 else if (rl->count[WRITE] < queue_congestion_off_threshold(q))
3778 clear_queue_congested(q, WRITE); 3779 clear_queue_congested(q, WRITE);
3779 3780
3780 if (rl->count[READ] >= q->nr_requests) { 3781 if (rl->count[READ] >= q->nr_requests) {
3781 blk_set_queue_full(q, READ); 3782 blk_set_queue_full(q, READ);
3782 } else if (rl->count[READ]+1 <= q->nr_requests) { 3783 } else if (rl->count[READ]+1 <= q->nr_requests) {
3783 blk_clear_queue_full(q, READ); 3784 blk_clear_queue_full(q, READ);
3784 wake_up(&rl->wait[READ]); 3785 wake_up(&rl->wait[READ]);
3785 } 3786 }
3786 3787
3787 if (rl->count[WRITE] >= q->nr_requests) { 3788 if (rl->count[WRITE] >= q->nr_requests) {
3788 blk_set_queue_full(q, WRITE); 3789 blk_set_queue_full(q, WRITE);
3789 } else if (rl->count[WRITE]+1 <= q->nr_requests) { 3790 } else if (rl->count[WRITE]+1 <= q->nr_requests) {
3790 blk_clear_queue_full(q, WRITE); 3791 blk_clear_queue_full(q, WRITE);
3791 wake_up(&rl->wait[WRITE]); 3792 wake_up(&rl->wait[WRITE]);
3792 } 3793 }
3793 spin_unlock_irq(q->queue_lock); 3794 spin_unlock_irq(q->queue_lock);
3794 return ret; 3795 return ret;
3795 } 3796 }
3796 3797
3797 static ssize_t queue_ra_show(struct request_queue *q, char *page) 3798 static ssize_t queue_ra_show(struct request_queue *q, char *page)
3798 { 3799 {
3799 int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); 3800 int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
3800 3801
3801 return queue_var_show(ra_kb, (page)); 3802 return queue_var_show(ra_kb, (page));
3802 } 3803 }
3803 3804
3804 static ssize_t 3805 static ssize_t
3805 queue_ra_store(struct request_queue *q, const char *page, size_t count) 3806 queue_ra_store(struct request_queue *q, const char *page, size_t count)
3806 { 3807 {
3807 unsigned long ra_kb; 3808 unsigned long ra_kb;
3808 ssize_t ret = queue_var_store(&ra_kb, page, count); 3809 ssize_t ret = queue_var_store(&ra_kb, page, count);
3809 3810
3810 spin_lock_irq(q->queue_lock); 3811 spin_lock_irq(q->queue_lock);
3811 if (ra_kb > (q->max_sectors >> 1)) 3812 if (ra_kb > (q->max_sectors >> 1))
3812 ra_kb = (q->max_sectors >> 1); 3813 ra_kb = (q->max_sectors >> 1);
3813 3814
3814 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); 3815 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
3815 spin_unlock_irq(q->queue_lock); 3816 spin_unlock_irq(q->queue_lock);
3816 3817
3817 return ret; 3818 return ret;
3818 } 3819 }
3819 3820
3820 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) 3821 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
3821 { 3822 {
3822 int max_sectors_kb = q->max_sectors >> 1; 3823 int max_sectors_kb = q->max_sectors >> 1;
3823 3824
3824 return queue_var_show(max_sectors_kb, (page)); 3825 return queue_var_show(max_sectors_kb, (page));
3825 } 3826 }
3826 3827
3827 static ssize_t 3828 static ssize_t
3828 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) 3829 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
3829 { 3830 {
3830 unsigned long max_sectors_kb, 3831 unsigned long max_sectors_kb,
3831 max_hw_sectors_kb = q->max_hw_sectors >> 1, 3832 max_hw_sectors_kb = q->max_hw_sectors >> 1,
3832 page_kb = 1 << (PAGE_CACHE_SHIFT - 10); 3833 page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
3833 ssize_t ret = queue_var_store(&max_sectors_kb, page, count); 3834 ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
3834 int ra_kb; 3835 int ra_kb;
3835 3836
3836 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) 3837 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
3837 return -EINVAL; 3838 return -EINVAL;
3838 /* 3839 /*
3839 * Take the queue lock to update the readahead and max_sectors 3840 * Take the queue lock to update the readahead and max_sectors
3840 * values synchronously: 3841 * values synchronously:
3841 */ 3842 */
3842 spin_lock_irq(q->queue_lock); 3843 spin_lock_irq(q->queue_lock);
3843 /* 3844 /*
3844 * Trim readahead window as well, if necessary: 3845 * Trim readahead window as well, if necessary:
3845 */ 3846 */
3846 ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); 3847 ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
3847 if (ra_kb > max_sectors_kb) 3848 if (ra_kb > max_sectors_kb)
3848 q->backing_dev_info.ra_pages = 3849 q->backing_dev_info.ra_pages =
3849 max_sectors_kb >> (PAGE_CACHE_SHIFT - 10); 3850 max_sectors_kb >> (PAGE_CACHE_SHIFT - 10);
3850 3851
3851 q->max_sectors = max_sectors_kb << 1; 3852 q->max_sectors = max_sectors_kb << 1;
3852 spin_unlock_irq(q->queue_lock); 3853 spin_unlock_irq(q->queue_lock);
3853 3854
3854 return ret; 3855 return ret;
3855 } 3856 }
3856 3857
3857 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) 3858 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
3858 { 3859 {
3859 int max_hw_sectors_kb = q->max_hw_sectors >> 1; 3860 int max_hw_sectors_kb = q->max_hw_sectors >> 1;
3860 3861
3861 return queue_var_show(max_hw_sectors_kb, (page)); 3862 return queue_var_show(max_hw_sectors_kb, (page));
3862 } 3863 }
3863 3864
3864 3865
3865 static struct queue_sysfs_entry queue_requests_entry = { 3866 static struct queue_sysfs_entry queue_requests_entry = {
3866 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, 3867 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
3867 .show = queue_requests_show, 3868 .show = queue_requests_show,
3868 .store = queue_requests_store, 3869 .store = queue_requests_store,
3869 }; 3870 };
3870 3871
3871 static struct queue_sysfs_entry queue_ra_entry = { 3872 static struct queue_sysfs_entry queue_ra_entry = {
3872 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, 3873 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
3873 .show = queue_ra_show, 3874 .show = queue_ra_show,
3874 .store = queue_ra_store, 3875 .store = queue_ra_store,
3875 }; 3876 };
3876 3877
3877 static struct queue_sysfs_entry queue_max_sectors_entry = { 3878 static struct queue_sysfs_entry queue_max_sectors_entry = {
3878 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, 3879 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
3879 .show = queue_max_sectors_show, 3880 .show = queue_max_sectors_show,
3880 .store = queue_max_sectors_store, 3881 .store = queue_max_sectors_store,
3881 }; 3882 };
3882 3883
3883 static struct queue_sysfs_entry queue_max_hw_sectors_entry = { 3884 static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
3884 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, 3885 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
3885 .show = queue_max_hw_sectors_show, 3886 .show = queue_max_hw_sectors_show,
3886 }; 3887 };
3887 3888
3888 static struct queue_sysfs_entry queue_iosched_entry = { 3889 static struct queue_sysfs_entry queue_iosched_entry = {
3889 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, 3890 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
3890 .show = elv_iosched_show, 3891 .show = elv_iosched_show,
3891 .store = elv_iosched_store, 3892 .store = elv_iosched_store,
3892 }; 3893 };
3893 3894
3894 static struct attribute *default_attrs[] = { 3895 static struct attribute *default_attrs[] = {
3895 &queue_requests_entry.attr, 3896 &queue_requests_entry.attr,
3896 &queue_ra_entry.attr, 3897 &queue_ra_entry.attr,
3897 &queue_max_hw_sectors_entry.attr, 3898 &queue_max_hw_sectors_entry.attr,
3898 &queue_max_sectors_entry.attr, 3899 &queue_max_sectors_entry.attr,
3899 &queue_iosched_entry.attr, 3900 &queue_iosched_entry.attr,
3900 NULL, 3901 NULL,
3901 }; 3902 };
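
The table above is the full set of files that appear under /sys/block/<disk>/queue/ for a registered queue. Adding one more follows the same pattern: a show (and optionally store) helper plus a queue_sysfs_entry wired into default_attrs[]. A hypothetical example, not part of this patch:

static ssize_t queue_example_show(struct request_queue *q, char *page)
{
        /* expose any piece of queue state via the queue_var_show() helper */
        return queue_var_show(q->nr_requests, page);
}

static struct queue_sysfs_entry queue_example_entry = {
        .attr = {.name = "example", .mode = S_IRUGO },
        .show = queue_example_show,
};

The new &queue_example_entry.attr would then go before the NULL terminator in default_attrs[].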
3902 3903
3903 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) 3904 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
3904 3905
3905 static ssize_t 3906 static ssize_t
3906 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 3907 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3907 { 3908 {
3908 struct queue_sysfs_entry *entry = to_queue(attr); 3909 struct queue_sysfs_entry *entry = to_queue(attr);
3909 request_queue_t *q = container_of(kobj, struct request_queue, kobj); 3910 request_queue_t *q = container_of(kobj, struct request_queue, kobj);
3910 ssize_t res; 3911 ssize_t res;
3911 3912
3912 if (!entry->show) 3913 if (!entry->show)
3913 return -EIO; 3914 return -EIO;
3914 mutex_lock(&q->sysfs_lock); 3915 mutex_lock(&q->sysfs_lock);
3915 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { 3916 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
3916 mutex_unlock(&q->sysfs_lock); 3917 mutex_unlock(&q->sysfs_lock);
3917 return -ENOENT; 3918 return -ENOENT;
3918 } 3919 }
3919 res = entry->show(q, page); 3920 res = entry->show(q, page);
3920 mutex_unlock(&q->sysfs_lock); 3921 mutex_unlock(&q->sysfs_lock);
3921 return res; 3922 return res;
3922 } 3923 }
3923 3924
3924 static ssize_t 3925 static ssize_t
3925 queue_attr_store(struct kobject *kobj, struct attribute *attr, 3926 queue_attr_store(struct kobject *kobj, struct attribute *attr,
3926 const char *page, size_t length) 3927 const char *page, size_t length)
3927 { 3928 {
3928 struct queue_sysfs_entry *entry = to_queue(attr); 3929 struct queue_sysfs_entry *entry = to_queue(attr);
3929 request_queue_t *q = container_of(kobj, struct request_queue, kobj); 3930 request_queue_t *q = container_of(kobj, struct request_queue, kobj);
3930 3931
3931 ssize_t res; 3932 ssize_t res;
3932 3933
3933 if (!entry->store) 3934 if (!entry->store)
3934 return -EIO; 3935 return -EIO;
3935 mutex_lock(&q->sysfs_lock); 3936 mutex_lock(&q->sysfs_lock);
3936 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { 3937 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
3937 mutex_unlock(&q->sysfs_lock); 3938 mutex_unlock(&q->sysfs_lock);
3938 return -ENOENT; 3939 return -ENOENT;
3939 } 3940 }
3940 res = entry->store(q, page, length); 3941 res = entry->store(q, page, length);
3941 mutex_unlock(&q->sysfs_lock); 3942 mutex_unlock(&q->sysfs_lock);
3942 return res; 3943 return res;
3943 } 3944 }
3944 3945
3945 static struct sysfs_ops queue_sysfs_ops = { 3946 static struct sysfs_ops queue_sysfs_ops = {
3946 .show = queue_attr_show, 3947 .show = queue_attr_show,
3947 .store = queue_attr_store, 3948 .store = queue_attr_store,
3948 }; 3949 };
3949 3950
3950 static struct kobj_type queue_ktype = { 3951 static struct kobj_type queue_ktype = {
3951 .sysfs_ops = &queue_sysfs_ops, 3952 .sysfs_ops = &queue_sysfs_ops,
3952 .default_attrs = default_attrs, 3953 .default_attrs = default_attrs,
3953 .release = blk_release_queue, 3954 .release = blk_release_queue,
3954 }; 3955 };
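
A note on the sysfs plumbing above: queue_attr_show()/queue_attr_store() just dispatch to the per-entry ->show/->store under q->sysfs_lock, so extending the queue/ directory means adding another queue_sysfs_entry to default_attrs[]. A minimal, hypothetical sketch (the attribute name and helper below are invented for illustration, not part of this patch):

static ssize_t queue_example_show(struct request_queue *q, char *page)
{
        /* hypothetical read-only attribute mirroring nr_requests */
        return sprintf(page, "%lu\n", q->nr_requests);
}

static struct queue_sysfs_entry queue_example_entry = {
        .attr = {.name = "example", .mode = S_IRUGO },
        .show = queue_example_show,
};

/* ...and &queue_example_entry.attr would be listed in default_attrs[] above. */
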
3955 3956
3956 int blk_register_queue(struct gendisk *disk) 3957 int blk_register_queue(struct gendisk *disk)
3957 { 3958 {
3958 int ret; 3959 int ret;
3959 3960
3960 request_queue_t *q = disk->queue; 3961 request_queue_t *q = disk->queue;
3961 3962
3962 if (!q || !q->request_fn) 3963 if (!q || !q->request_fn)
3963 return -ENXIO; 3964 return -ENXIO;
3964 3965
3965 q->kobj.parent = kobject_get(&disk->kobj); 3966 q->kobj.parent = kobject_get(&disk->kobj);
3966 3967
3967 ret = kobject_add(&q->kobj); 3968 ret = kobject_add(&q->kobj);
3968 if (ret < 0) 3969 if (ret < 0)
3969 return ret; 3970 return ret;
3970 3971
3971 kobject_uevent(&q->kobj, KOBJ_ADD); 3972 kobject_uevent(&q->kobj, KOBJ_ADD);
3972 3973
3973 ret = elv_register_queue(q); 3974 ret = elv_register_queue(q);
3974 if (ret) { 3975 if (ret) {
3975 kobject_uevent(&q->kobj, KOBJ_REMOVE); 3976 kobject_uevent(&q->kobj, KOBJ_REMOVE);
3976 kobject_del(&q->kobj); 3977 kobject_del(&q->kobj);
3977 return ret; 3978 return ret;
3978 } 3979 }
3979 3980
3980 return 0; 3981 return 0;
3981 } 3982 }
3982 3983
3983 void blk_unregister_queue(struct gendisk *disk) 3984 void blk_unregister_queue(struct gendisk *disk)
3984 { 3985 {
3985 request_queue_t *q = disk->queue; 3986 request_queue_t *q = disk->queue;
3986 3987
3987 if (q && q->request_fn) { 3988 if (q && q->request_fn) {
3988 elv_unregister_queue(q); 3989 elv_unregister_queue(q);
3989 3990
3990 kobject_uevent(&q->kobj, KOBJ_REMOVE); 3991 kobject_uevent(&q->kobj, KOBJ_REMOVE);
3991 kobject_del(&q->kobj); 3992 kobject_del(&q->kobj);
3992 kobject_put(&disk->kobj); 3993 kobject_put(&disk->kobj);
3993 } 3994 }
3994 } 3995 }
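
For context (not shown in this patch): drivers normally do not call blk_register_queue()/blk_unregister_queue() directly; add_disk() and del_gendisk() take care of that once disk->queue is set. A rough, hypothetical driver-side sketch of how a queue ends up registered:

static DEFINE_SPINLOCK(example_lock);           /* hypothetical */

static void example_request_fn(request_queue_t *q)
{
        /* hypothetical: would service requests via elv_next_request(q) */
}

static int example_setup(struct gendisk *disk)  /* hypothetical driver hook */
{
        request_queue_t *q;

        q = blk_init_queue(example_request_fn, &example_lock);
        if (!q)
                return -ENOMEM;

        disk->queue = q;
        /* ... fill in major/minors, fops, capacity ... */
        add_disk(disk);         /* this path ends up in blk_register_queue() */
        return 0;
}
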
include/linux/blkdev.h
1 #ifndef _LINUX_BLKDEV_H 1 #ifndef _LINUX_BLKDEV_H
2 #define _LINUX_BLKDEV_H 2 #define _LINUX_BLKDEV_H
3 3
4 #include <linux/major.h> 4 #include <linux/major.h>
5 #include <linux/genhd.h> 5 #include <linux/genhd.h>
6 #include <linux/list.h> 6 #include <linux/list.h>
7 #include <linux/timer.h> 7 #include <linux/timer.h>
8 #include <linux/workqueue.h> 8 #include <linux/workqueue.h>
9 #include <linux/pagemap.h> 9 #include <linux/pagemap.h>
10 #include <linux/backing-dev.h> 10 #include <linux/backing-dev.h>
11 #include <linux/wait.h> 11 #include <linux/wait.h>
12 #include <linux/mempool.h> 12 #include <linux/mempool.h>
13 #include <linux/bio.h> 13 #include <linux/bio.h>
14 #include <linux/module.h> 14 #include <linux/module.h>
15 #include <linux/stringify.h> 15 #include <linux/stringify.h>
16 16
17 #include <asm/scatterlist.h> 17 #include <asm/scatterlist.h>
18 18
19 struct scsi_ioctl_command; 19 struct scsi_ioctl_command;
20 20
21 struct request_queue; 21 struct request_queue;
22 typedef struct request_queue request_queue_t; 22 typedef struct request_queue request_queue_t;
23 struct elevator_queue; 23 struct elevator_queue;
24 typedef struct elevator_queue elevator_t; 24 typedef struct elevator_queue elevator_t;
25 struct request_pm_state; 25 struct request_pm_state;
26 struct blk_trace; 26 struct blk_trace;
27 27
28 #define BLKDEV_MIN_RQ 4 28 #define BLKDEV_MIN_RQ 4
29 #define BLKDEV_MAX_RQ 128 /* Default maximum */ 29 #define BLKDEV_MAX_RQ 128 /* Default maximum */
30 30
31 /* 31 /*
32 * This is the per-process anticipatory I/O scheduler state. 32 * This is the per-process anticipatory I/O scheduler state.
33 */ 33 */
34 struct as_io_context { 34 struct as_io_context {
35 spinlock_t lock; 35 spinlock_t lock;
36 36
37 void (*dtor)(struct as_io_context *aic); /* destructor */ 37 void (*dtor)(struct as_io_context *aic); /* destructor */
38 void (*exit)(struct as_io_context *aic); /* called on task exit */ 38 void (*exit)(struct as_io_context *aic); /* called on task exit */
39 39
40 unsigned long state; 40 unsigned long state;
41 atomic_t nr_queued; /* queued reads & sync writes */ 41 atomic_t nr_queued; /* queued reads & sync writes */
42 atomic_t nr_dispatched; /* number of requests gone to the drivers */ 42 atomic_t nr_dispatched; /* number of requests gone to the drivers */
43 43
44 /* IO History tracking */ 44 /* IO History tracking */
45 /* Thinktime */ 45 /* Thinktime */
46 unsigned long last_end_request; 46 unsigned long last_end_request;
47 unsigned long ttime_total; 47 unsigned long ttime_total;
48 unsigned long ttime_samples; 48 unsigned long ttime_samples;
49 unsigned long ttime_mean; 49 unsigned long ttime_mean;
50 /* Layout pattern */ 50 /* Layout pattern */
51 unsigned int seek_samples; 51 unsigned int seek_samples;
52 sector_t last_request_pos; 52 sector_t last_request_pos;
53 u64 seek_total; 53 u64 seek_total;
54 sector_t seek_mean; 54 sector_t seek_mean;
55 }; 55 };
56 56
57 struct cfq_queue; 57 struct cfq_queue;
58 struct cfq_io_context { 58 struct cfq_io_context {
59 struct rb_node rb_node; 59 struct rb_node rb_node;
60 void *key; 60 void *key;
61 61
62 struct cfq_queue *cfqq[2]; 62 struct cfq_queue *cfqq[2];
63 63
64 struct io_context *ioc; 64 struct io_context *ioc;
65 65
66 unsigned long last_end_request; 66 unsigned long last_end_request;
67 sector_t last_request_pos; 67 sector_t last_request_pos;
68 unsigned long last_queue; 68 unsigned long last_queue;
69 69
70 unsigned long ttime_total; 70 unsigned long ttime_total;
71 unsigned long ttime_samples; 71 unsigned long ttime_samples;
72 unsigned long ttime_mean; 72 unsigned long ttime_mean;
73 73
74 unsigned int seek_samples; 74 unsigned int seek_samples;
75 u64 seek_total; 75 u64 seek_total;
76 sector_t seek_mean; 76 sector_t seek_mean;
77 77
78 struct list_head queue_list; 78 struct list_head queue_list;
79 79
80 void (*dtor)(struct io_context *); /* destructor */ 80 void (*dtor)(struct io_context *); /* destructor */
81 void (*exit)(struct io_context *); /* called on task exit */ 81 void (*exit)(struct io_context *); /* called on task exit */
82 }; 82 };
83 83
84 /* 84 /*
85 * This is the per-process I/O subsystem state. It is refcounted and 85 * This is the per-process I/O subsystem state. It is refcounted and
86 * kmalloc'ed. Currently all fields are modified in process io context 86 * kmalloc'ed. Currently all fields are modified in process io context
87 * (apart from the atomic refcount), so require no locking. 87 * (apart from the atomic refcount), so require no locking.
88 */ 88 */
89 struct io_context { 89 struct io_context {
90 atomic_t refcount; 90 atomic_t refcount;
91 struct task_struct *task; 91 struct task_struct *task;
92 92
93 int (*set_ioprio)(struct io_context *, unsigned int); 93 int (*set_ioprio)(struct io_context *, unsigned int);
94 94
95 /* 95 /*
96 * For request batching 96 * For request batching
97 */ 97 */
98 unsigned long last_waited; /* Time last woken after wait for request */ 98 unsigned long last_waited; /* Time last woken after wait for request */
99 int nr_batch_requests; /* Number of requests left in the batch */ 99 int nr_batch_requests; /* Number of requests left in the batch */
100 100
101 struct as_io_context *aic; 101 struct as_io_context *aic;
102 struct rb_root cic_root; 102 struct rb_root cic_root;
103 }; 103 };
104 104
105 void put_io_context(struct io_context *ioc); 105 void put_io_context(struct io_context *ioc);
106 void exit_io_context(void); 106 void exit_io_context(void);
107 struct io_context *current_io_context(gfp_t gfp_flags); 107 struct io_context *current_io_context(gfp_t gfp_flags);
108 struct io_context *get_io_context(gfp_t gfp_flags); 108 struct io_context *get_io_context(gfp_t gfp_flags);
109 void copy_io_context(struct io_context **pdst, struct io_context **psrc); 109 void copy_io_context(struct io_context **pdst, struct io_context **psrc);
110 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); 110 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
111 111
112 struct request; 112 struct request;
113 typedef void (rq_end_io_fn)(struct request *, int); 113 typedef void (rq_end_io_fn)(struct request *, int);
114 114
115 struct request_list { 115 struct request_list {
116 int count[2]; 116 int count[2];
117 int starved[2]; 117 int starved[2];
118 int elvpriv; 118 int elvpriv;
119 mempool_t *rq_pool; 119 mempool_t *rq_pool;
120 wait_queue_head_t wait[2]; 120 wait_queue_head_t wait[2];
121 }; 121 };
122 122
123 /* 123 /*
124 * request command types 124 * request command types
125 */ 125 */
126 enum rq_cmd_type_bits { 126 enum rq_cmd_type_bits {
127 REQ_TYPE_FS = 1, /* fs request */ 127 REQ_TYPE_FS = 1, /* fs request */
128 REQ_TYPE_BLOCK_PC, /* scsi command */ 128 REQ_TYPE_BLOCK_PC, /* scsi command */
129 REQ_TYPE_SENSE, /* sense request */ 129 REQ_TYPE_SENSE, /* sense request */
130 REQ_TYPE_PM_SUSPEND, /* suspend request */ 130 REQ_TYPE_PM_SUSPEND, /* suspend request */
131 REQ_TYPE_PM_RESUME, /* resume request */ 131 REQ_TYPE_PM_RESUME, /* resume request */
132 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ 132 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */
133 REQ_TYPE_FLUSH, /* flush request */ 133 REQ_TYPE_FLUSH, /* flush request */
134 REQ_TYPE_SPECIAL, /* driver defined type */ 134 REQ_TYPE_SPECIAL, /* driver defined type */
135 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ 135 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */
136 /* 136 /*
137 * for ATA/ATAPI devices. this really doesn't belong here, ide should 137 * for ATA/ATAPI devices. this really doesn't belong here, ide should
138 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver 138 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
139 * private REQ_LB opcodes to differentiate what type of request this is 139 * private REQ_LB opcodes to differentiate what type of request this is
140 */ 140 */
141 REQ_TYPE_ATA_CMD, 141 REQ_TYPE_ATA_CMD,
142 REQ_TYPE_ATA_TASK, 142 REQ_TYPE_ATA_TASK,
143 REQ_TYPE_ATA_TASKFILE, 143 REQ_TYPE_ATA_TASKFILE,
144 }; 144 };
145 145
146 /* 146 /*
147 * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being 147 * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
148 * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a 148 * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
148 * SCSI cdb). 149 * SCSI cdb).
150 * 150 *
151 * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, 151 * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need,
152 * typically to differentiate REQ_TYPE_SPECIAL requests. 152 * typically to differentiate REQ_TYPE_SPECIAL requests.
153 * 153 *
154 */ 154 */
155 enum { 155 enum {
156 /* 156 /*
157 * just examples for now 157 * just examples for now
158 */ 158 */
159 REQ_LB_OP_EJECT = 0x40, /* eject request */ 159 REQ_LB_OP_EJECT = 0x40, /* eject request */
160 REQ_LB_OP_FLUSH = 0x41, /* flush device */ 160 REQ_LB_OP_FLUSH = 0x41, /* flush device */
161 }; 161 };
162 162
163 /* 163 /*
164 * request type modified bits. first three bits match BIO_RW* bits, important 164 * request type modified bits. first three bits match BIO_RW* bits, important
165 */ 165 */
166 enum rq_flag_bits { 166 enum rq_flag_bits {
167 __REQ_RW, /* not set, read. set, write */ 167 __REQ_RW, /* not set, read. set, write */
168 __REQ_FAILFAST, /* no low level driver retries */ 168 __REQ_FAILFAST, /* no low level driver retries */
169 __REQ_SORTED, /* elevator knows about this request */ 169 __REQ_SORTED, /* elevator knows about this request */
170 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ 170 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
171 __REQ_HARDBARRIER, /* may not be passed by drive either */ 171 __REQ_HARDBARRIER, /* may not be passed by drive either */
172 __REQ_FUA, /* forced unit access */ 172 __REQ_FUA, /* forced unit access */
173 __REQ_NOMERGE, /* don't touch this for merging */ 173 __REQ_NOMERGE, /* don't touch this for merging */
174 __REQ_STARTED, /* drive already may have started this one */ 174 __REQ_STARTED, /* drive already may have started this one */
175 __REQ_DONTPREP, /* don't call prep for this one */ 175 __REQ_DONTPREP, /* don't call prep for this one */
176 __REQ_QUEUED, /* uses queueing */ 176 __REQ_QUEUED, /* uses queueing */
177 __REQ_ELVPRIV, /* elevator private data attached */ 177 __REQ_ELVPRIV, /* elevator private data attached */
178 __REQ_FAILED, /* set if the request failed */ 178 __REQ_FAILED, /* set if the request failed */
179 __REQ_QUIET, /* don't worry about errors */ 179 __REQ_QUIET, /* don't worry about errors */
180 __REQ_PREEMPT, /* set for "ide_preempt" requests */ 180 __REQ_PREEMPT, /* set for "ide_preempt" requests */
181 __REQ_ORDERED_COLOR, /* is before or after barrier */ 181 __REQ_ORDERED_COLOR, /* is before or after barrier */
182 __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ 182 __REQ_RW_SYNC, /* request is sync (O_DIRECT) */
183 __REQ_NR_BITS, /* stops here */ 183 __REQ_NR_BITS, /* stops here */
184 }; 184 };
185 185
186 #define REQ_RW (1 << __REQ_RW) 186 #define REQ_RW (1 << __REQ_RW)
187 #define REQ_FAILFAST (1 << __REQ_FAILFAST) 187 #define REQ_FAILFAST (1 << __REQ_FAILFAST)
188 #define REQ_SORTED (1 << __REQ_SORTED) 188 #define REQ_SORTED (1 << __REQ_SORTED)
189 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) 189 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
190 #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) 190 #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER)
191 #define REQ_FUA (1 << __REQ_FUA) 191 #define REQ_FUA (1 << __REQ_FUA)
192 #define REQ_NOMERGE (1 << __REQ_NOMERGE) 192 #define REQ_NOMERGE (1 << __REQ_NOMERGE)
193 #define REQ_STARTED (1 << __REQ_STARTED) 193 #define REQ_STARTED (1 << __REQ_STARTED)
194 #define REQ_DONTPREP (1 << __REQ_DONTPREP) 194 #define REQ_DONTPREP (1 << __REQ_DONTPREP)
195 #define REQ_QUEUED (1 << __REQ_QUEUED) 195 #define REQ_QUEUED (1 << __REQ_QUEUED)
196 #define REQ_ELVPRIV (1 << __REQ_ELVPRIV) 196 #define REQ_ELVPRIV (1 << __REQ_ELVPRIV)
197 #define REQ_FAILED (1 << __REQ_FAILED) 197 #define REQ_FAILED (1 << __REQ_FAILED)
198 #define REQ_QUIET (1 << __REQ_QUIET) 198 #define REQ_QUIET (1 << __REQ_QUIET)
199 #define REQ_PREEMPT (1 << __REQ_PREEMPT) 199 #define REQ_PREEMPT (1 << __REQ_PREEMPT)
200 #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) 200 #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
201 #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) 201 #define REQ_RW_SYNC (1 << __REQ_RW_SYNC)
202 202
203 #define BLK_MAX_CDB 16 203 #define BLK_MAX_CDB 16
204 204
205 /* 205 /*
206 * try to put the fields that are referenced together in the same cacheline 206 * try to put the fields that are referenced together in the same cacheline
207 */ 207 */
208 struct request { 208 struct request {
209 struct list_head queuelist; 209 struct list_head queuelist;
210 struct list_head donelist; 210 struct list_head donelist;
211 211
212 unsigned int cmd_flags; 212 unsigned int cmd_flags;
213 enum rq_cmd_type_bits cmd_type; 213 enum rq_cmd_type_bits cmd_type;
214 214
215 /* Maintain bio traversal state for part by part I/O submission. 215 /* Maintain bio traversal state for part by part I/O submission.
216 * hard_* are block layer internals, no driver should touch them! 216 * hard_* are block layer internals, no driver should touch them!
217 */ 217 */
218 218
219 sector_t sector; /* next sector to submit */ 219 sector_t sector; /* next sector to submit */
220 unsigned long nr_sectors; /* no. of sectors left to submit */ 220 unsigned long nr_sectors; /* no. of sectors left to submit */
221 /* no. of sectors left to submit in the current segment */ 221 /* no. of sectors left to submit in the current segment */
222 unsigned int current_nr_sectors; 222 unsigned int current_nr_sectors;
223 223
224 sector_t hard_sector; /* next sector to complete */ 224 sector_t hard_sector; /* next sector to complete */
225 unsigned long hard_nr_sectors; /* no. of sectors left to complete */ 225 unsigned long hard_nr_sectors; /* no. of sectors left to complete */
226 /* no. of sectors left to complete in the current segment */ 226 /* no. of sectors left to complete in the current segment */
227 unsigned int hard_cur_sectors; 227 unsigned int hard_cur_sectors;
228 228
229 struct bio *bio; 229 struct bio *bio;
230 struct bio *biotail; 230 struct bio *biotail;
231 231
232 struct hlist_node hash; /* merge hash */ 232 struct hlist_node hash; /* merge hash */
233 struct rb_node rb_node; /* sort/lookup */
233 234
234 void *elevator_private; 235 void *elevator_private;
235 void *completion_data; 236 void *completion_data;
236 237
237 int rq_status; /* should split this into a few status bits */ 238 int rq_status; /* should split this into a few status bits */
238 int errors; 239 int errors;
239 struct gendisk *rq_disk; 240 struct gendisk *rq_disk;
240 unsigned long start_time; 241 unsigned long start_time;
241 242
242 /* Number of scatter-gather DMA addr+len pairs after 243 /* Number of scatter-gather DMA addr+len pairs after
243 * physical address coalescing is performed. 244 * physical address coalescing is performed.
244 */ 245 */
245 unsigned short nr_phys_segments; 246 unsigned short nr_phys_segments;
246 247
247 /* Number of scatter-gather addr+len pairs after 248 /* Number of scatter-gather addr+len pairs after
248 * physical and DMA remapping hardware coalescing is performed. 249 * physical and DMA remapping hardware coalescing is performed.
249 * This is the number of scatter-gather entries the driver 250 * This is the number of scatter-gather entries the driver
250 * will actually have to deal with after DMA mapping is done. 251 * will actually have to deal with after DMA mapping is done.
251 */ 252 */
252 unsigned short nr_hw_segments; 253 unsigned short nr_hw_segments;
253 254
254 unsigned short ioprio; 255 unsigned short ioprio;
255 256
256 int tag; 257 int tag;
257 258
258 int ref_count; 259 int ref_count;
259 request_queue_t *q; 260 request_queue_t *q;
260 struct request_list *rl; 261 struct request_list *rl;
261 262
262 struct completion *waiting; 263 struct completion *waiting;
263 void *special; 264 void *special;
264 char *buffer; 265 char *buffer;
265 266
266 /* 267 /*
267 * when request is used as a packet command carrier 268 * when request is used as a packet command carrier
268 */ 269 */
269 unsigned int cmd_len; 270 unsigned int cmd_len;
270 unsigned char cmd[BLK_MAX_CDB]; 271 unsigned char cmd[BLK_MAX_CDB];
271 272
272 unsigned int data_len; 273 unsigned int data_len;
273 unsigned int sense_len; 274 unsigned int sense_len;
274 void *data; 275 void *data;
275 void *sense; 276 void *sense;
276 277
277 unsigned int timeout; 278 unsigned int timeout;
278 int retries; 279 int retries;
279 280
280 /* 281 /*
281 * completion callback. end_io_data should be folded in with waiting 282 * completion callback. end_io_data should be folded in with waiting
282 */ 283 */
283 rq_end_io_fn *end_io; 284 rq_end_io_fn *end_io;
284 void *end_io_data; 285 void *end_io_data;
285 }; 286 };
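
The struct request change above (the new rb_node next to the merge hash) is the hook the elevator core uses for the shared sort/lookup handling this patch introduces: the io scheduler still owns the struct rb_root, while the core keys the tree by rq->sector. A condensed sketch of that insertion logic, modelled on the new elv_rb_add() helper (this is an illustration, not the exact function):

/* assumes <linux/rbtree.h>; the io scheduler supplies 'root' */
static void example_rb_add(struct rb_root *root, struct request *rq)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct request *__rq;

        while (*p) {
                parent = *p;
                __rq = rb_entry(parent, struct request, rb_node);

                if (rq->sector < __rq->sector)
                        p = &(*p)->rb_left;
                else
                        p = &(*p)->rb_right;
        }

        rb_link_node(&rq->rb_node, parent, p);
        rb_insert_color(&rq->rb_node, root);
}

Lookup and removal follow the same pattern via the elv_rb_find()/elv_rb_del() helpers added elsewhere in this patch.
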
286 287
287 /* 288 /*
288 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME 289 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
289 * requests. Some step values could eventually be made generic. 290 * requests. Some step values could eventually be made generic.
290 */ 291 */
291 struct request_pm_state 292 struct request_pm_state
292 { 293 {
293 /* PM state machine step value, currently driver specific */ 294 /* PM state machine step value, currently driver specific */
294 int pm_step; 295 int pm_step;
295 /* requested PM state value (S1, S2, S3, S4, ...) */ 296 /* requested PM state value (S1, S2, S3, S4, ...) */
296 u32 pm_state; 297 u32 pm_state;
297 void* data; /* for driver use */ 298 void* data; /* for driver use */
298 }; 299 };
299 300
300 #include <linux/elevator.h> 301 #include <linux/elevator.h>
301 302
302 typedef int (merge_request_fn) (request_queue_t *, struct request *, 303 typedef int (merge_request_fn) (request_queue_t *, struct request *,
303 struct bio *); 304 struct bio *);
304 typedef int (merge_requests_fn) (request_queue_t *, struct request *, 305 typedef int (merge_requests_fn) (request_queue_t *, struct request *,
305 struct request *); 306 struct request *);
306 typedef void (request_fn_proc) (request_queue_t *q); 307 typedef void (request_fn_proc) (request_queue_t *q);
307 typedef int (make_request_fn) (request_queue_t *q, struct bio *bio); 308 typedef int (make_request_fn) (request_queue_t *q, struct bio *bio);
308 typedef int (prep_rq_fn) (request_queue_t *, struct request *); 309 typedef int (prep_rq_fn) (request_queue_t *, struct request *);
309 typedef void (unplug_fn) (request_queue_t *); 310 typedef void (unplug_fn) (request_queue_t *);
310 311
311 struct bio_vec; 312 struct bio_vec;
312 typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *); 313 typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *);
313 typedef void (activity_fn) (void *data, int rw); 314 typedef void (activity_fn) (void *data, int rw);
314 typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *); 315 typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *);
315 typedef void (prepare_flush_fn) (request_queue_t *, struct request *); 316 typedef void (prepare_flush_fn) (request_queue_t *, struct request *);
316 typedef void (softirq_done_fn)(struct request *); 317 typedef void (softirq_done_fn)(struct request *);
317 318
318 enum blk_queue_state { 319 enum blk_queue_state {
319 Queue_down, 320 Queue_down,
320 Queue_up, 321 Queue_up,
321 }; 322 };
322 323
323 struct blk_queue_tag { 324 struct blk_queue_tag {
324 struct request **tag_index; /* map of busy tags */ 325 struct request **tag_index; /* map of busy tags */
325 unsigned long *tag_map; /* bit map of free/busy tags */ 326 unsigned long *tag_map; /* bit map of free/busy tags */
326 struct list_head busy_list; /* fifo list of busy tags */ 327 struct list_head busy_list; /* fifo list of busy tags */
327 int busy; /* current depth */ 328 int busy; /* current depth */
328 int max_depth; /* what we will send to device */ 329 int max_depth; /* what we will send to device */
329 int real_max_depth; /* what the array can hold */ 330 int real_max_depth; /* what the array can hold */
330 atomic_t refcnt; /* map can be shared */ 331 atomic_t refcnt; /* map can be shared */
331 }; 332 };
332 333
333 struct request_queue 334 struct request_queue
334 { 335 {
335 /* 336 /*
336 * Together with queue_head for cacheline sharing 337 * Together with queue_head for cacheline sharing
337 */ 338 */
338 struct list_head queue_head; 339 struct list_head queue_head;
339 struct request *last_merge; 340 struct request *last_merge;
340 elevator_t *elevator; 341 elevator_t *elevator;
341 342
342 /* 343 /*
343 * the queue request freelist, one for reads and one for writes 344 * the queue request freelist, one for reads and one for writes
344 */ 345 */
345 struct request_list rq; 346 struct request_list rq;
346 347
347 request_fn_proc *request_fn; 348 request_fn_proc *request_fn;
348 merge_request_fn *back_merge_fn; 349 merge_request_fn *back_merge_fn;
349 merge_request_fn *front_merge_fn; 350 merge_request_fn *front_merge_fn;
350 merge_requests_fn *merge_requests_fn; 351 merge_requests_fn *merge_requests_fn;
351 make_request_fn *make_request_fn; 352 make_request_fn *make_request_fn;
352 prep_rq_fn *prep_rq_fn; 353 prep_rq_fn *prep_rq_fn;
353 unplug_fn *unplug_fn; 354 unplug_fn *unplug_fn;
354 merge_bvec_fn *merge_bvec_fn; 355 merge_bvec_fn *merge_bvec_fn;
355 activity_fn *activity_fn; 356 activity_fn *activity_fn;
356 issue_flush_fn *issue_flush_fn; 357 issue_flush_fn *issue_flush_fn;
357 prepare_flush_fn *prepare_flush_fn; 358 prepare_flush_fn *prepare_flush_fn;
358 softirq_done_fn *softirq_done_fn; 359 softirq_done_fn *softirq_done_fn;
359 360
360 /* 361 /*
361 * Dispatch queue sorting 362 * Dispatch queue sorting
362 */ 363 */
363 sector_t end_sector; 364 sector_t end_sector;
364 struct request *boundary_rq; 365 struct request *boundary_rq;
365 366
366 /* 367 /*
367 * Auto-unplugging state 368 * Auto-unplugging state
368 */ 369 */
369 struct timer_list unplug_timer; 370 struct timer_list unplug_timer;
370 int unplug_thresh; /* After this many requests */ 371 int unplug_thresh; /* After this many requests */
371 unsigned long unplug_delay; /* After this many jiffies */ 372 unsigned long unplug_delay; /* After this many jiffies */
372 struct work_struct unplug_work; 373 struct work_struct unplug_work;
373 374
374 struct backing_dev_info backing_dev_info; 375 struct backing_dev_info backing_dev_info;
375 376
376 /* 377 /*
377 * The queue owner gets to use this for whatever they like. 378 * The queue owner gets to use this for whatever they like.
378 * ll_rw_blk doesn't touch it. 379 * ll_rw_blk doesn't touch it.
379 */ 380 */
380 void *queuedata; 381 void *queuedata;
381 382
382 void *activity_data; 383 void *activity_data;
383 384
384 /* 385 /*
385 * queue needs bounce pages for pages above this limit 386 * queue needs bounce pages for pages above this limit
386 */ 387 */
387 unsigned long bounce_pfn; 388 unsigned long bounce_pfn;
388 gfp_t bounce_gfp; 389 gfp_t bounce_gfp;
389 390
390 /* 391 /*
391 * various queue flags, see QUEUE_* below 392 * various queue flags, see QUEUE_* below
392 */ 393 */
393 unsigned long queue_flags; 394 unsigned long queue_flags;
394 395
395 /* 396 /*
396 * protects queue structures from reentrancy. ->__queue_lock should 397 * protects queue structures from reentrancy. ->__queue_lock should
397 * _never_ be used directly, it is queue private. always use 398 * _never_ be used directly, it is queue private. always use
398 * ->queue_lock. 399 * ->queue_lock.
399 */ 400 */
400 spinlock_t __queue_lock; 401 spinlock_t __queue_lock;
401 spinlock_t *queue_lock; 402 spinlock_t *queue_lock;
402 403
403 /* 404 /*
404 * queue kobject 405 * queue kobject
405 */ 406 */
406 struct kobject kobj; 407 struct kobject kobj;
407 408
408 /* 409 /*
409 * queue settings 410 * queue settings
410 */ 411 */
411 unsigned long nr_requests; /* Max # of requests */ 412 unsigned long nr_requests; /* Max # of requests */
412 unsigned int nr_congestion_on; 413 unsigned int nr_congestion_on;
413 unsigned int nr_congestion_off; 414 unsigned int nr_congestion_off;
414 unsigned int nr_batching; 415 unsigned int nr_batching;
415 416
416 unsigned int max_sectors; 417 unsigned int max_sectors;
417 unsigned int max_hw_sectors; 418 unsigned int max_hw_sectors;
418 unsigned short max_phys_segments; 419 unsigned short max_phys_segments;
419 unsigned short max_hw_segments; 420 unsigned short max_hw_segments;
420 unsigned short hardsect_size; 421 unsigned short hardsect_size;
421 unsigned int max_segment_size; 422 unsigned int max_segment_size;
422 423
423 unsigned long seg_boundary_mask; 424 unsigned long seg_boundary_mask;
424 unsigned int dma_alignment; 425 unsigned int dma_alignment;
425 426
426 struct blk_queue_tag *queue_tags; 427 struct blk_queue_tag *queue_tags;
427 428
428 unsigned int nr_sorted; 429 unsigned int nr_sorted;
429 unsigned int in_flight; 430 unsigned int in_flight;
430 431
431 /* 432 /*
432 * sg stuff 433 * sg stuff
433 */ 434 */
434 unsigned int sg_timeout; 435 unsigned int sg_timeout;
435 unsigned int sg_reserved_size; 436 unsigned int sg_reserved_size;
436 int node; 437 int node;
437 #ifdef CONFIG_BLK_DEV_IO_TRACE 438 #ifdef CONFIG_BLK_DEV_IO_TRACE
438 struct blk_trace *blk_trace; 439 struct blk_trace *blk_trace;
439 #endif 440 #endif
440 /* 441 /*
441 * reserved for flush operations 442 * reserved for flush operations
442 */ 443 */
443 unsigned int ordered, next_ordered, ordseq; 444 unsigned int ordered, next_ordered, ordseq;
444 int orderr, ordcolor; 445 int orderr, ordcolor;
445 struct request pre_flush_rq, bar_rq, post_flush_rq; 446 struct request pre_flush_rq, bar_rq, post_flush_rq;
446 struct request *orig_bar_rq; 447 struct request *orig_bar_rq;
447 unsigned int bi_size; 448 unsigned int bi_size;
448 449
449 struct mutex sysfs_lock; 450 struct mutex sysfs_lock;
450 }; 451 };
451 452
452 #define RQ_INACTIVE (-1) 453 #define RQ_INACTIVE (-1)
453 #define RQ_ACTIVE 1 454 #define RQ_ACTIVE 1
454 455
455 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ 456 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
456 #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ 457 #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
457 #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ 458 #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
458 #define QUEUE_FLAG_READFULL 3 /* read queue has been filled */ 459 #define QUEUE_FLAG_READFULL 3 /* read queue has been filled */
459 #define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */ 460 #define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */
460 #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ 461 #define QUEUE_FLAG_DEAD 5 /* queue being torn down */
461 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ 462 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */
462 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ 463 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */
463 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ 464 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */
464 465
465 enum { 466 enum {
466 /* 467 /*
467 * Hardbarrier is supported with one of the following methods. 468 * Hardbarrier is supported with one of the following methods.
468 * 469 *
469 * NONE : hardbarrier unsupported 470 * NONE : hardbarrier unsupported
470 * DRAIN : ordering by draining is enough 471 * DRAIN : ordering by draining is enough
471 * DRAIN_FLUSH : ordering by draining w/ pre and post flushes 472 * DRAIN_FLUSH : ordering by draining w/ pre and post flushes
472 * DRAIN_FUA : ordering by draining w/ pre flush and FUA write 473 * DRAIN_FUA : ordering by draining w/ pre flush and FUA write
473 * TAG : ordering by tag is enough 474 * TAG : ordering by tag is enough
474 * TAG_FLUSH : ordering by tag w/ pre and post flushes 475 * TAG_FLUSH : ordering by tag w/ pre and post flushes
475 * TAG_FUA : ordering by tag w/ pre flush and FUA write 476 * TAG_FUA : ordering by tag w/ pre flush and FUA write
476 */ 477 */
477 QUEUE_ORDERED_NONE = 0x00, 478 QUEUE_ORDERED_NONE = 0x00,
478 QUEUE_ORDERED_DRAIN = 0x01, 479 QUEUE_ORDERED_DRAIN = 0x01,
479 QUEUE_ORDERED_TAG = 0x02, 480 QUEUE_ORDERED_TAG = 0x02,
480 481
481 QUEUE_ORDERED_PREFLUSH = 0x10, 482 QUEUE_ORDERED_PREFLUSH = 0x10,
482 QUEUE_ORDERED_POSTFLUSH = 0x20, 483 QUEUE_ORDERED_POSTFLUSH = 0x20,
483 QUEUE_ORDERED_FUA = 0x40, 484 QUEUE_ORDERED_FUA = 0x40,
484 485
485 QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | 486 QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
486 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, 487 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
487 QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | 488 QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN |
488 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, 489 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
489 QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | 490 QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG |
490 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, 491 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
491 QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | 492 QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG |
492 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, 493 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
493 494
494 /* 495 /*
495 * Ordered operation sequence 496 * Ordered operation sequence
496 */ 497 */
497 QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ 498 QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */
498 QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ 499 QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */
499 QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ 500 QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */
500 QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ 501 QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */
501 QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ 502 QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */
502 QUEUE_ORDSEQ_DONE = 0x20, 503 QUEUE_ORDSEQ_DONE = 0x20,
503 }; 504 };
504 505
505 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) 506 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
506 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) 507 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
507 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 508 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
508 #define blk_queue_flushing(q) ((q)->ordseq) 509 #define blk_queue_flushing(q) ((q)->ordseq)
509 510
510 #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) 511 #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS)
511 #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) 512 #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
512 #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) 513 #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL)
513 #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) 514 #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE)
514 515
515 #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) 516 #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST)
516 #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) 517 #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED)
517 518
518 #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) 519 #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq))
519 520
520 #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) 521 #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
521 #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) 522 #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME)
522 #define blk_pm_request(rq) \ 523 #define blk_pm_request(rq) \
523 (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) 524 (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
524 525
525 #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) 526 #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED)
526 #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) 527 #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER)
527 #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) 528 #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA)
528 529
529 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) 530 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
530 531
531 #define rq_data_dir(rq) ((rq)->cmd_flags & 1) 532 #define rq_data_dir(rq) ((rq)->cmd_flags & 1)
532 533
533 static inline int blk_queue_full(struct request_queue *q, int rw) 534 static inline int blk_queue_full(struct request_queue *q, int rw)
534 { 535 {
535 if (rw == READ) 536 if (rw == READ)
536 return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); 537 return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
537 return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); 538 return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
538 } 539 }
539 540
540 static inline void blk_set_queue_full(struct request_queue *q, int rw) 541 static inline void blk_set_queue_full(struct request_queue *q, int rw)
541 { 542 {
542 if (rw == READ) 543 if (rw == READ)
543 set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); 544 set_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
544 else 545 else
545 set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); 546 set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
546 } 547 }
547 548
548 static inline void blk_clear_queue_full(struct request_queue *q, int rw) 549 static inline void blk_clear_queue_full(struct request_queue *q, int rw)
549 { 550 {
550 if (rw == READ) 551 if (rw == READ)
551 clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); 552 clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
552 else 553 else
553 clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); 554 clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
554 } 555 }
555 556
556 557
557 /* 558 /*
558 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may 559 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may
559 * it already be started by driver. 560 * it already be started by driver.
560 */ 561 */
561 #define RQ_NOMERGE_FLAGS \ 562 #define RQ_NOMERGE_FLAGS \
562 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) 563 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
563 #define rq_mergeable(rq) \ 564 #define rq_mergeable(rq) \
564 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) 565 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
565 566
566 /* 567 /*
567 * noop, requests are automagically marked as active/inactive by I/O 568 * noop, requests are automagically marked as active/inactive by I/O
568 * scheduler -- see elv_next_request 569 * scheduler -- see elv_next_request
569 */ 570 */
570 #define blk_queue_headactive(q, head_active) 571 #define blk_queue_headactive(q, head_active)
571 572
572 /* 573 /*
573 * q->prep_rq_fn return values 574 * q->prep_rq_fn return values
574 */ 575 */
575 #define BLKPREP_OK 0 /* serve it */ 576 #define BLKPREP_OK 0 /* serve it */
576 #define BLKPREP_KILL 1 /* fatal error, kill */ 577 #define BLKPREP_KILL 1 /* fatal error, kill */
577 #define BLKPREP_DEFER 2 /* leave on queue */ 578 #define BLKPREP_DEFER 2 /* leave on queue */
578 579
579 extern unsigned long blk_max_low_pfn, blk_max_pfn; 580 extern unsigned long blk_max_low_pfn, blk_max_pfn;
580 581
581 /* 582 /*
582 * standard bounce addresses: 583 * standard bounce addresses:
583 * 584 *
584 * BLK_BOUNCE_HIGH : bounce all highmem pages 585 * BLK_BOUNCE_HIGH : bounce all highmem pages
585 * BLK_BOUNCE_ANY : don't bounce anything 586 * BLK_BOUNCE_ANY : don't bounce anything
586 * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary 587 * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary
587 */ 588 */
588 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) 589 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT)
589 #define BLK_BOUNCE_ANY ((u64)blk_max_pfn << PAGE_SHIFT) 590 #define BLK_BOUNCE_ANY ((u64)blk_max_pfn << PAGE_SHIFT)
590 #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) 591 #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD)
591 592
592 #ifdef CONFIG_MMU 593 #ifdef CONFIG_MMU
593 extern int init_emergency_isa_pool(void); 594 extern int init_emergency_isa_pool(void);
594 extern void blk_queue_bounce(request_queue_t *q, struct bio **bio); 595 extern void blk_queue_bounce(request_queue_t *q, struct bio **bio);
595 #else 596 #else
596 static inline int init_emergency_isa_pool(void) 597 static inline int init_emergency_isa_pool(void)
597 { 598 {
598 return 0; 599 return 0;
599 } 600 }
600 static inline void blk_queue_bounce(request_queue_t *q, struct bio **bio) 601 static inline void blk_queue_bounce(request_queue_t *q, struct bio **bio)
601 { 602 {
602 } 603 }
603 #endif /* CONFIG_MMU */ 604 #endif /* CONFIG_MMU */
604 605
605 #define rq_for_each_bio(_bio, rq) \ 606 #define rq_for_each_bio(_bio, rq) \
606 if ((rq->bio)) \ 607 if ((rq->bio)) \
607 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) 608 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
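
rq_for_each_bio() above walks the bio chain hanging off a request (bio through biotail). A small illustrative use, totalling the bytes a request carries (hypothetical helper, not from this patch):

static unsigned int example_rq_bytes(struct request *rq)
{
        struct bio *bio;
        unsigned int bytes = 0;

        rq_for_each_bio(bio, rq)
                bytes += bio->bi_size;  /* bytes carried by this bio */

        return bytes;
}
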
608 609
609 struct sec_size { 610 struct sec_size {
610 unsigned block_size; 611 unsigned block_size;
611 unsigned block_size_bits; 612 unsigned block_size_bits;
612 }; 613 };
613 614
614 extern int blk_register_queue(struct gendisk *disk); 615 extern int blk_register_queue(struct gendisk *disk);
615 extern void blk_unregister_queue(struct gendisk *disk); 616 extern void blk_unregister_queue(struct gendisk *disk);
616 extern void register_disk(struct gendisk *dev); 617 extern void register_disk(struct gendisk *dev);
617 extern void generic_make_request(struct bio *bio); 618 extern void generic_make_request(struct bio *bio);
618 extern void blk_put_request(struct request *); 619 extern void blk_put_request(struct request *);
619 extern void __blk_put_request(request_queue_t *, struct request *); 620 extern void __blk_put_request(request_queue_t *, struct request *);
620 extern void blk_end_sync_rq(struct request *rq, int error); 621 extern void blk_end_sync_rq(struct request *rq, int error);
621 extern struct request *blk_get_request(request_queue_t *, int, gfp_t); 622 extern struct request *blk_get_request(request_queue_t *, int, gfp_t);
622 extern void blk_insert_request(request_queue_t *, struct request *, int, void *); 623 extern void blk_insert_request(request_queue_t *, struct request *, int, void *);
623 extern void blk_requeue_request(request_queue_t *, struct request *); 624 extern void blk_requeue_request(request_queue_t *, struct request *);
624 extern void blk_plug_device(request_queue_t *); 625 extern void blk_plug_device(request_queue_t *);
625 extern int blk_remove_plug(request_queue_t *); 626 extern int blk_remove_plug(request_queue_t *);
626 extern void blk_recount_segments(request_queue_t *, struct bio *); 627 extern void blk_recount_segments(request_queue_t *, struct bio *);
627 extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *); 628 extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *);
628 extern int sg_scsi_ioctl(struct file *, struct request_queue *, 629 extern int sg_scsi_ioctl(struct file *, struct request_queue *,
629 struct gendisk *, struct scsi_ioctl_command __user *); 630 struct gendisk *, struct scsi_ioctl_command __user *);
630 extern void blk_start_queue(request_queue_t *q); 631 extern void blk_start_queue(request_queue_t *q);
631 extern void blk_stop_queue(request_queue_t *q); 632 extern void blk_stop_queue(request_queue_t *q);
632 extern void blk_sync_queue(struct request_queue *q); 633 extern void blk_sync_queue(struct request_queue *q);
633 extern void __blk_stop_queue(request_queue_t *q); 634 extern void __blk_stop_queue(request_queue_t *q);
634 extern void blk_run_queue(request_queue_t *); 635 extern void blk_run_queue(request_queue_t *);
635 extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); 636 extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
636 extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); 637 extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int);
637 extern int blk_rq_unmap_user(struct bio *, unsigned int); 638 extern int blk_rq_unmap_user(struct bio *, unsigned int);
638 extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, gfp_t); 639 extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, gfp_t);
639 extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int); 640 extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int);
640 extern int blk_execute_rq(request_queue_t *, struct gendisk *, 641 extern int blk_execute_rq(request_queue_t *, struct gendisk *,
641 struct request *, int); 642 struct request *, int);
642 extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *, 643 extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *,
643 struct request *, int, rq_end_io_fn *); 644 struct request *, int, rq_end_io_fn *);
644 645
645 static inline request_queue_t *bdev_get_queue(struct block_device *bdev) 646 static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
646 { 647 {
647 return bdev->bd_disk->queue; 648 return bdev->bd_disk->queue;
648 } 649 }
649 650
650 static inline void blk_run_backing_dev(struct backing_dev_info *bdi, 651 static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
651 struct page *page) 652 struct page *page)
652 { 653 {
653 if (bdi && bdi->unplug_io_fn) 654 if (bdi && bdi->unplug_io_fn)
654 bdi->unplug_io_fn(bdi, page); 655 bdi->unplug_io_fn(bdi, page);
655 } 656 }
656 657
657 static inline void blk_run_address_space(struct address_space *mapping) 658 static inline void blk_run_address_space(struct address_space *mapping)
658 { 659 {
659 if (mapping) 660 if (mapping)
660 blk_run_backing_dev(mapping->backing_dev_info, NULL); 661 blk_run_backing_dev(mapping->backing_dev_info, NULL);
661 } 662 }
662 663
663 /* 664 /*
664 * end_request() and friends. Must be called with the request queue spinlock 665 * end_request() and friends. Must be called with the request queue spinlock
665 * acquired. All functions called within end_request() _must_be_ atomic. 666 * acquired. All functions called within end_request() _must_be_ atomic.
666 * 667 *
667 * Several drivers define their own end_request and call 668 * Several drivers define their own end_request and call
668 * end_that_request_first() and end_that_request_last() 669 * end_that_request_first() and end_that_request_last()
669 * for parts of the original function. This prevents 670 * for parts of the original function. This prevents
670 * code duplication in drivers. 671 * code duplication in drivers.
671 */ 672 */
672 extern int end_that_request_first(struct request *, int, int); 673 extern int end_that_request_first(struct request *, int, int);
673 extern int end_that_request_chunk(struct request *, int, int); 674 extern int end_that_request_chunk(struct request *, int, int);
674 extern void end_that_request_last(struct request *, int); 675 extern void end_that_request_last(struct request *, int);
675 extern void end_request(struct request *req, int uptodate); 676 extern void end_request(struct request *req, int uptodate);
676 extern void blk_complete_request(struct request *); 677 extern void blk_complete_request(struct request *);
677 678
678 static inline int rq_all_done(struct request *rq, unsigned int nr_bytes) 679 static inline int rq_all_done(struct request *rq, unsigned int nr_bytes)
679 { 680 {
680 if (blk_fs_request(rq)) 681 if (blk_fs_request(rq))
681 return (nr_bytes >= (rq->hard_nr_sectors << 9)); 682 return (nr_bytes >= (rq->hard_nr_sectors << 9));
682 else if (blk_pc_request(rq)) 683 else if (blk_pc_request(rq))
683 return nr_bytes >= rq->data_len; 684 return nr_bytes >= rq->data_len;
684 685
685 return 0; 686 return 0;
686 } 687 }
687 688
688 /* 689 /*
689 * end_that_request_first/chunk() takes an uptodate argument. we account 690 * end_that_request_first/chunk() takes an uptodate argument. we account
690 * any value <= 0 as an io error. 0 means -EIO for compatibility reasons, 691 * any value <= 0 as an io error. 0 means -EIO for compatibility reasons,
691 * any other < 0 value is the direct error type. An uptodate value of 692 * any other < 0 value is the direct error type. An uptodate value of
692 * 1 indicates successful io completion 693 * 1 indicates successful io completion
693 */ 694 */
694 #define end_io_error(uptodate) (unlikely((uptodate) <= 0)) 695 #define end_io_error(uptodate) (unlikely((uptodate) <= 0))
695 696
696 static inline void blkdev_dequeue_request(struct request *req) 697 static inline void blkdev_dequeue_request(struct request *req)
697 { 698 {
698 elv_dequeue_request(req->q, req); 699 elv_dequeue_request(req->q, req);
699 } 700 }
700 701
701 /* 702 /*
702 * Access functions for manipulating queue properties 703 * Access functions for manipulating queue properties
703 */ 704 */
704 extern request_queue_t *blk_init_queue_node(request_fn_proc *rfn, 705 extern request_queue_t *blk_init_queue_node(request_fn_proc *rfn,
705 spinlock_t *lock, int node_id); 706 spinlock_t *lock, int node_id);
706 extern request_queue_t *blk_init_queue(request_fn_proc *, spinlock_t *); 707 extern request_queue_t *blk_init_queue(request_fn_proc *, spinlock_t *);
707 extern void blk_cleanup_queue(request_queue_t *); 708 extern void blk_cleanup_queue(request_queue_t *);
708 extern void blk_queue_make_request(request_queue_t *, make_request_fn *); 709 extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
709 extern void blk_queue_bounce_limit(request_queue_t *, u64); 710 extern void blk_queue_bounce_limit(request_queue_t *, u64);
710 extern void blk_queue_max_sectors(request_queue_t *, unsigned int); 711 extern void blk_queue_max_sectors(request_queue_t *, unsigned int);
711 extern void blk_queue_max_phys_segments(request_queue_t *, unsigned short); 712 extern void blk_queue_max_phys_segments(request_queue_t *, unsigned short);
712 extern void blk_queue_max_hw_segments(request_queue_t *, unsigned short); 713 extern void blk_queue_max_hw_segments(request_queue_t *, unsigned short);
713 extern void blk_queue_max_segment_size(request_queue_t *, unsigned int); 714 extern void blk_queue_max_segment_size(request_queue_t *, unsigned int);
714 extern void blk_queue_hardsect_size(request_queue_t *, unsigned short); 715 extern void blk_queue_hardsect_size(request_queue_t *, unsigned short);
715 extern void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b); 716 extern void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b);
716 extern void blk_queue_segment_boundary(request_queue_t *, unsigned long); 717 extern void blk_queue_segment_boundary(request_queue_t *, unsigned long);
717 extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn); 718 extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn);
718 extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *); 719 extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *);
719 extern void blk_queue_dma_alignment(request_queue_t *, int); 720 extern void blk_queue_dma_alignment(request_queue_t *, int);
720 extern void blk_queue_softirq_done(request_queue_t *, softirq_done_fn *); 721 extern void blk_queue_softirq_done(request_queue_t *, softirq_done_fn *);
721 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); 722 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
722 extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *); 723 extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *);
723 extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *); 724 extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *);
724 extern int blk_do_ordered(request_queue_t *, struct request **); 725 extern int blk_do_ordered(request_queue_t *, struct request **);
725 extern unsigned blk_ordered_cur_seq(request_queue_t *); 726 extern unsigned blk_ordered_cur_seq(request_queue_t *);
726 extern unsigned blk_ordered_req_seq(struct request *); 727 extern unsigned blk_ordered_req_seq(struct request *);
727 extern void blk_ordered_complete_seq(request_queue_t *, unsigned, int); 728 extern void blk_ordered_complete_seq(request_queue_t *, unsigned, int);
728 729
729 extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *); 730 extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
730 extern void blk_dump_rq_flags(struct request *, char *); 731 extern void blk_dump_rq_flags(struct request *, char *);
731 extern void generic_unplug_device(request_queue_t *); 732 extern void generic_unplug_device(request_queue_t *);
732 extern void __generic_unplug_device(request_queue_t *); 733 extern void __generic_unplug_device(request_queue_t *);
733 extern long nr_blockdev_pages(void); 734 extern long nr_blockdev_pages(void);
734 735
735 int blk_get_queue(request_queue_t *); 736 int blk_get_queue(request_queue_t *);
736 request_queue_t *blk_alloc_queue(gfp_t); 737 request_queue_t *blk_alloc_queue(gfp_t);
737 request_queue_t *blk_alloc_queue_node(gfp_t, int); 738 request_queue_t *blk_alloc_queue_node(gfp_t, int);
738 extern void blk_put_queue(request_queue_t *); 739 extern void blk_put_queue(request_queue_t *);
739 740
740 /* 741 /*
741 * tag stuff 742 * tag stuff
742 */ 743 */
743 #define blk_queue_tag_depth(q) ((q)->queue_tags->busy) 744 #define blk_queue_tag_depth(q) ((q)->queue_tags->busy)
744 #define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth) 745 #define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth)
745 #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) 746 #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED)
746 extern int blk_queue_start_tag(request_queue_t *, struct request *); 747 extern int blk_queue_start_tag(request_queue_t *, struct request *);
747 extern struct request *blk_queue_find_tag(request_queue_t *, int); 748 extern struct request *blk_queue_find_tag(request_queue_t *, int);
748 extern void blk_queue_end_tag(request_queue_t *, struct request *); 749 extern void blk_queue_end_tag(request_queue_t *, struct request *);
749 extern int blk_queue_init_tags(request_queue_t *, int, struct blk_queue_tag *); 750 extern int blk_queue_init_tags(request_queue_t *, int, struct blk_queue_tag *);
750 extern void blk_queue_free_tags(request_queue_t *); 751 extern void blk_queue_free_tags(request_queue_t *);
751 extern int blk_queue_resize_tags(request_queue_t *, int); 752 extern int blk_queue_resize_tags(request_queue_t *, int);
752 extern void blk_queue_invalidate_tags(request_queue_t *); 753 extern void blk_queue_invalidate_tags(request_queue_t *);
753 extern long blk_congestion_wait(int rw, long timeout); 754 extern long blk_congestion_wait(int rw, long timeout);
754 extern struct blk_queue_tag *blk_init_tags(int); 755 extern struct blk_queue_tag *blk_init_tags(int);
755 extern void blk_free_tags(struct blk_queue_tag *); 756 extern void blk_free_tags(struct blk_queue_tag *);
756 extern void blk_congestion_end(int rw); 757 extern void blk_congestion_end(int rw);
757 758
758 extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *); 759 extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *);
759 extern int blkdev_issue_flush(struct block_device *, sector_t *); 760 extern int blkdev_issue_flush(struct block_device *, sector_t *);
760 761
761 #define MAX_PHYS_SEGMENTS 128 762 #define MAX_PHYS_SEGMENTS 128
762 #define MAX_HW_SEGMENTS 128 763 #define MAX_HW_SEGMENTS 128
763 #define SAFE_MAX_SECTORS 255 764 #define SAFE_MAX_SECTORS 255
764 #define BLK_DEF_MAX_SECTORS 1024 765 #define BLK_DEF_MAX_SECTORS 1024
765 766
766 #define MAX_SEGMENT_SIZE 65536 767 #define MAX_SEGMENT_SIZE 65536
767 768
768 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) 769 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
769 770
770 static inline int queue_hardsect_size(request_queue_t *q) 771 static inline int queue_hardsect_size(request_queue_t *q)
771 { 772 {
772 int retval = 512; 773 int retval = 512;
773 774
774 if (q && q->hardsect_size) 775 if (q && q->hardsect_size)
775 retval = q->hardsect_size; 776 retval = q->hardsect_size;
776 777
777 return retval; 778 return retval;
778 } 779 }
779 780
780 static inline int bdev_hardsect_size(struct block_device *bdev) 781 static inline int bdev_hardsect_size(struct block_device *bdev)
781 { 782 {
782 return queue_hardsect_size(bdev_get_queue(bdev)); 783 return queue_hardsect_size(bdev_get_queue(bdev));
783 } 784 }
784 785
785 static inline int queue_dma_alignment(request_queue_t *q) 786 static inline int queue_dma_alignment(request_queue_t *q)
786 { 787 {
787 int retval = 511; 788 int retval = 511;
788 789
789 if (q && q->dma_alignment) 790 if (q && q->dma_alignment)
790 retval = q->dma_alignment; 791 retval = q->dma_alignment;
791 792
792 return retval; 793 return retval;
793 } 794 }
794 795
795 static inline int bdev_dma_aligment(struct block_device *bdev) 796 static inline int bdev_dma_aligment(struct block_device *bdev)
796 { 797 {
797 return queue_dma_alignment(bdev_get_queue(bdev)); 798 return queue_dma_alignment(bdev_get_queue(bdev));
798 } 799 }
799 800
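Two details worth noting in the helpers above: queue_dma_alignment() returns an alignment mask rather than a byte count (the default of 511 means 512-byte alignment), so callers test addresses and lengths against it with a bitwise AND, and the header really does spell bdev_dma_aligment() without the second "n". A small, hypothetical illustration of the mask check:

	#include <linux/blkdev.h>

	/*
	 * Illustration only, not part of this diff: decide whether a buffer is
	 * aligned well enough for direct I/O to the queue behind "bdev".
	 */
	static int my_buffer_aligned(struct block_device *bdev, unsigned long addr,
				     unsigned long len)
	{
		request_queue_t *q = bdev_get_queue(bdev);
		int mask = queue_dma_alignment(q);	/* e.g. 511 -> 512-byte alignment */

		/* both the start address and the length must satisfy the mask */
		return ((addr | len) & mask) == 0;
	}
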
800 #define blk_finished_io(nsects) do { } while (0) 801 #define blk_finished_io(nsects) do { } while (0)
801 #define blk_started_io(nsects) do { } while (0) 802 #define blk_started_io(nsects) do { } while (0)
802 803
803 /* assumes size > 256 */ 804 /* assumes size > 256 */
804 static inline unsigned int blksize_bits(unsigned int size) 805 static inline unsigned int blksize_bits(unsigned int size)
805 { 806 {
806 unsigned int bits = 8; 807 unsigned int bits = 8;
807 do { 808 do {
808 bits++; 809 bits++;
809 size >>= 1; 810 size >>= 1;
810 } while (size > 256); 811 } while (size > 256);
811 return bits; 812 return bits;
812 } 813 }
813 814
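For the power-of-two sizes the comment assumes, blksize_bits() simply yields log2 of the block size: blksize_bits(512) is 9, blksize_bits(1024) is 10 and blksize_bits(4096) is 12. The "size > 256" assumption matters because the loop shifts once before testing, so blksize_bits(256) would come back as 9 rather than 8.
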
814 static inline unsigned int block_size(struct block_device *bdev) 815 static inline unsigned int block_size(struct block_device *bdev)
815 { 816 {
816 return bdev->bd_block_size; 817 return bdev->bd_block_size;
817 } 818 }
818 819
819 typedef struct {struct page *v;} Sector; 820 typedef struct {struct page *v;} Sector;
820 821
821 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); 822 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
822 823
823 static inline void put_dev_sector(Sector p) 824 static inline void put_dev_sector(Sector p)
824 { 825 {
825 page_cache_release(p.v); 826 page_cache_release(p.v);
826 } 827 }
827 828
828 struct work_struct; 829 struct work_struct;
829 int kblockd_schedule_work(struct work_struct *work); 830 int kblockd_schedule_work(struct work_struct *work);
830 void kblockd_flush(void); 831 void kblockd_flush(void);
831 832
832 #ifdef CONFIG_LBD 833 #ifdef CONFIG_LBD
833 # include <asm/div64.h> 834 # include <asm/div64.h>
834 # define sector_div(a, b) do_div(a, b) 835 # define sector_div(a, b) do_div(a, b)
835 #else 836 #else
836 # define sector_div(n, b)( \ 837 # define sector_div(n, b)( \
837 { \ 838 { \
838 int _res; \ 839 int _res; \
839 _res = (n) % (b); \ 840 _res = (n) % (b); \
840 (n) /= (b); \ 841 (n) /= (b); \
841 _res; \ 842 _res; \
842 } \ 843 } \
843 ) 844 )
844 #endif 845 #endif
845 846
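In both the CONFIG_LBD and the plain-division configurations, sector_div() divides its first argument in place and evaluates to the remainder, which is why callers pass a local copy of the sector and avoid dividends with side effects; the divisor is expected to fit in 32 bits, as do_div() requires. A hypothetical sketch of the usual calling pattern:

	#include <linux/blkdev.h>

	/*
	 * Illustration only: split an absolute sector into a chunk number and an
	 * offset within the chunk ("chunk_sects" is an invented parameter).
	 */
	static sector_t my_chunk_and_offset(sector_t sector, unsigned int chunk_sects,
					    unsigned int *offset)
	{
		*offset = sector_div(sector, chunk_sects);	/* remainder */
		return sector;					/* quotient, divided in place */
	}
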
846 #define MODULE_ALIAS_BLOCKDEV(major,minor) \ 847 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
847 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) 848 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
848 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ 849 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
849 MODULE_ALIAS("block-major-" __stringify(major) "-*") 850 MODULE_ALIAS("block-major-" __stringify(major) "-*")
850 851
851 852
852 #endif 853 #endif
853 854
include/linux/elevator.h
1 #ifndef _LINUX_ELEVATOR_H 1 #ifndef _LINUX_ELEVATOR_H
2 #define _LINUX_ELEVATOR_H 2 #define _LINUX_ELEVATOR_H
3 3
4 typedef int (elevator_merge_fn) (request_queue_t *, struct request **, 4 typedef int (elevator_merge_fn) (request_queue_t *, struct request **,
5 struct bio *); 5 struct bio *);
6 6
7 typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struct request *); 7 typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struct request *);
8 8
9 typedef void (elevator_merged_fn) (request_queue_t *, struct request *); 9 typedef void (elevator_merged_fn) (request_queue_t *, struct request *, int);
10 10
11 typedef int (elevator_dispatch_fn) (request_queue_t *, int); 11 typedef int (elevator_dispatch_fn) (request_queue_t *, int);
12 12
13 typedef void (elevator_add_req_fn) (request_queue_t *, struct request *); 13 typedef void (elevator_add_req_fn) (request_queue_t *, struct request *);
14 typedef int (elevator_queue_empty_fn) (request_queue_t *); 14 typedef int (elevator_queue_empty_fn) (request_queue_t *);
15 typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *); 15 typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *);
16 typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); 16 typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *);
17 typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *); 17 typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *);
18 18
19 typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t); 19 typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t);
20 typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); 20 typedef void (elevator_put_req_fn) (request_queue_t *, struct request *);
21 typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *); 21 typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *);
22 typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *); 22 typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *);
23 23
24 typedef void *(elevator_init_fn) (request_queue_t *, elevator_t *); 24 typedef void *(elevator_init_fn) (request_queue_t *, elevator_t *);
25 typedef void (elevator_exit_fn) (elevator_t *); 25 typedef void (elevator_exit_fn) (elevator_t *);
26 26
27 struct elevator_ops 27 struct elevator_ops
28 { 28 {
29 elevator_merge_fn *elevator_merge_fn; 29 elevator_merge_fn *elevator_merge_fn;
30 elevator_merged_fn *elevator_merged_fn; 30 elevator_merged_fn *elevator_merged_fn;
31 elevator_merge_req_fn *elevator_merge_req_fn; 31 elevator_merge_req_fn *elevator_merge_req_fn;
32 32
33 elevator_dispatch_fn *elevator_dispatch_fn; 33 elevator_dispatch_fn *elevator_dispatch_fn;
34 elevator_add_req_fn *elevator_add_req_fn; 34 elevator_add_req_fn *elevator_add_req_fn;
35 elevator_activate_req_fn *elevator_activate_req_fn; 35 elevator_activate_req_fn *elevator_activate_req_fn;
36 elevator_deactivate_req_fn *elevator_deactivate_req_fn; 36 elevator_deactivate_req_fn *elevator_deactivate_req_fn;
37 37
38 elevator_queue_empty_fn *elevator_queue_empty_fn; 38 elevator_queue_empty_fn *elevator_queue_empty_fn;
39 elevator_completed_req_fn *elevator_completed_req_fn; 39 elevator_completed_req_fn *elevator_completed_req_fn;
40 40
41 elevator_request_list_fn *elevator_former_req_fn; 41 elevator_request_list_fn *elevator_former_req_fn;
42 elevator_request_list_fn *elevator_latter_req_fn; 42 elevator_request_list_fn *elevator_latter_req_fn;
43 43
44 elevator_set_req_fn *elevator_set_req_fn; 44 elevator_set_req_fn *elevator_set_req_fn;
45 elevator_put_req_fn *elevator_put_req_fn; 45 elevator_put_req_fn *elevator_put_req_fn;
46 46
47 elevator_may_queue_fn *elevator_may_queue_fn; 47 elevator_may_queue_fn *elevator_may_queue_fn;
48 48
49 elevator_init_fn *elevator_init_fn; 49 elevator_init_fn *elevator_init_fn;
50 elevator_exit_fn *elevator_exit_fn; 50 elevator_exit_fn *elevator_exit_fn;
51 void (*trim)(struct io_context *); 51 void (*trim)(struct io_context *);
52 }; 52 };
53 53
54 #define ELV_NAME_MAX (16) 54 #define ELV_NAME_MAX (16)
55 55
56 struct elv_fs_entry { 56 struct elv_fs_entry {
57 struct attribute attr; 57 struct attribute attr;
58 ssize_t (*show)(elevator_t *, char *); 58 ssize_t (*show)(elevator_t *, char *);
59 ssize_t (*store)(elevator_t *, const char *, size_t); 59 ssize_t (*store)(elevator_t *, const char *, size_t);
60 }; 60 };
61 61
62 /* 62 /*
63 * identifies an elevator type, such as AS or deadline 63 * identifies an elevator type, such as AS or deadline
64 */ 64 */
65 struct elevator_type 65 struct elevator_type
66 { 66 {
67 struct list_head list; 67 struct list_head list;
68 struct elevator_ops ops; 68 struct elevator_ops ops;
69 struct elevator_type *elevator_type; 69 struct elevator_type *elevator_type;
70 struct elv_fs_entry *elevator_attrs; 70 struct elv_fs_entry *elevator_attrs;
71 char elevator_name[ELV_NAME_MAX]; 71 char elevator_name[ELV_NAME_MAX];
72 struct module *elevator_owner; 72 struct module *elevator_owner;
73 }; 73 };
74 74
75 /* 75 /*
76 * each queue has an elevator_queue associated with it 76 * each queue has an elevator_queue associated with it
77 */ 77 */
78 struct elevator_queue 78 struct elevator_queue
79 { 79 {
80 struct elevator_ops *ops; 80 struct elevator_ops *ops;
81 void *elevator_data; 81 void *elevator_data;
82 struct kobject kobj; 82 struct kobject kobj;
83 struct elevator_type *elevator_type; 83 struct elevator_type *elevator_type;
84 struct mutex sysfs_lock; 84 struct mutex sysfs_lock;
85 struct hlist_head *hash; 85 struct hlist_head *hash;
86 }; 86 };
87 87
88 /* 88 /*
89 * block elevator interface 89 * block elevator interface
90 */ 90 */
91 extern void elv_dispatch_sort(request_queue_t *, struct request *); 91 extern void elv_dispatch_sort(request_queue_t *, struct request *);
92 extern void elv_dispatch_add_tail(request_queue_t *, struct request *); 92 extern void elv_dispatch_add_tail(request_queue_t *, struct request *);
93 extern void elv_add_request(request_queue_t *, struct request *, int, int); 93 extern void elv_add_request(request_queue_t *, struct request *, int, int);
94 extern void __elv_add_request(request_queue_t *, struct request *, int, int); 94 extern void __elv_add_request(request_queue_t *, struct request *, int, int);
95 extern void elv_insert(request_queue_t *, struct request *, int); 95 extern void elv_insert(request_queue_t *, struct request *, int);
96 extern int elv_merge(request_queue_t *, struct request **, struct bio *); 96 extern int elv_merge(request_queue_t *, struct request **, struct bio *);
97 extern void elv_merge_requests(request_queue_t *, struct request *, 97 extern void elv_merge_requests(request_queue_t *, struct request *,
98 struct request *); 98 struct request *);
99 extern void elv_merged_request(request_queue_t *, struct request *); 99 extern void elv_merged_request(request_queue_t *, struct request *, int);
100 extern void elv_dequeue_request(request_queue_t *, struct request *); 100 extern void elv_dequeue_request(request_queue_t *, struct request *);
101 extern void elv_requeue_request(request_queue_t *, struct request *); 101 extern void elv_requeue_request(request_queue_t *, struct request *);
102 extern int elv_queue_empty(request_queue_t *); 102 extern int elv_queue_empty(request_queue_t *);
103 extern struct request *elv_next_request(struct request_queue *q); 103 extern struct request *elv_next_request(struct request_queue *q);
104 extern struct request *elv_former_request(request_queue_t *, struct request *); 104 extern struct request *elv_former_request(request_queue_t *, struct request *);
105 extern struct request *elv_latter_request(request_queue_t *, struct request *); 105 extern struct request *elv_latter_request(request_queue_t *, struct request *);
106 extern int elv_register_queue(request_queue_t *q); 106 extern int elv_register_queue(request_queue_t *q);
107 extern void elv_unregister_queue(request_queue_t *q); 107 extern void elv_unregister_queue(request_queue_t *q);
108 extern int elv_may_queue(request_queue_t *, int, struct bio *); 108 extern int elv_may_queue(request_queue_t *, int, struct bio *);
109 extern void elv_completed_request(request_queue_t *, struct request *); 109 extern void elv_completed_request(request_queue_t *, struct request *);
110 extern int elv_set_request(request_queue_t *, struct request *, struct bio *, gfp_t); 110 extern int elv_set_request(request_queue_t *, struct request *, struct bio *, gfp_t);
111 extern void elv_put_request(request_queue_t *, struct request *); 111 extern void elv_put_request(request_queue_t *, struct request *);
112 112
113 /* 113 /*
114 * io scheduler registration 114 * io scheduler registration
115 */ 115 */
116 extern int elv_register(struct elevator_type *); 116 extern int elv_register(struct elevator_type *);
117 extern void elv_unregister(struct elevator_type *); 117 extern void elv_unregister(struct elevator_type *);
118 118
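An elevator becomes selectable by filling in a struct elevator_type and handing it to elv_register(); elv_unregister() removes it again. The fragment below is a hypothetical, heavily trimmed sketch of that shape: the "myiosched" name and the my_* helpers are invented, and a real scheduler would also have to supply at least the dispatch, add_req and queue_empty hooks declared above.

	#include <linux/module.h>
	#include <linux/slab.h>
	#include <linux/blkdev.h>
	#include <linux/elevator.h>

	struct my_data {
		struct list_head queue;			/* per-queue scheduler state */
	};

	static void *my_init_queue(request_queue_t *q, elevator_t *e)
	{
		struct my_data *md = kmalloc(sizeof(*md), GFP_KERNEL);

		if (!md)
			return NULL;
		INIT_LIST_HEAD(&md->queue);
		return md;				/* stored as e->elevator_data */
	}

	static void my_exit_queue(elevator_t *e)
	{
		kfree(e->elevator_data);
	}

	static struct elevator_type elevator_myiosched = {
		.ops = {
			.elevator_init_fn	= my_init_queue,
			.elevator_exit_fn	= my_exit_queue,
			/* dispatch/add_req/queue_empty hooks omitted for brevity */
		},
		.elevator_name	= "myiosched",
		.elevator_owner	= THIS_MODULE,
	};

	static int __init myiosched_init(void)
	{
		return elv_register(&elevator_myiosched);
	}

	static void __exit myiosched_exit(void)
	{
		elv_unregister(&elevator_myiosched);
	}

	module_init(myiosched_init);
	module_exit(myiosched_exit);
	MODULE_LICENSE("GPL");
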
119 /* 119 /*
120 * io scheduler sysfs switching 120 * io scheduler sysfs switching
121 */ 121 */
122 extern ssize_t elv_iosched_show(request_queue_t *, char *); 122 extern ssize_t elv_iosched_show(request_queue_t *, char *);
123 extern ssize_t elv_iosched_store(request_queue_t *, const char *, size_t); 123 extern ssize_t elv_iosched_store(request_queue_t *, const char *, size_t);
124 124
125 extern int elevator_init(request_queue_t *, char *); 125 extern int elevator_init(request_queue_t *, char *);
126 extern void elevator_exit(elevator_t *); 126 extern void elevator_exit(elevator_t *);
127 extern int elv_rq_merge_ok(struct request *, struct bio *); 127 extern int elv_rq_merge_ok(struct request *, struct bio *);
128 128
129 /* 129 /*
130 * Helper functions.
131 */
132 extern struct request *elv_rb_former_request(request_queue_t *, struct request *);
133 extern struct request *elv_rb_latter_request(request_queue_t *, struct request *);
134
135 /*
136 * rb support functions.
137 */
138 extern struct request *elv_rb_add(struct rb_root *, struct request *);
139 extern void elv_rb_del(struct rb_root *, struct request *);
140 extern struct request *elv_rb_find(struct rb_root *, sector_t);
141
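These rb helpers are the heart of this patch: the scheduler keeps its own struct rb_root while the elevator core performs the sector-ordered insert, lookup and removal on it. The sketch below is hypothetical (my_sched_data and the my_* wrappers are invented) and assumes that elv_rb_add() returns an already-queued request at the same sector (an alias) instead of inserting, and NULL once the request has been added; a real scheduler would dispatch such an alias and retry.

	#include <linux/rbtree.h>
	#include <linux/blkdev.h>
	#include <linux/elevator.h>

	struct my_sched_data {
		struct rb_root sort_list;	/* requests sorted by start sector */
	};

	/* queue a request in sector order; the elevator core does the insertion */
	static void my_add_request(struct my_sched_data *md, struct request *rq)
	{
		struct request *alias = elv_rb_add(&md->sort_list, rq);

		/* assumption: non-NULL means a request at the same sector exists */
		WARN_ON(alias != NULL);
	}

	/* front-merge style lookup: a queued request starting exactly at "sector"? */
	static struct request *my_find_request(struct my_sched_data *md, sector_t sector)
	{
		return elv_rb_find(&md->sort_list, sector);
	}

	/* the next request in sector order after "rq", if any */
	static struct request *my_next_request(struct request *rq)
	{
		struct rb_node *node = rb_next(&rq->rb_node);

		return node ? rb_entry_rq(node) : NULL;
	}

	/* take a request out of the sort tree again, e.g. when dispatching it */
	static void my_del_request(struct my_sched_data *md, struct request *rq)
	{
		elv_rb_del(&md->sort_list, rq);
	}

rb_entry_rq(), defined further down in this header, converts an rb_node back into its containing request, which is what my_next_request() relies on.
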
142 /*
130 * Return values from elevator merger 143 * Return values from elevator merger
131 */ 144 */
132 #define ELEVATOR_NO_MERGE 0 145 #define ELEVATOR_NO_MERGE 0
133 #define ELEVATOR_FRONT_MERGE 1 146 #define ELEVATOR_FRONT_MERGE 1
134 #define ELEVATOR_BACK_MERGE 2 147 #define ELEVATOR_BACK_MERGE 2
135 148
136 /* 149 /*
137 * Insertion selection 150 * Insertion selection
138 */ 151 */
139 #define ELEVATOR_INSERT_FRONT 1 152 #define ELEVATOR_INSERT_FRONT 1
140 #define ELEVATOR_INSERT_BACK 2 153 #define ELEVATOR_INSERT_BACK 2
141 #define ELEVATOR_INSERT_SORT 3 154 #define ELEVATOR_INSERT_SORT 3
142 #define ELEVATOR_INSERT_REQUEUE 4 155 #define ELEVATOR_INSERT_REQUEUE 4
143 156
144 /* 157 /*
145 * return values from elevator_may_queue_fn 158 * return values from elevator_may_queue_fn
146 */ 159 */
147 enum { 160 enum {
148 ELV_MQUEUE_MAY, 161 ELV_MQUEUE_MAY,
149 ELV_MQUEUE_NO, 162 ELV_MQUEUE_NO,
150 ELV_MQUEUE_MUST, 163 ELV_MQUEUE_MUST,
151 }; 164 };
152 165
153 #define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors) 166 #define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors)
167 #define rb_entry_rq(node) rb_entry((node), struct request, rb_node)
154 168
155 #endif 169 #endif
156 170