Blame view
block/elevator.c
26.1 KB
1da177e4c
|
1 |
/* |
1da177e4c
|
2 3 4 5 |
* Block device elevator/IO-scheduler. * * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE * |
0fe234795
|
6 |
* 30042000 Jens Axboe <axboe@kernel.dk> : |
1da177e4c
|
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
* * Split the elevator a bit so that it is possible to choose a different * one or even write a new "plug in". There are three pieces: * - elevator_fn, inserts a new request in the queue list * - elevator_merge_fn, decides whether a new buffer can be merged with * an existing request * - elevator_dequeue_fn, called when a request is taken off the active list * * 20082000 Dave Jones <davej@suse.de> : * Removed tests for max-bomb-segments, which was breaking elvtune * when run without -bN * * Jens: * - Rework again to work with bio instead of buffer_heads * - loose bi_dev comparisons, partition handling is right now * - completely modularize elevator setup and teardown * */ #include <linux/kernel.h> #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/elevator.h> #include <linux/bio.h> |
1da177e4c
|
30 31 32 33 |
#include <linux/module.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/compiler.h> |
cb98fc8bb
|
34 |
#include <linux/delay.h> |
2056a782f
|
35 |
#include <linux/blktrace_api.h> |
9817064b6
|
36 |
#include <linux/hash.h> |
1da177e4c
|
37 38 39 40 41 42 43 |
#include <asm/uaccess.h> static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); /* |
9817064b6
|
44 45 46 47 |
* Merge hash stuff. */ static const int elv_hash_shift = 6; #define ELV_HASH_BLOCK(sec) ((sec) >> 3) |
4eb166d98
|
48 49 |
#define ELV_HASH_FN(sec) \ (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) |
9817064b6
|
50 51 52 53 54 |
#define ELV_HASH_ENTRIES (1 << elv_hash_shift) #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) /* |
da7752650
|
55 56 57 58 59 |
* Query io scheduler to see if the current process issuing bio may be * merged with rq. */ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) { |
165125e1e
|
60 |
struct request_queue *q = rq->q; |
da7752650
|
61 62 63 64 65 66 67 68 69 |
elevator_t *e = q->elevator; if (e->ops->elevator_allow_merge_fn) return e->ops->elevator_allow_merge_fn(q, rq, bio); return 1; } /* |
1da177e4c
|
70 71 |
* can we safely merge with this request? */ |
72ed0bf60
|
72 |
int elv_rq_merge_ok(struct request *rq, struct bio *bio) |
1da177e4c
|
73 74 75 76 77 78 79 80 81 82 83 |
{ if (!rq_mergeable(rq)) return 0; /* * different data direction or already started, don't merge */ if (bio_data_dir(bio) != rq_data_dir(rq)) return 0; /* |
da7752650
|
84 |
* must be same device and not a special request |
1da177e4c
|
85 |
*/ |
bb4067e34
|
86 |
if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) |
da7752650
|
87 88 89 90 |
return 0; if (!elv_iosched_allow_merge(rq, bio)) return 0; |
1da177e4c
|
91 |
|
da7752650
|
92 |
return 1; |
1da177e4c
|
93 94 |
} EXPORT_SYMBOL(elv_rq_merge_ok); |
769db45b7
|
95 |
static inline int elv_try_merge(struct request *__rq, struct bio *bio) |
1da177e4c
|
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
{ int ret = ELEVATOR_NO_MERGE; /* * we can merge and sequence is ok, check if it's possible */ if (elv_rq_merge_ok(__rq, bio)) { if (__rq->sector + __rq->nr_sectors == bio->bi_sector) ret = ELEVATOR_BACK_MERGE; else if (__rq->sector - bio_sectors(bio) == bio->bi_sector) ret = ELEVATOR_FRONT_MERGE; } return ret; } |
1da177e4c
|
111 |
|
1da177e4c
|
112 113 |
static struct elevator_type *elevator_find(const char *name) { |
a22b169df
|
114 |
struct elevator_type *e; |
1da177e4c
|
115 |
|
70cee26e0
|
116 |
list_for_each_entry(e, &elv_list, list) { |
a22b169df
|
117 118 |
if (!strcmp(e->elevator_name, name)) return e; |
1da177e4c
|
119 |
} |
1da177e4c
|
120 |
|
a22b169df
|
121 |
return NULL; |
1da177e4c
|
122 123 124 125 126 127 128 129 130 |
} static void elevator_put(struct elevator_type *e) { module_put(e->elevator_owner); } static struct elevator_type *elevator_get(const char *name) { |
2824bc932
|
131 |
struct elevator_type *e; |
1da177e4c
|
132 |
|
2a12dcd71
|
133 |
spin_lock(&elv_list_lock); |
2824bc932
|
134 135 |
e = elevator_find(name); |
e16409496
|
136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
if (!e) { char elv[ELV_NAME_MAX + strlen("-iosched")]; spin_unlock(&elv_list_lock); if (!strcmp(name, "anticipatory")) sprintf(elv, "as-iosched"); else sprintf(elv, "%s-iosched", name); request_module(elv); spin_lock(&elv_list_lock); e = elevator_find(name); } |
2824bc932
|
150 151 |
if (e && !try_module_get(e->elevator_owner)) e = NULL; |
2a12dcd71
|
152 |
spin_unlock(&elv_list_lock); |
1da177e4c
|
153 154 155 |
return e; } |
165125e1e
|
156 157 |
static void *elevator_init_queue(struct request_queue *q, struct elevator_queue *eq) |
1da177e4c
|
158 |
{ |
bb37b94c6
|
159 |
return eq->ops->elevator_init_fn(q); |
bc1c11697
|
160 |
} |
1da177e4c
|
161 |
|
165125e1e
|
162 |
static void elevator_attach(struct request_queue *q, struct elevator_queue *eq, |
bc1c11697
|
163 164 |
void *data) { |
1da177e4c
|
165 |
q->elevator = eq; |
bc1c11697
|
166 |
eq->elevator_data = data; |
1da177e4c
|
167 168 169 |
} static char chosen_elevator[16]; |
5f0039764
|
170 |
static int __init elevator_setup(char *str) |
1da177e4c
|
171 |
{ |
752a3b796
|
172 173 174 175 |
/* * Be backwards-compatible with previous kernels, so users * won't get the wrong elevator. */ |
5f0039764
|
176 |
if (!strcmp(str, "as")) |
752a3b796
|
177 |
strcpy(chosen_elevator, "anticipatory"); |
cff3ba220
|
178 |
else |
5f0039764
|
179 |
strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); |
9b41046cd
|
180 |
return 1; |
1da177e4c
|
181 182 183 |
} __setup("elevator=", elevator_setup); |
3d1ab40f4
|
184 |
static struct kobj_type elv_ktype; |
165125e1e
|
185 186 |
static elevator_t *elevator_alloc(struct request_queue *q, struct elevator_type *e) |
3d1ab40f4
|
187 |
{ |
9817064b6
|
188 189 |
elevator_t *eq; int i; |
94f6030ca
|
190 |
eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL | __GFP_ZERO, q->node); |
9817064b6
|
191 192 |
if (unlikely(!eq)) goto err; |
9817064b6
|
193 194 |
eq->ops = &e->ops; eq->elevator_type = e; |
f9cb074bf
|
195 |
kobject_init(&eq->kobj, &elv_ktype); |
9817064b6
|
196 |
mutex_init(&eq->sysfs_lock); |
b5deef901
|
197 198 |
eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, GFP_KERNEL, q->node); |
9817064b6
|
199 200 201 202 203 |
if (!eq->hash) goto err; for (i = 0; i < ELV_HASH_ENTRIES; i++) INIT_HLIST_HEAD(&eq->hash[i]); |
3d1ab40f4
|
204 |
return eq; |
9817064b6
|
205 206 207 208 |
err: kfree(eq); elevator_put(e); return NULL; |
3d1ab40f4
|
209 210 211 212 213 |
} static void elevator_release(struct kobject *kobj) { elevator_t *e = container_of(kobj, elevator_t, kobj); |
9817064b6
|
214 |
|
3d1ab40f4
|
215 |
elevator_put(e->elevator_type); |
9817064b6
|
216 |
kfree(e->hash); |
3d1ab40f4
|
217 218 |
kfree(e); } |
165125e1e
|
219 |
int elevator_init(struct request_queue *q, char *name) |
1da177e4c
|
220 221 222 223 |
{ struct elevator_type *e = NULL; struct elevator_queue *eq; int ret = 0; |
bc1c11697
|
224 |
void *data; |
1da177e4c
|
225 |
|
cb98fc8bb
|
226 227 228 229 |
INIT_LIST_HEAD(&q->queue_head); q->last_merge = NULL; q->end_sector = 0; q->boundary_rq = NULL; |
cb98fc8bb
|
230 |
|
4eb166d98
|
231 232 233 234 235 |
if (name) { e = elevator_get(name); if (!e) return -EINVAL; } |
1da177e4c
|
236 |
|
4eb166d98
|
237 238 239 240 241 242 243 |
if (!e && *chosen_elevator) { e = elevator_get(chosen_elevator); if (!e) printk(KERN_ERR "I/O scheduler %s not found ", chosen_elevator); } |
248d5ca5e
|
244 |
|
4eb166d98
|
245 246 247 248 249 250 251 252 253 |
if (!e) { e = elevator_get(CONFIG_DEFAULT_IOSCHED); if (!e) { printk(KERN_ERR "Default I/O scheduler not found. " \ "Using noop. "); e = elevator_get("noop"); } |
5f0039764
|
254 |
} |
b5deef901
|
255 |
eq = elevator_alloc(q, e); |
3d1ab40f4
|
256 |
if (!eq) |
1da177e4c
|
257 |
return -ENOMEM; |
1da177e4c
|
258 |
|
bc1c11697
|
259 260 |
data = elevator_init_queue(q, eq); if (!data) { |
3d1ab40f4
|
261 |
kobject_put(&eq->kobj); |
bc1c11697
|
262 263 |
return -ENOMEM; } |
1da177e4c
|
264 |
|
bc1c11697
|
265 |
elevator_attach(q, eq, data); |
1da177e4c
|
266 267 |
return ret; } |
2e662b65f
|
268 |
EXPORT_SYMBOL(elevator_init); |
1da177e4c
|
269 270 |
/*
 * Shut down elevator instance e: call the scheduler's exit hook if it
 * has one, clear e->ops, and drop the kobject reference.  The hook call
 * and the clearing of e->ops happen under e->sysfs_lock; the sysfs
 * show/store handlers in this file test e->ops under the same lock.
 */
void elevator_exit(elevator_t *e)
{
	mutex_lock(&e->sysfs_lock);

	if (e->ops->elevator_exit_fn)
		e->ops->elevator_exit_fn(e);
	e->ops = NULL;

	mutex_unlock(&e->sysfs_lock);

	kobject_put(&e->kobj);
}
2e662b65f
|
279 |
EXPORT_SYMBOL(elevator_exit); |
165125e1e
|
280 |
static void elv_activate_rq(struct request_queue *q, struct request *rq) |
cad975164
|
281 282 283 284 285 286 |
{ elevator_t *e = q->elevator; if (e->ops->elevator_activate_req_fn) e->ops->elevator_activate_req_fn(q, rq); } |
165125e1e
|
287 |
static void elv_deactivate_rq(struct request_queue *q, struct request *rq) |
cad975164
|
288 289 290 291 292 293 |
{ elevator_t *e = q->elevator; if (e->ops->elevator_deactivate_req_fn) e->ops->elevator_deactivate_req_fn(q, rq); } |
9817064b6
|
294 295 296 297 |
/*
 * Unconditionally unlink rq from the merge hash and reinitialise its
 * hash node.
 */
static inline void __elv_rqhash_del(struct request *rq)
{
	hlist_del_init(&rq->hash);
}

/*
 * Remove rq from the merge hash if it is currently on it; otherwise do
 * nothing.
 */
static void elv_rqhash_del(struct request_queue *q, struct request *rq)
{
	if (!ELV_ON_HASH(rq))
		return;

	__elv_rqhash_del(rq);
}
165125e1e
|
303 |
static void elv_rqhash_add(struct request_queue *q, struct request *rq) |
9817064b6
|
304 305 306 307 308 309 |
{ elevator_t *e = q->elevator; BUG_ON(ELV_ON_HASH(rq)); hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); } |
165125e1e
|
310 |
static void elv_rqhash_reposition(struct request_queue *q, struct request *rq) |
9817064b6
|
311 312 313 314 |
{ __elv_rqhash_del(rq); elv_rqhash_add(q, rq); } |
165125e1e
|
315 |
/*
 * Search the merge hash for a request whose hash key equals offset.
 * Requests found in the bucket that are no longer mergeable are removed
 * from the hash on the way.  Returns the matching request or NULL.
 */
static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
	struct hlist_head *bucket = &q->elevator->hash[ELV_HASH_FN(offset)];
	struct hlist_node *pos, *tmp;
	struct request *cand;

	hlist_for_each_entry_safe(cand, pos, tmp, bucket, hash) {
		BUG_ON(!ELV_ON_HASH(cand));

		if (unlikely(!rq_mergeable(cand)))
			__elv_rqhash_del(cand);
		else if (rq_hash_key(cand) == offset)
			return cand;
	}

	return NULL;
}
8922e16cf
|
336 |
/*
 * Helpers for keeping requests in an RB tree sorted by start sector:
 * insertion, removal and lookup.
 */

/*
 * Insert rq into the tree rooted at root.  If a request with the same
 * start sector is already present, nothing is inserted and that request
 * is returned; otherwise rq is linked in and NULL is returned.
 */
struct request *elv_rb_add(struct rb_root *root, struct request *rq)
{
	struct rb_node **link;
	struct rb_node *parent = NULL;

	for (link = &root->rb_node; *link; ) {
		struct request *cur;

		parent = *link;
		cur = rb_entry(parent, struct request, rb_node);

		if (rq->sector == cur->sector)
			return cur;

		link = (rq->sector < cur->sector) ? &parent->rb_left
						  : &parent->rb_right;
	}

	rb_link_node(&rq->rb_node, parent, link);
	rb_insert_color(&rq->rb_node, root);
	return NULL;
}
2e662b65f
|
362 363 364 365 366 367 368 369 |
EXPORT_SYMBOL(elv_rb_add);

/*
 * Take rq out of the tree rooted at root and mark its node as empty.
 * rq must currently be linked into a tree.
 */
void elv_rb_del(struct rb_root *root, struct request *rq)
{
	struct rb_node *node = &rq->rb_node;

	BUG_ON(RB_EMPTY_NODE(node));
	rb_erase(node, root);
	RB_CLEAR_NODE(node);
}
2e662b65f
|
370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 |
EXPORT_SYMBOL(elv_rb_del);

/*
 * Look up the request whose start sector equals sector in the tree
 * rooted at root.  Returns it, or NULL if there is no such request.
 */
struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct request *cur = rb_entry(node, struct request, rb_node);

		if (sector == cur->sector)
			return cur;

		node = (sector < cur->sector) ? node->rb_left : node->rb_right;
	}

	return NULL;
}
2e662b65f
|
390 391 392 |
EXPORT_SYMBOL(elv_rb_find); /* |
8922e16cf
|
393 |
* Insert rq into dispatch queue of q. Queue lock must be held on |
dbe7f76dd
|
394 |
* entry. rq is sort inserted into the dispatch queue. To be used by |
2e662b65f
|
395 |
* specific elevators. |
8922e16cf
|
396 |
*/ |
165125e1e
|
397 |
void elv_dispatch_sort(struct request_queue *q, struct request *rq) |
8922e16cf
|
398 399 |
{ sector_t boundary; |
8922e16cf
|
400 |
struct list_head *entry; |
4eb166d98
|
401 |
int stop_flags; |
8922e16cf
|
402 |
|
06b86245c
|
403 404 |
if (q->last_merge == rq) q->last_merge = NULL; |
9817064b6
|
405 406 |
elv_rqhash_del(q, rq); |
15853af9f
|
407 |
q->nr_sorted--; |
06b86245c
|
408 |
|
1b47f531e
|
409 |
boundary = q->end_sector; |
4eb166d98
|
410 |
stop_flags = REQ_SOFTBARRIER | REQ_HARDBARRIER | REQ_STARTED; |
8922e16cf
|
411 412 |
list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); |
783660b2f
|
413 414 |
if (rq_data_dir(rq) != rq_data_dir(pos)) break; |
4eb166d98
|
415 |
if (pos->cmd_flags & stop_flags) |
8922e16cf
|
416 417 418 419 420 421 422 423 424 425 426 427 428 429 |
break; if (rq->sector >= boundary) { if (pos->sector < boundary) continue; } else { if (pos->sector >= boundary) break; } if (rq->sector >= pos->sector) break; } list_add(&rq->queuelist, entry); } |
2e662b65f
|
430 |
EXPORT_SYMBOL(elv_dispatch_sort); |
9817064b6
|
431 |
/*
 * Append rq to the tail of q's dispatch queue.  The queue lock must be
 * held on entry.  rq becomes the new boundary request and its end
 * sector the new end_sector of the queue.  Meant to be called by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
	/* rq leaves the scheduler: forget it as merge candidate */
	if (rq == q->last_merge)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);
	q->nr_sorted--;

	q->end_sector = rq_end_sector(rq);
	q->boundary_rq = rq;

	list_add_tail(&rq->queuelist, &q->queue_head);
}
2e662b65f
|
449 |
EXPORT_SYMBOL(elv_dispatch_add_tail); |
165125e1e
|
450 |
int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) |
1da177e4c
|
451 452 |
{ elevator_t *e = q->elevator; |
9817064b6
|
453 |
struct request *__rq; |
06b86245c
|
454 |
int ret; |
9817064b6
|
455 456 457 |
/* * First try one-hit cache. */ |
06b86245c
|
458 459 460 461 462 463 464 |
if (q->last_merge) { ret = elv_try_merge(q->last_merge, bio); if (ret != ELEVATOR_NO_MERGE) { *req = q->last_merge; return ret; } } |
1da177e4c
|
465 |
|
ac9fafa12
|
466 467 |
if (blk_queue_nomerges(q)) return ELEVATOR_NO_MERGE; |
9817064b6
|
468 469 470 471 472 473 474 475 |
/* * See if our hash lookup can find a potential backmerge. */ __rq = elv_rqhash_find(q, bio->bi_sector); if (__rq && elv_rq_merge_ok(__rq, bio)) { *req = __rq; return ELEVATOR_BACK_MERGE; } |
1da177e4c
|
476 477 478 479 480 |
if (e->ops->elevator_merge_fn) return e->ops->elevator_merge_fn(q, req, bio); return ELEVATOR_NO_MERGE; } |
165125e1e
|
481 |
void elv_merged_request(struct request_queue *q, struct request *rq, int type) |
1da177e4c
|
482 483 484 485 |
{ elevator_t *e = q->elevator; if (e->ops->elevator_merged_fn) |
2e662b65f
|
486 |
e->ops->elevator_merged_fn(q, rq, type); |
06b86245c
|
487 |
|
2e662b65f
|
488 489 |
if (type == ELEVATOR_BACK_MERGE) elv_rqhash_reposition(q, rq); |
9817064b6
|
490 |
|
06b86245c
|
491 |
q->last_merge = rq; |
1da177e4c
|
492 |
} |
165125e1e
|
493 |
void elv_merge_requests(struct request_queue *q, struct request *rq, |
1da177e4c
|
494 495 496 |
struct request *next) { elevator_t *e = q->elevator; |
1da177e4c
|
497 498 |
if (e->ops->elevator_merge_req_fn) e->ops->elevator_merge_req_fn(q, rq, next); |
06b86245c
|
499 |
|
9817064b6
|
500 501 502 503 |
elv_rqhash_reposition(q, rq); elv_rqhash_del(q, next); q->nr_sorted--; |
06b86245c
|
504 |
q->last_merge = rq; |
1da177e4c
|
505 |
} |
165125e1e
|
506 |
void elv_requeue_request(struct request_queue *q, struct request *rq) |
1da177e4c
|
507 |
{ |
1da177e4c
|
508 509 510 511 |
/* * it already went through dequeue, we need to decrement the * in_flight count again */ |
8922e16cf
|
512 |
if (blk_account_rq(rq)) { |
1da177e4c
|
513 |
q->in_flight--; |
cad975164
|
514 515 |
if (blk_sorted_rq(rq)) elv_deactivate_rq(q, rq); |
8922e16cf
|
516 |
} |
1da177e4c
|
517 |
|
4aff5e233
|
518 |
rq->cmd_flags &= ~REQ_STARTED; |
1da177e4c
|
519 |
|
30e9656cc
|
520 |
elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); |
1da177e4c
|
521 |
} |
165125e1e
|
522 |
static void elv_drain_elevator(struct request_queue *q) |
15853af9f
|
523 524 525 526 527 528 529 530 531 532 533 534 535 |
{ static int printed; while (q->elevator->ops->elevator_dispatch_fn(q, 1)) ; if (q->nr_sorted == 0) return; if (printed++ < 10) { printk(KERN_ERR "%s: forced dispatching is broken " "(nr_sorted=%u), please report this ", q->elevator->elevator_type->elevator_name, q->nr_sorted); } } |
165125e1e
|
536 |
void elv_insert(struct request_queue *q, struct request *rq, int where) |
1da177e4c
|
537 |
{ |
797e7dbbe
|
538 539 |
struct list_head *pos; unsigned ordseq; |
dac07ec12
|
540 |
int unplug_it = 1; |
797e7dbbe
|
541 |
|
2056a782f
|
542 |
blk_add_trace_rq(q, rq, BLK_TA_INSERT); |
1da177e4c
|
543 |
rq->q = q; |
8922e16cf
|
544 545 |
switch (where) { case ELEVATOR_INSERT_FRONT: |
4aff5e233
|
546 |
rq->cmd_flags |= REQ_SOFTBARRIER; |
8922e16cf
|
547 548 549 550 551 |
list_add(&rq->queuelist, &q->queue_head); break; case ELEVATOR_INSERT_BACK: |
4aff5e233
|
552 |
rq->cmd_flags |= REQ_SOFTBARRIER; |
15853af9f
|
553 |
elv_drain_elevator(q); |
8922e16cf
|
554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 |
list_add_tail(&rq->queuelist, &q->queue_head); /* * We kick the queue here for the following reasons. * - The elevator might have returned NULL previously * to delay requests and returned them now. As the * queue wasn't empty before this request, ll_rw_blk * won't run the queue on return, resulting in hang. * - Usually, back inserted requests won't be merged * with anything. There's no point in delaying queue * processing. */ blk_remove_plug(q); q->request_fn(q); break; case ELEVATOR_INSERT_SORT: BUG_ON(!blk_fs_request(rq)); |
4aff5e233
|
571 |
rq->cmd_flags |= REQ_SORTED; |
15853af9f
|
572 |
q->nr_sorted++; |
9817064b6
|
573 574 575 576 577 |
if (rq_mergeable(rq)) { elv_rqhash_add(q, rq); if (!q->last_merge) q->last_merge = rq; } |
ca23509fb
|
578 579 580 581 582 583 |
/* * Some ioscheds (cfq) run q->request_fn directly, so * rq cannot be accessed after calling * elevator_add_req_fn. */ q->elevator->ops->elevator_add_req_fn(q, rq); |
8922e16cf
|
584 |
break; |
797e7dbbe
|
585 586 587 588 589 590 |
case ELEVATOR_INSERT_REQUEUE: /* * If ordered flush isn't in progress, we do front * insertion; otherwise, requests should be requeued * in ordseq order. */ |
4aff5e233
|
591 |
rq->cmd_flags |= REQ_SOFTBARRIER; |
797e7dbbe
|
592 |
|
95543179f
|
593 594 595 596 597 |
/* * Most requeues happen because of a busy condition, * don't force unplug of the queue for that case. */ unplug_it = 0; |
797e7dbbe
|
598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 |
if (q->ordseq == 0) { list_add(&rq->queuelist, &q->queue_head); break; } ordseq = blk_ordered_req_seq(rq); list_for_each(pos, &q->queue_head) { struct request *pos_rq = list_entry_rq(pos); if (ordseq <= blk_ordered_req_seq(pos_rq)) break; } list_add_tail(&rq->queuelist, pos); break; |
8922e16cf
|
613 614 615 |
default: printk(KERN_ERR "%s: bad insertion point %d ", |
24c03d47d
|
616 |
__func__, where); |
8922e16cf
|
617 618 |
BUG(); } |
dac07ec12
|
619 |
if (unplug_it && blk_queue_plugged(q)) { |
8922e16cf
|
620 621 622 623 624 625 |
int nrq = q->rq.count[READ] + q->rq.count[WRITE] - q->in_flight; if (nrq >= q->unplug_thresh) __generic_unplug_device(q); } |
1da177e4c
|
626 |
} |
165125e1e
|
627 |
void __elv_add_request(struct request_queue *q, struct request *rq, int where, |
30e9656cc
|
628 629 630 |
int plug) { if (q->ordcolor) |
4aff5e233
|
631 |
rq->cmd_flags |= REQ_ORDERED_COLOR; |
30e9656cc
|
632 |
|
4aff5e233
|
633 |
if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { |
30e9656cc
|
634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 |
/* * toggle ordered color */ if (blk_barrier_rq(rq)) q->ordcolor ^= 1; /* * barriers implicitly indicate back insertion */ if (where == ELEVATOR_INSERT_SORT) where = ELEVATOR_INSERT_BACK; /* * this request is scheduling boundary, update * end_sector */ if (blk_fs_request(rq)) { q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } |
4eb166d98
|
654 655 |
} else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) |
30e9656cc
|
656 657 658 659 660 661 662 |
where = ELEVATOR_INSERT_BACK; if (plug) blk_plug_device(q); elv_insert(q, rq, where); } |
2e662b65f
|
663 |
EXPORT_SYMBOL(__elv_add_request); |
165125e1e
|
664 |
void elv_add_request(struct request_queue *q, struct request *rq, int where, |
1da177e4c
|
665 666 667 668 669 670 671 672 |
int plug) { unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); __elv_add_request(q, rq, where, plug); spin_unlock_irqrestore(q->queue_lock, flags); } |
2e662b65f
|
673 |
EXPORT_SYMBOL(elv_add_request); |
165125e1e
|
674 |
static inline struct request *__elv_next_request(struct request_queue *q) |
1da177e4c
|
675 |
{ |
8922e16cf
|
676 |
struct request *rq; |
797e7dbbe
|
677 678 679 680 681 682 |
while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); if (blk_do_ordered(q, &rq)) return rq; } |
1da177e4c
|
683 |
|
797e7dbbe
|
684 685 |
if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) return NULL; |
1da177e4c
|
686 |
} |
1da177e4c
|
687 |
} |
165125e1e
|
688 |
struct request *elv_next_request(struct request_queue *q) |
1da177e4c
|
689 690 691 692 693 |
{ struct request *rq; int ret; while ((rq = __elv_next_request(q)) != NULL) { |
bf2de6f5a
|
694 695 696 697 698 699 700 701 |
/* * Kill the empty barrier place holder, the driver must * not ever see it. */ if (blk_empty_barrier(rq)) { end_queued_request(rq, 1); continue; } |
4aff5e233
|
702 |
if (!(rq->cmd_flags & REQ_STARTED)) { |
8922e16cf
|
703 704 705 706 707 |
/* * This is the first time the device driver * sees this request (possibly after * requeueing). Notify IO scheduler. */ |
cad975164
|
708 709 |
if (blk_sorted_rq(rq)) elv_activate_rq(q, rq); |
1da177e4c
|
710 |
|
8922e16cf
|
711 712 713 714 715 |
/* * just mark as started even if we don't start * it, a request that has been delayed should * not be passed by new incoming requests */ |
4aff5e233
|
716 |
rq->cmd_flags |= REQ_STARTED; |
2056a782f
|
717 |
blk_add_trace_rq(q, rq, BLK_TA_ISSUE); |
8922e16cf
|
718 |
} |
1da177e4c
|
719 |
|
8922e16cf
|
720 |
if (!q->boundary_rq || q->boundary_rq == rq) { |
1b47f531e
|
721 |
q->end_sector = rq_end_sector(rq); |
8922e16cf
|
722 723 |
q->boundary_rq = NULL; } |
1da177e4c
|
724 |
|
fa0ccd837
|
725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 |
if (rq->cmd_flags & REQ_DONTPREP) break; if (q->dma_drain_size && rq->data_len) { /* * make sure space for the drain appears we * know we can do this because max_hw_segments * has been adjusted to be one fewer than the * device can handle */ rq->nr_phys_segments++; rq->nr_hw_segments++; } if (!q->prep_rq_fn) |
1da177e4c
|
740 741 742 743 744 745 |
break; ret = q->prep_rq_fn(q, rq); if (ret == BLKPREP_OK) { break; } else if (ret == BLKPREP_DEFER) { |
2e759cd4f
|
746 747 748 |
/* * the request may have been (partially) prepped. * we need to keep this request in the front to |
8922e16cf
|
749 750 |
* avoid resource deadlock. REQ_STARTED will * prevent other fs requests from passing this one. |
2e759cd4f
|
751 |
*/ |
fa0ccd837
|
752 753 754 755 756 757 758 759 760 |
if (q->dma_drain_size && rq->data_len && !(rq->cmd_flags & REQ_DONTPREP)) { /* * remove the space for the drain we added * so that we don't add it again */ --rq->nr_phys_segments; --rq->nr_hw_segments; } |
1da177e4c
|
761 762 763 |
rq = NULL; break; } else if (ret == BLKPREP_KILL) { |
4aff5e233
|
764 |
rq->cmd_flags |= REQ_QUIET; |
a0cd12854
|
765 |
end_queued_request(rq, 0); |
1da177e4c
|
766 |
} else { |
24c03d47d
|
767 768 |
printk(KERN_ERR "%s: bad return=%d ", __func__, ret); |
1da177e4c
|
769 770 771 772 773 774 |
break; } } return rq; } |
2e662b65f
|
775 |
EXPORT_SYMBOL(elv_next_request); |
165125e1e
|
776 |
void elv_dequeue_request(struct request_queue *q, struct request *rq) |
1da177e4c
|
777 |
{ |
8922e16cf
|
778 |
BUG_ON(list_empty(&rq->queuelist)); |
9817064b6
|
779 |
BUG_ON(ELV_ON_HASH(rq)); |
8922e16cf
|
780 781 |
list_del_init(&rq->queuelist); |
1da177e4c
|
782 783 784 785 |
/* * the time frame between a request being removed from the lists * and to it is freed is accounted as io that is in progress at |
8922e16cf
|
786 |
* the driver side. |
1da177e4c
|
787 788 789 |
*/ if (blk_account_rq(rq)) q->in_flight++; |
1da177e4c
|
790 |
} |
2e662b65f
|
791 |
EXPORT_SYMBOL(elv_dequeue_request); |
165125e1e
|
792 |
int elv_queue_empty(struct request_queue *q) |
1da177e4c
|
793 794 |
{ elevator_t *e = q->elevator; |
8922e16cf
|
795 796 |
if (!list_empty(&q->queue_head)) return 0; |
1da177e4c
|
797 798 |
if (e->ops->elevator_queue_empty_fn) return e->ops->elevator_queue_empty_fn(q); |
8922e16cf
|
799 |
return 1; |
1da177e4c
|
800 |
} |
2e662b65f
|
801 |
EXPORT_SYMBOL(elv_queue_empty); |
165125e1e
|
802 |
struct request *elv_latter_request(struct request_queue *q, struct request *rq) |
1da177e4c
|
803 |
{ |
1da177e4c
|
804 805 806 807 |
elevator_t *e = q->elevator; if (e->ops->elevator_latter_req_fn) return e->ops->elevator_latter_req_fn(q, rq); |
1da177e4c
|
808 809 |
return NULL; } |
165125e1e
|
810 |
struct request *elv_former_request(struct request_queue *q, struct request *rq) |
1da177e4c
|
811 |
{ |
1da177e4c
|
812 813 814 815 |
elevator_t *e = q->elevator; if (e->ops->elevator_former_req_fn) return e->ops->elevator_former_req_fn(q, rq); |
1da177e4c
|
816 817 |
return NULL; } |
165125e1e
|
818 |
/*
 * Let the io scheduler attach its private data to rq.  Returns the
 * result of the scheduler's set_req hook; if there is no hook,
 * rq->elevator_private is cleared and 0 is returned.
 */
int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
{
	elevator_t *elv = q->elevator;

	if (!elv->ops->elevator_set_req_fn) {
		rq->elevator_private = NULL;
		return 0;
	}

	return elv->ops->elevator_set_req_fn(q, rq, gfp_mask);
}
165125e1e
|
828 |
void elv_put_request(struct request_queue *q, struct request *rq) |
1da177e4c
|
829 830 831 832 |
{ elevator_t *e = q->elevator; if (e->ops->elevator_put_req_fn) |
bb37b94c6
|
833 |
e->ops->elevator_put_req_fn(rq); |
1da177e4c
|
834 |
} |
165125e1e
|
835 |
int elv_may_queue(struct request_queue *q, int rw) |
1da177e4c
|
836 837 838 839 |
{ elevator_t *e = q->elevator; if (e->ops->elevator_may_queue_fn) |
cb78b285c
|
840 |
return e->ops->elevator_may_queue_fn(q, rw); |
1da177e4c
|
841 842 843 |
return ELV_MQUEUE_MAY; } |
165125e1e
|
844 |
void elv_completed_request(struct request_queue *q, struct request *rq) |
1da177e4c
|
845 846 847 848 849 850 |
{ elevator_t *e = q->elevator; /* * request is released from the driver, io must be done */ |
8922e16cf
|
851 |
if (blk_account_rq(rq)) { |
1da177e4c
|
852 |
q->in_flight--; |
1bc691d35
|
853 854 855 |
if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) e->ops->elevator_completed_req_fn(q, rq); } |
797e7dbbe
|
856 |
|
1bc691d35
|
857 858 859 860 861 862 863 |
/* * Check if the queue is waiting for fs requests to be * drained for flush sequence. */ if (unlikely(q->ordseq)) { struct request *first_rq = list_entry_rq(q->queue_head.next); if (q->in_flight == 0 && |
797e7dbbe
|
864 865 866 867 868 |
blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); q->request_fn(q); } |
8922e16cf
|
869 |
} |
1da177e4c
|
870 |
} |
3d1ab40f4
|
871 872 873 874 |
#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) static ssize_t elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) |
1da177e4c
|
875 |
{ |
3d1ab40f4
|
876 877 878 879 880 881 882 883 884 885 886 887 |
elevator_t *e = container_of(kobj, elevator_t, kobj); struct elv_fs_entry *entry = to_elv(attr); ssize_t error; if (!entry->show) return -EIO; mutex_lock(&e->sysfs_lock); error = e->ops ? entry->show(e, page) : -ENOENT; mutex_unlock(&e->sysfs_lock); return error; } |
1da177e4c
|
888 |
|
3d1ab40f4
|
889 890 891 892 893 894 895 |
static ssize_t elv_attr_store(struct kobject *kobj, struct attribute *attr, const char *page, size_t length) { elevator_t *e = container_of(kobj, elevator_t, kobj); struct elv_fs_entry *entry = to_elv(attr); ssize_t error; |
1da177e4c
|
896 |
|
3d1ab40f4
|
897 898 |
if (!entry->store) return -EIO; |
1da177e4c
|
899 |
|
3d1ab40f4
|
900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 |
mutex_lock(&e->sysfs_lock); error = e->ops ? entry->store(e, page, length) : -ENOENT; mutex_unlock(&e->sysfs_lock); return error; } static struct sysfs_ops elv_sysfs_ops = { .show = elv_attr_show, .store = elv_attr_store, }; static struct kobj_type elv_ktype = { .sysfs_ops = &elv_sysfs_ops, .release = elevator_release, }; int elv_register_queue(struct request_queue *q) { elevator_t *e = q->elevator; int error; |
b2d6db587
|
920 |
error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched"); |
3d1ab40f4
|
921 |
if (!error) { |
e572ec7e4
|
922 |
struct elv_fs_entry *attr = e->elevator_type->elevator_attrs; |
3d1ab40f4
|
923 |
if (attr) { |
e572ec7e4
|
924 925 |
while (attr->attr.name) { if (sysfs_create_file(&e->kobj, &attr->attr)) |
3d1ab40f4
|
926 |
break; |
e572ec7e4
|
927 |
attr++; |
3d1ab40f4
|
928 929 930 931 932 |
} } kobject_uevent(&e->kobj, KOBJ_ADD); } return error; |
1da177e4c
|
933 |
} |
bc1c11697
|
934 935 936 937 938 |
static void __elv_unregister_queue(elevator_t *e) { kobject_uevent(&e->kobj, KOBJ_REMOVE); kobject_del(&e->kobj); } |
1da177e4c
|
939 940 |
void elv_unregister_queue(struct request_queue *q) { |
bc1c11697
|
941 942 |
if (q) __elv_unregister_queue(q->elevator); |
1da177e4c
|
943 |
} |
2fdd82bd8
|
944 |
void elv_register(struct elevator_type *e) |
1da177e4c
|
945 |
{ |
1ffb96c58
|
946 |
char *def = ""; |
2a12dcd71
|
947 948 |
spin_lock(&elv_list_lock); |
ce5244974
|
949 |
BUG_ON(elevator_find(e->elevator_name)); |
1da177e4c
|
950 |
list_add_tail(&e->list, &elv_list); |
2a12dcd71
|
951 |
spin_unlock(&elv_list_lock); |
1da177e4c
|
952 |
|
5f0039764
|
953 954 955 |
if (!strcmp(e->elevator_name, chosen_elevator) || (!*chosen_elevator && !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) |
1ffb96c58
|
956 |
def = " (default)"; |
4eb166d98
|
957 958 959 |
printk(KERN_INFO "io scheduler %s registered%s ", e->elevator_name, def); |
1da177e4c
|
960 961 962 963 964 |
} EXPORT_SYMBOL_GPL(elv_register); void elv_unregister(struct elevator_type *e) { |
83521d3eb
|
965 966 967 968 969 |
struct task_struct *g, *p; /* * Iterate every thread in the process to remove the io contexts. */ |
e17a9489b
|
970 971 972 973 |
if (e->ops.trim) { read_lock(&tasklist_lock); do_each_thread(g, p) { task_lock(p); |
2d8f61316
|
974 975 |
if (p->io_context) e->ops.trim(p->io_context); |
e17a9489b
|
976 977 978 979 |
task_unlock(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } |
83521d3eb
|
980 |
|
2a12dcd71
|
981 |
spin_lock(&elv_list_lock); |
1da177e4c
|
982 |
list_del_init(&e->list); |
2a12dcd71
|
983 |
spin_unlock(&elv_list_lock); |
1da177e4c
|
984 985 986 987 988 989 990 |
} EXPORT_SYMBOL_GPL(elv_unregister); /* * switch to new_e io scheduler. be careful not to introduce deadlocks - * we don't free the old io scheduler, before we have allocated what we * need for the new one. this way we have a chance of going back to the old |
cb98fc8bb
|
991 |
* one, if the new one fails init for some reason. |
1da177e4c
|
992 |
*/ |
165125e1e
|
993 |
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) |
1da177e4c
|
994 |
{ |
cb98fc8bb
|
995 |
elevator_t *old_elevator, *e; |
bc1c11697
|
996 |
void *data; |
1da177e4c
|
997 |
|
cb98fc8bb
|
998 999 1000 |
/* * Allocate new elevator */ |
b5deef901
|
1001 |
e = elevator_alloc(q, new_e); |
1da177e4c
|
1002 |
if (!e) |
3d1ab40f4
|
1003 |
return 0; |
1da177e4c
|
1004 |
|
bc1c11697
|
1005 1006 1007 1008 1009 |
data = elevator_init_queue(q, e); if (!data) { kobject_put(&e->kobj); return 0; } |
1da177e4c
|
1010 |
/* |
cb98fc8bb
|
1011 |
* Turn on BYPASS and drain all requests w/ elevator private data |
1da177e4c
|
1012 |
*/ |
cb98fc8bb
|
1013 |
spin_lock_irq(q->queue_lock); |
75ad23bc0
|
1014 |
queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); |
cb98fc8bb
|
1015 |
|
15853af9f
|
1016 |
elv_drain_elevator(q); |
cb98fc8bb
|
1017 1018 |
while (q->rq.elvpriv) { |
407df2aa2
|
1019 1020 |
blk_remove_plug(q); q->request_fn(q); |
cb98fc8bb
|
1021 |
spin_unlock_irq(q->queue_lock); |
64521d1a3
|
1022 |
msleep(10); |
cb98fc8bb
|
1023 |
spin_lock_irq(q->queue_lock); |
15853af9f
|
1024 |
elv_drain_elevator(q); |
cb98fc8bb
|
1025 |
} |
1da177e4c
|
1026 |
/* |
bc1c11697
|
1027 |
* Remember old elevator. |
1da177e4c
|
1028 |
*/ |
1da177e4c
|
1029 1030 1031 |
old_elevator = q->elevator; /* |
1da177e4c
|
1032 1033 |
* attach and start new elevator */ |
bc1c11697
|
1034 1035 1036 1037 1038 |
elevator_attach(q, e, data); spin_unlock_irq(q->queue_lock); __elv_unregister_queue(old_elevator); |
1da177e4c
|
1039 1040 1041 1042 1043 |
if (elv_register_queue(q)) goto fail_register; /* |
cb98fc8bb
|
1044 |
* finally exit old elevator and turn off BYPASS. |
1da177e4c
|
1045 1046 |
*/ elevator_exit(old_elevator); |
75ad23bc0
|
1047 1048 1049 |
spin_lock_irq(q->queue_lock); queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); spin_unlock_irq(q->queue_lock); |
3d1ab40f4
|
1050 |
return 1; |
1da177e4c
|
1051 1052 1053 1054 1055 1056 1057 |
fail_register: /* * switch failed, exit the new io scheduler and reattach the old * one again (along with re-adding the sysfs dir) */ elevator_exit(e); |
1da177e4c
|
1058 1059 |
q->elevator = old_elevator; elv_register_queue(q); |
75ad23bc0
|
1060 1061 1062 1063 |
spin_lock_irq(q->queue_lock); queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); spin_unlock_irq(q->queue_lock); |
3d1ab40f4
|
1064 |
return 0; |
1da177e4c
|
1065 |
} |
165125e1e
|
1066 1067 |
ssize_t elv_iosched_store(struct request_queue *q, const char *name, size_t count) |
1da177e4c
|
1068 1069 |
{ char elevator_name[ELV_NAME_MAX]; |
be5612356
|
1070 |
size_t len; |
1da177e4c
|
1071 |
struct elevator_type *e; |
be5612356
|
1072 1073 1074 |
elevator_name[sizeof(elevator_name) - 1] = '\0'; strncpy(elevator_name, name, sizeof(elevator_name) - 1); len = strlen(elevator_name); |
1da177e4c
|
1075 |
|
be5612356
|
1076 1077 1078 |
if (len && elevator_name[len - 1] == ' ') elevator_name[len - 1] = '\0'; |
1da177e4c
|
1079 1080 1081 1082 1083 1084 1085 |
e = elevator_get(elevator_name); if (!e) { printk(KERN_ERR "elevator: type %s not found ", elevator_name); return -EINVAL; } |
2ca7d93bb
|
1086 1087 |
if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { elevator_put(e); |
1da177e4c
|
1088 |
return count; |
2ca7d93bb
|
1089 |
} |
1da177e4c
|
1090 |
|
3d1ab40f4
|
1091 |
if (!elevator_switch(q, e)) |
4eb166d98
|
1092 1093 1094 |
printk(KERN_ERR "elevator: switch to %s failed ", elevator_name); |
1da177e4c
|
1095 1096 |
return count; } |
165125e1e
|
1097 |
ssize_t elv_iosched_show(struct request_queue *q, char *name) |
1da177e4c
|
1098 1099 1100 |
{ elevator_t *e = q->elevator; struct elevator_type *elv = e->elevator_type; |
70cee26e0
|
1101 |
struct elevator_type *__e; |
1da177e4c
|
1102 |
int len = 0; |
2a12dcd71
|
1103 |
spin_lock(&elv_list_lock); |
70cee26e0
|
1104 |
list_for_each_entry(__e, &elv_list, list) { |
1da177e4c
|
1105 1106 1107 1108 1109 |
if (!strcmp(elv->elevator_name, __e->elevator_name)) len += sprintf(name+len, "[%s] ", elv->elevator_name); else len += sprintf(name+len, "%s ", __e->elevator_name); } |
2a12dcd71
|
1110 |
spin_unlock(&elv_list_lock); |
1da177e4c
|
1111 1112 1113 1114 1115 |
len += sprintf(len+name, " "); return len; } |
165125e1e
|
1116 1117 |
struct request *elv_rb_former_request(struct request_queue *q, struct request *rq) |
2e662b65f
|
1118 1119 1120 1121 1122 1123 1124 1125 |
{ struct rb_node *rbprev = rb_prev(&rq->rb_node); if (rbprev) return rb_entry_rq(rbprev); return NULL; } |
2e662b65f
|
1126 |
EXPORT_SYMBOL(elv_rb_former_request); |
165125e1e
|
1127 1128 |
struct request *elv_rb_latter_request(struct request_queue *q, struct request *rq) |
2e662b65f
|
1129 1130 1131 1132 1133 1134 1135 1136 |
{ struct rb_node *rbnext = rb_next(&rq->rb_node); if (rbnext) return rb_entry_rq(rbnext); return NULL; } |
2e662b65f
|
1137 |
EXPORT_SYMBOL(elv_rb_latter_request); |