Commit 2e662b65f05d550b6799ed6bfa9963b82279e6b7
Committed by Jens Axboe
1 parent 10fd48f237
[PATCH] elevator: abstract out the rbtree sort handling
The rbtree sort/lookup/reposition logic is mostly duplicated in cfq/deadline/as, so move it to the elevator core. The io schedulers still provide the actual rb root, as we don't want to impose any sort of specific handling on the schedulers.

Introduce the helpers and rb_node in struct request to help migrate the IO schedulers.

Signed-off-by: Jens Axboe <axboe@suse.de>
Showing 4 changed files with 130 additions and 19 deletions
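The helpers this commit adds to the elevator core are elv_rb_add(), elv_rb_del() and elv_rb_find(). They operate on an rb_root that the IO scheduler itself owns, plus the new rb_node embedded in struct request, so the core only centralises the tree walks and imposes no sorting policy on the schedulers. As a rough illustration (not part of this commit; the my_sched_* names and layout are hypothetical), a scheduler like deadline could be migrated along these lines:

/*
 * Illustrative sketch only -- not from this commit.  It shows how an IO
 * scheduler might use the new elevator rbtree helpers; the my_sched_*
 * names are hypothetical.
 */
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/rbtree.h>

struct my_sched_data {
	struct rb_root sort_list[2];	/* one sorted tree per data direction */
};

static void my_sched_add_request(request_queue_t *q, struct request *rq)
{
	struct my_sched_data *md = q->elevator->elevator_data;
	struct rb_root *root = &md->sort_list[rq_data_dir(rq)];
	struct request *__alias;

	/*
	 * elv_rb_add() returns an already-queued request at the same sector
	 * (an alias) instead of inserting, so the caller decides what to do
	 * with the collision -- here the alias is simply dispatched.
	 */
	while ((__alias = elv_rb_add(root, rq)) != NULL)
		elv_dispatch_sort(q, __alias);
}

static void my_sched_remove_request(struct my_sched_data *md, struct request *rq)
{
	elv_rb_del(&md->sort_list[rq_data_dir(rq)], rq);
}

static struct request *
my_sched_find_request(struct my_sched_data *md, int data_dir, sector_t sector)
{
	/* O(log n) lookup of the request starting at 'sector', if any */
	return elv_rb_find(&md->sort_list[data_dir], sector);
}

Keeping the rb_root in the scheduler's private data leaves per-scheduler policy (one tree per data direction, per cfq queue, and so on) out of the core while still removing the duplicated insert/lookup/erase code.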
block/elevator.c
1 | /* | 1 | /* |
2 | * Block device elevator/IO-scheduler. | 2 | * Block device elevator/IO-scheduler. |
3 | * | 3 | * |
4 | * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
5 | * | 5 | * |
6 | * 30042000 Jens Axboe <axboe@suse.de> : | 6 | * 30042000 Jens Axboe <axboe@suse.de> : |
7 | * | 7 | * |
8 | * Split the elevator a bit so that it is possible to choose a different | 8 | * Split the elevator a bit so that it is possible to choose a different |
9 | * one or even write a new "plug in". There are three pieces: | 9 | * one or even write a new "plug in". There are three pieces: |
10 | * - elevator_fn, inserts a new request in the queue list | 10 | * - elevator_fn, inserts a new request in the queue list |
11 | * - elevator_merge_fn, decides whether a new buffer can be merged with | 11 | * - elevator_merge_fn, decides whether a new buffer can be merged with |
12 | * an existing request | 12 | * an existing request |
13 | * - elevator_dequeue_fn, called when a request is taken off the active list | 13 | * - elevator_dequeue_fn, called when a request is taken off the active list |
14 | * | 14 | * |
15 | * 20082000 Dave Jones <davej@suse.de> : | 15 | * 20082000 Dave Jones <davej@suse.de> : |
16 | * Removed tests for max-bomb-segments, which was breaking elvtune | 16 | * Removed tests for max-bomb-segments, which was breaking elvtune |
17 | * when run without -bN | 17 | * when run without -bN |
18 | * | 18 | * |
19 | * Jens: | 19 | * Jens: |
20 | * - Rework again to work with bio instead of buffer_heads | 20 | * - Rework again to work with bio instead of buffer_heads |
21 | * - loose bi_dev comparisons, partition handling is right now | 21 | * - loose bi_dev comparisons, partition handling is right now |
22 | * - completely modularize elevator setup and teardown | 22 | * - completely modularize elevator setup and teardown |
23 | * | 23 | * |
24 | */ | 24 | */ |
25 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/blkdev.h> | 27 | #include <linux/blkdev.h> |
28 | #include <linux/elevator.h> | 28 | #include <linux/elevator.h> |
29 | #include <linux/bio.h> | 29 | #include <linux/bio.h> |
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <linux/init.h> | 32 | #include <linux/init.h> |
33 | #include <linux/compiler.h> | 33 | #include <linux/compiler.h> |
34 | #include <linux/delay.h> | 34 | #include <linux/delay.h> |
35 | #include <linux/blktrace_api.h> | 35 | #include <linux/blktrace_api.h> |
36 | #include <linux/hash.h> | 36 | #include <linux/hash.h> |
37 | 37 | ||
38 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
39 | 39 | ||
40 | static DEFINE_SPINLOCK(elv_list_lock); | 40 | static DEFINE_SPINLOCK(elv_list_lock); |
41 | static LIST_HEAD(elv_list); | 41 | static LIST_HEAD(elv_list); |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Merge hash stuff. | 44 | * Merge hash stuff. |
45 | */ | 45 | */ |
46 | static const int elv_hash_shift = 6; | 46 | static const int elv_hash_shift = 6; |
47 | #define ELV_HASH_BLOCK(sec) ((sec) >> 3) | 47 | #define ELV_HASH_BLOCK(sec) ((sec) >> 3) |
48 | #define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) | 48 | #define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) |
49 | #define ELV_HASH_ENTRIES (1 << elv_hash_shift) | 49 | #define ELV_HASH_ENTRIES (1 << elv_hash_shift) |
50 | #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) | 50 | #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) |
51 | #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) | 51 | #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) |
52 | 52 | ||
53 | /* | 53 | /* |
54 | * can we safely merge with this request? | 54 | * can we safely merge with this request? |
55 | */ | 55 | */ |
56 | inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) | 56 | inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) |
57 | { | 57 | { |
58 | if (!rq_mergeable(rq)) | 58 | if (!rq_mergeable(rq)) |
59 | return 0; | 59 | return 0; |
60 | 60 | ||
61 | /* | 61 | /* |
62 | * different data direction or already started, don't merge | 62 | * different data direction or already started, don't merge |
63 | */ | 63 | */ |
64 | if (bio_data_dir(bio) != rq_data_dir(rq)) | 64 | if (bio_data_dir(bio) != rq_data_dir(rq)) |
65 | return 0; | 65 | return 0; |
66 | 66 | ||
67 | /* | 67 | /* |
68 | * same device and no special stuff set, merge is ok | 68 | * same device and no special stuff set, merge is ok |
69 | */ | 69 | */ |
70 | if (rq->rq_disk == bio->bi_bdev->bd_disk && | 70 | if (rq->rq_disk == bio->bi_bdev->bd_disk && |
71 | !rq->waiting && !rq->special) | 71 | !rq->waiting && !rq->special) |
72 | return 1; | 72 | return 1; |
73 | 73 | ||
74 | return 0; | 74 | return 0; |
75 | } | 75 | } |
76 | EXPORT_SYMBOL(elv_rq_merge_ok); | 76 | EXPORT_SYMBOL(elv_rq_merge_ok); |
77 | 77 | ||
78 | static inline int elv_try_merge(struct request *__rq, struct bio *bio) | 78 | static inline int elv_try_merge(struct request *__rq, struct bio *bio) |
79 | { | 79 | { |
80 | int ret = ELEVATOR_NO_MERGE; | 80 | int ret = ELEVATOR_NO_MERGE; |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * we can merge and sequence is ok, check if it's possible | 83 | * we can merge and sequence is ok, check if it's possible |
84 | */ | 84 | */ |
85 | if (elv_rq_merge_ok(__rq, bio)) { | 85 | if (elv_rq_merge_ok(__rq, bio)) { |
86 | if (__rq->sector + __rq->nr_sectors == bio->bi_sector) | 86 | if (__rq->sector + __rq->nr_sectors == bio->bi_sector) |
87 | ret = ELEVATOR_BACK_MERGE; | 87 | ret = ELEVATOR_BACK_MERGE; |
88 | else if (__rq->sector - bio_sectors(bio) == bio->bi_sector) | 88 | else if (__rq->sector - bio_sectors(bio) == bio->bi_sector) |
89 | ret = ELEVATOR_FRONT_MERGE; | 89 | ret = ELEVATOR_FRONT_MERGE; |
90 | } | 90 | } |
91 | 91 | ||
92 | return ret; | 92 | return ret; |
93 | } | 93 | } |
94 | 94 | ||
95 | static struct elevator_type *elevator_find(const char *name) | 95 | static struct elevator_type *elevator_find(const char *name) |
96 | { | 96 | { |
97 | struct elevator_type *e = NULL; | 97 | struct elevator_type *e = NULL; |
98 | struct list_head *entry; | 98 | struct list_head *entry; |
99 | 99 | ||
100 | list_for_each(entry, &elv_list) { | 100 | list_for_each(entry, &elv_list) { |
101 | struct elevator_type *__e; | 101 | struct elevator_type *__e; |
102 | 102 | ||
103 | __e = list_entry(entry, struct elevator_type, list); | 103 | __e = list_entry(entry, struct elevator_type, list); |
104 | 104 | ||
105 | if (!strcmp(__e->elevator_name, name)) { | 105 | if (!strcmp(__e->elevator_name, name)) { |
106 | e = __e; | 106 | e = __e; |
107 | break; | 107 | break; |
108 | } | 108 | } |
109 | } | 109 | } |
110 | 110 | ||
111 | return e; | 111 | return e; |
112 | } | 112 | } |
113 | 113 | ||
114 | static void elevator_put(struct elevator_type *e) | 114 | static void elevator_put(struct elevator_type *e) |
115 | { | 115 | { |
116 | module_put(e->elevator_owner); | 116 | module_put(e->elevator_owner); |
117 | } | 117 | } |
118 | 118 | ||
119 | static struct elevator_type *elevator_get(const char *name) | 119 | static struct elevator_type *elevator_get(const char *name) |
120 | { | 120 | { |
121 | struct elevator_type *e; | 121 | struct elevator_type *e; |
122 | 122 | ||
123 | spin_lock_irq(&elv_list_lock); | 123 | spin_lock_irq(&elv_list_lock); |
124 | 124 | ||
125 | e = elevator_find(name); | 125 | e = elevator_find(name); |
126 | if (e && !try_module_get(e->elevator_owner)) | 126 | if (e && !try_module_get(e->elevator_owner)) |
127 | e = NULL; | 127 | e = NULL; |
128 | 128 | ||
129 | spin_unlock_irq(&elv_list_lock); | 129 | spin_unlock_irq(&elv_list_lock); |
130 | 130 | ||
131 | return e; | 131 | return e; |
132 | } | 132 | } |
133 | 133 | ||
134 | static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq) | 134 | static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq) |
135 | { | 135 | { |
136 | return eq->ops->elevator_init_fn(q, eq); | 136 | return eq->ops->elevator_init_fn(q, eq); |
137 | } | 137 | } |
138 | 138 | ||
139 | static void elevator_attach(request_queue_t *q, struct elevator_queue *eq, | 139 | static void elevator_attach(request_queue_t *q, struct elevator_queue *eq, |
140 | void *data) | 140 | void *data) |
141 | { | 141 | { |
142 | q->elevator = eq; | 142 | q->elevator = eq; |
143 | eq->elevator_data = data; | 143 | eq->elevator_data = data; |
144 | } | 144 | } |
145 | 145 | ||
146 | static char chosen_elevator[16]; | 146 | static char chosen_elevator[16]; |
147 | 147 | ||
148 | static int __init elevator_setup(char *str) | 148 | static int __init elevator_setup(char *str) |
149 | { | 149 | { |
150 | /* | 150 | /* |
151 | * Be backwards-compatible with previous kernels, so users | 151 | * Be backwards-compatible with previous kernels, so users |
152 | * won't get the wrong elevator. | 152 | * won't get the wrong elevator. |
153 | */ | 153 | */ |
154 | if (!strcmp(str, "as")) | 154 | if (!strcmp(str, "as")) |
155 | strcpy(chosen_elevator, "anticipatory"); | 155 | strcpy(chosen_elevator, "anticipatory"); |
156 | else | 156 | else |
157 | strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); | 157 | strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); |
158 | return 1; | 158 | return 1; |
159 | } | 159 | } |
160 | 160 | ||
161 | __setup("elevator=", elevator_setup); | 161 | __setup("elevator=", elevator_setup); |
162 | 162 | ||
163 | static struct kobj_type elv_ktype; | 163 | static struct kobj_type elv_ktype; |
164 | 164 | ||
165 | static elevator_t *elevator_alloc(struct elevator_type *e) | 165 | static elevator_t *elevator_alloc(struct elevator_type *e) |
166 | { | 166 | { |
167 | elevator_t *eq; | 167 | elevator_t *eq; |
168 | int i; | 168 | int i; |
169 | 169 | ||
170 | eq = kmalloc(sizeof(elevator_t), GFP_KERNEL); | 170 | eq = kmalloc(sizeof(elevator_t), GFP_KERNEL); |
171 | if (unlikely(!eq)) | 171 | if (unlikely(!eq)) |
172 | goto err; | 172 | goto err; |
173 | 173 | ||
174 | memset(eq, 0, sizeof(*eq)); | 174 | memset(eq, 0, sizeof(*eq)); |
175 | eq->ops = &e->ops; | 175 | eq->ops = &e->ops; |
176 | eq->elevator_type = e; | 176 | eq->elevator_type = e; |
177 | kobject_init(&eq->kobj); | 177 | kobject_init(&eq->kobj); |
178 | snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); | 178 | snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); |
179 | eq->kobj.ktype = &elv_ktype; | 179 | eq->kobj.ktype = &elv_ktype; |
180 | mutex_init(&eq->sysfs_lock); | 180 | mutex_init(&eq->sysfs_lock); |
181 | 181 | ||
182 | eq->hash = kmalloc(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, GFP_KERNEL); | 182 | eq->hash = kmalloc(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, GFP_KERNEL); |
183 | if (!eq->hash) | 183 | if (!eq->hash) |
184 | goto err; | 184 | goto err; |
185 | 185 | ||
186 | for (i = 0; i < ELV_HASH_ENTRIES; i++) | 186 | for (i = 0; i < ELV_HASH_ENTRIES; i++) |
187 | INIT_HLIST_HEAD(&eq->hash[i]); | 187 | INIT_HLIST_HEAD(&eq->hash[i]); |
188 | 188 | ||
189 | return eq; | 189 | return eq; |
190 | err: | 190 | err: |
191 | kfree(eq); | 191 | kfree(eq); |
192 | elevator_put(e); | 192 | elevator_put(e); |
193 | return NULL; | 193 | return NULL; |
194 | } | 194 | } |
195 | 195 | ||
196 | static void elevator_release(struct kobject *kobj) | 196 | static void elevator_release(struct kobject *kobj) |
197 | { | 197 | { |
198 | elevator_t *e = container_of(kobj, elevator_t, kobj); | 198 | elevator_t *e = container_of(kobj, elevator_t, kobj); |
199 | 199 | ||
200 | elevator_put(e->elevator_type); | 200 | elevator_put(e->elevator_type); |
201 | kfree(e->hash); | 201 | kfree(e->hash); |
202 | kfree(e); | 202 | kfree(e); |
203 | } | 203 | } |
204 | 204 | ||
205 | int elevator_init(request_queue_t *q, char *name) | 205 | int elevator_init(request_queue_t *q, char *name) |
206 | { | 206 | { |
207 | struct elevator_type *e = NULL; | 207 | struct elevator_type *e = NULL; |
208 | struct elevator_queue *eq; | 208 | struct elevator_queue *eq; |
209 | int ret = 0; | 209 | int ret = 0; |
210 | void *data; | 210 | void *data; |
211 | 211 | ||
212 | INIT_LIST_HEAD(&q->queue_head); | 212 | INIT_LIST_HEAD(&q->queue_head); |
213 | q->last_merge = NULL; | 213 | q->last_merge = NULL; |
214 | q->end_sector = 0; | 214 | q->end_sector = 0; |
215 | q->boundary_rq = NULL; | 215 | q->boundary_rq = NULL; |
216 | 216 | ||
217 | if (name && !(e = elevator_get(name))) | 217 | if (name && !(e = elevator_get(name))) |
218 | return -EINVAL; | 218 | return -EINVAL; |
219 | 219 | ||
220 | if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator))) | 220 | if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator))) |
221 | printk("I/O scheduler %s not found\n", chosen_elevator); | 221 | printk("I/O scheduler %s not found\n", chosen_elevator); |
222 | 222 | ||
223 | if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) { | 223 | if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) { |
224 | printk("Default I/O scheduler not found, using no-op\n"); | 224 | printk("Default I/O scheduler not found, using no-op\n"); |
225 | e = elevator_get("noop"); | 225 | e = elevator_get("noop"); |
226 | } | 226 | } |
227 | 227 | ||
228 | eq = elevator_alloc(e); | 228 | eq = elevator_alloc(e); |
229 | if (!eq) | 229 | if (!eq) |
230 | return -ENOMEM; | 230 | return -ENOMEM; |
231 | 231 | ||
232 | data = elevator_init_queue(q, eq); | 232 | data = elevator_init_queue(q, eq); |
233 | if (!data) { | 233 | if (!data) { |
234 | kobject_put(&eq->kobj); | 234 | kobject_put(&eq->kobj); |
235 | return -ENOMEM; | 235 | return -ENOMEM; |
236 | } | 236 | } |
237 | 237 | ||
238 | elevator_attach(q, eq, data); | 238 | elevator_attach(q, eq, data); |
239 | return ret; | 239 | return ret; |
240 | } | 240 | } |
241 | 241 | ||
242 | EXPORT_SYMBOL(elevator_init); | ||
243 | |||
242 | void elevator_exit(elevator_t *e) | 244 | void elevator_exit(elevator_t *e) |
243 | { | 245 | { |
244 | mutex_lock(&e->sysfs_lock); | 246 | mutex_lock(&e->sysfs_lock); |
245 | if (e->ops->elevator_exit_fn) | 247 | if (e->ops->elevator_exit_fn) |
246 | e->ops->elevator_exit_fn(e); | 248 | e->ops->elevator_exit_fn(e); |
247 | e->ops = NULL; | 249 | e->ops = NULL; |
248 | mutex_unlock(&e->sysfs_lock); | 250 | mutex_unlock(&e->sysfs_lock); |
249 | 251 | ||
250 | kobject_put(&e->kobj); | 252 | kobject_put(&e->kobj); |
251 | } | 253 | } |
252 | 254 | ||
255 | EXPORT_SYMBOL(elevator_exit); | ||
256 | |||
253 | static inline void __elv_rqhash_del(struct request *rq) | 257 | static inline void __elv_rqhash_del(struct request *rq) |
254 | { | 258 | { |
255 | hlist_del_init(&rq->hash); | 259 | hlist_del_init(&rq->hash); |
256 | } | 260 | } |
257 | 261 | ||
258 | static void elv_rqhash_del(request_queue_t *q, struct request *rq) | 262 | static void elv_rqhash_del(request_queue_t *q, struct request *rq) |
259 | { | 263 | { |
260 | if (ELV_ON_HASH(rq)) | 264 | if (ELV_ON_HASH(rq)) |
261 | __elv_rqhash_del(rq); | 265 | __elv_rqhash_del(rq); |
262 | } | 266 | } |
263 | 267 | ||
264 | static void elv_rqhash_add(request_queue_t *q, struct request *rq) | 268 | static void elv_rqhash_add(request_queue_t *q, struct request *rq) |
265 | { | 269 | { |
266 | elevator_t *e = q->elevator; | 270 | elevator_t *e = q->elevator; |
267 | 271 | ||
268 | BUG_ON(ELV_ON_HASH(rq)); | 272 | BUG_ON(ELV_ON_HASH(rq)); |
269 | hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); | 273 | hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); |
270 | } | 274 | } |
271 | 275 | ||
272 | static void elv_rqhash_reposition(request_queue_t *q, struct request *rq) | 276 | static void elv_rqhash_reposition(request_queue_t *q, struct request *rq) |
273 | { | 277 | { |
274 | __elv_rqhash_del(rq); | 278 | __elv_rqhash_del(rq); |
275 | elv_rqhash_add(q, rq); | 279 | elv_rqhash_add(q, rq); |
276 | } | 280 | } |
277 | 281 | ||
278 | static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset) | 282 | static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset) |
279 | { | 283 | { |
280 | elevator_t *e = q->elevator; | 284 | elevator_t *e = q->elevator; |
281 | struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; | 285 | struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; |
282 | struct hlist_node *entry, *next; | 286 | struct hlist_node *entry, *next; |
283 | struct request *rq; | 287 | struct request *rq; |
284 | 288 | ||
285 | hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) { | 289 | hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) { |
286 | BUG_ON(!ELV_ON_HASH(rq)); | 290 | BUG_ON(!ELV_ON_HASH(rq)); |
287 | 291 | ||
288 | if (unlikely(!rq_mergeable(rq))) { | 292 | if (unlikely(!rq_mergeable(rq))) { |
289 | __elv_rqhash_del(rq); | 293 | __elv_rqhash_del(rq); |
290 | continue; | 294 | continue; |
291 | } | 295 | } |
292 | 296 | ||
293 | if (rq_hash_key(rq) == offset) | 297 | if (rq_hash_key(rq) == offset) |
294 | return rq; | 298 | return rq; |
295 | } | 299 | } |
296 | 300 | ||
297 | return NULL; | 301 | return NULL; |
298 | } | 302 | } |
299 | 303 | ||
300 | /* | 304 | /* |
305 | * RB-tree support functions for inserting/lookup/removal of requests | ||
306 | * in a sorted RB tree. | ||
307 | */ | ||
308 | struct request *elv_rb_add(struct rb_root *root, struct request *rq) | ||
309 | { | ||
310 | struct rb_node **p = &root->rb_node; | ||
311 | struct rb_node *parent = NULL; | ||
312 | struct request *__rq; | ||
313 | |||
314 | while (*p) { | ||
315 | parent = *p; | ||
316 | __rq = rb_entry(parent, struct request, rb_node); | ||
317 | |||
318 | if (rq->sector < __rq->sector) | ||
319 | p = &(*p)->rb_left; | ||
320 | else if (rq->sector > __rq->sector) | ||
321 | p = &(*p)->rb_right; | ||
322 | else | ||
323 | return __rq; | ||
324 | } | ||
325 | |||
326 | rb_link_node(&rq->rb_node, parent, p); | ||
327 | rb_insert_color(&rq->rb_node, root); | ||
328 | return NULL; | ||
329 | } | ||
330 | |||
331 | EXPORT_SYMBOL(elv_rb_add); | ||
332 | |||
333 | void elv_rb_del(struct rb_root *root, struct request *rq) | ||
334 | { | ||
335 | BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); | ||
336 | rb_erase(&rq->rb_node, root); | ||
337 | RB_CLEAR_NODE(&rq->rb_node); | ||
338 | } | ||
339 | |||
340 | EXPORT_SYMBOL(elv_rb_del); | ||
341 | |||
342 | struct request *elv_rb_find(struct rb_root *root, sector_t sector) | ||
343 | { | ||
344 | struct rb_node *n = root->rb_node; | ||
345 | struct request *rq; | ||
346 | |||
347 | while (n) { | ||
348 | rq = rb_entry(n, struct request, rb_node); | ||
349 | |||
350 | if (sector < rq->sector) | ||
351 | n = n->rb_left; | ||
352 | else if (sector > rq->sector) | ||
353 | n = n->rb_right; | ||
354 | else | ||
355 | return rq; | ||
356 | } | ||
357 | |||
358 | return NULL; | ||
359 | } | ||
360 | |||
361 | EXPORT_SYMBOL(elv_rb_find); | ||
362 | |||
363 | /* | ||
301 | * Insert rq into dispatch queue of q. Queue lock must be held on | 364 | * Insert rq into dispatch queue of q. Queue lock must be held on |
302 | * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be | 365 | * entry. rq is sort insted into the dispatch queue. To be used by |
303 | * appended to the dispatch queue. To be used by specific elevators. | 366 | * specific elevators. |
304 | */ | 367 | */ |
305 | void elv_dispatch_sort(request_queue_t *q, struct request *rq) | 368 | void elv_dispatch_sort(request_queue_t *q, struct request *rq) |
306 | { | 369 | { |
307 | sector_t boundary; | 370 | sector_t boundary; |
308 | struct list_head *entry; | 371 | struct list_head *entry; |
309 | 372 | ||
310 | if (q->last_merge == rq) | 373 | if (q->last_merge == rq) |
311 | q->last_merge = NULL; | 374 | q->last_merge = NULL; |
312 | 375 | ||
313 | elv_rqhash_del(q, rq); | 376 | elv_rqhash_del(q, rq); |
314 | 377 | ||
315 | q->nr_sorted--; | 378 | q->nr_sorted--; |
316 | 379 | ||
317 | boundary = q->end_sector; | 380 | boundary = q->end_sector; |
318 | 381 | ||
319 | list_for_each_prev(entry, &q->queue_head) { | 382 | list_for_each_prev(entry, &q->queue_head) { |
320 | struct request *pos = list_entry_rq(entry); | 383 | struct request *pos = list_entry_rq(entry); |
321 | 384 | ||
322 | if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) | 385 | if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) |
323 | break; | 386 | break; |
324 | if (rq->sector >= boundary) { | 387 | if (rq->sector >= boundary) { |
325 | if (pos->sector < boundary) | 388 | if (pos->sector < boundary) |
326 | continue; | 389 | continue; |
327 | } else { | 390 | } else { |
328 | if (pos->sector >= boundary) | 391 | if (pos->sector >= boundary) |
329 | break; | 392 | break; |
330 | } | 393 | } |
331 | if (rq->sector >= pos->sector) | 394 | if (rq->sector >= pos->sector) |
332 | break; | 395 | break; |
333 | } | 396 | } |
334 | 397 | ||
335 | list_add(&rq->queuelist, entry); | 398 | list_add(&rq->queuelist, entry); |
336 | } | 399 | } |
337 | 400 | ||
401 | EXPORT_SYMBOL(elv_dispatch_sort); | ||
402 | |||
338 | /* | 403 | /* |
339 | * This should be in elevator.h, but that requires pulling in rq and q | 404 | * Insert rq into dispatch queue of q. Queue lock must be held on |
405 | * entry. rq is added to the back of the dispatch queue. To be used by | ||
406 | * specific elevators. | ||
340 | */ | 407 | */ |
341 | void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) | 408 | void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) |
342 | { | 409 | { |
343 | if (q->last_merge == rq) | 410 | if (q->last_merge == rq) |
344 | q->last_merge = NULL; | 411 | q->last_merge = NULL; |
345 | 412 | ||
346 | elv_rqhash_del(q, rq); | 413 | elv_rqhash_del(q, rq); |
347 | 414 | ||
348 | q->nr_sorted--; | 415 | q->nr_sorted--; |
349 | 416 | ||
350 | q->end_sector = rq_end_sector(rq); | 417 | q->end_sector = rq_end_sector(rq); |
351 | q->boundary_rq = rq; | 418 | q->boundary_rq = rq; |
352 | list_add_tail(&rq->queuelist, &q->queue_head); | 419 | list_add_tail(&rq->queuelist, &q->queue_head); |
353 | } | 420 | } |
354 | 421 | ||
422 | EXPORT_SYMBOL(elv_dispatch_add_tail); | ||
423 | |||
355 | int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) | 424 | int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) |
356 | { | 425 | { |
357 | elevator_t *e = q->elevator; | 426 | elevator_t *e = q->elevator; |
358 | struct request *__rq; | 427 | struct request *__rq; |
359 | int ret; | 428 | int ret; |
360 | 429 | ||
361 | /* | 430 | /* |
362 | * First try one-hit cache. | 431 | * First try one-hit cache. |
363 | */ | 432 | */ |
364 | if (q->last_merge) { | 433 | if (q->last_merge) { |
365 | ret = elv_try_merge(q->last_merge, bio); | 434 | ret = elv_try_merge(q->last_merge, bio); |
366 | if (ret != ELEVATOR_NO_MERGE) { | 435 | if (ret != ELEVATOR_NO_MERGE) { |
367 | *req = q->last_merge; | 436 | *req = q->last_merge; |
368 | return ret; | 437 | return ret; |
369 | } | 438 | } |
370 | } | 439 | } |
371 | 440 | ||
372 | /* | 441 | /* |
373 | * See if our hash lookup can find a potential backmerge. | 442 | * See if our hash lookup can find a potential backmerge. |
374 | */ | 443 | */ |
375 | __rq = elv_rqhash_find(q, bio->bi_sector); | 444 | __rq = elv_rqhash_find(q, bio->bi_sector); |
376 | if (__rq && elv_rq_merge_ok(__rq, bio)) { | 445 | if (__rq && elv_rq_merge_ok(__rq, bio)) { |
377 | *req = __rq; | 446 | *req = __rq; |
378 | return ELEVATOR_BACK_MERGE; | 447 | return ELEVATOR_BACK_MERGE; |
379 | } | 448 | } |
380 | 449 | ||
381 | if (e->ops->elevator_merge_fn) | 450 | if (e->ops->elevator_merge_fn) |
382 | return e->ops->elevator_merge_fn(q, req, bio); | 451 | return e->ops->elevator_merge_fn(q, req, bio); |
383 | 452 | ||
384 | return ELEVATOR_NO_MERGE; | 453 | return ELEVATOR_NO_MERGE; |
385 | } | 454 | } |
386 | 455 | ||
387 | void elv_merged_request(request_queue_t *q, struct request *rq) | 456 | void elv_merged_request(request_queue_t *q, struct request *rq, int type) |
388 | { | 457 | { |
389 | elevator_t *e = q->elevator; | 458 | elevator_t *e = q->elevator; |
390 | 459 | ||
391 | if (e->ops->elevator_merged_fn) | 460 | if (e->ops->elevator_merged_fn) |
392 | e->ops->elevator_merged_fn(q, rq); | 461 | e->ops->elevator_merged_fn(q, rq, type); |
393 | 462 | ||
394 | elv_rqhash_reposition(q, rq); | 463 | if (type == ELEVATOR_BACK_MERGE) |
464 | elv_rqhash_reposition(q, rq); | ||
395 | 465 | ||
396 | q->last_merge = rq; | 466 | q->last_merge = rq; |
397 | } | 467 | } |
398 | 468 | ||
399 | void elv_merge_requests(request_queue_t *q, struct request *rq, | 469 | void elv_merge_requests(request_queue_t *q, struct request *rq, |
400 | struct request *next) | 470 | struct request *next) |
401 | { | 471 | { |
402 | elevator_t *e = q->elevator; | 472 | elevator_t *e = q->elevator; |
403 | 473 | ||
404 | if (e->ops->elevator_merge_req_fn) | 474 | if (e->ops->elevator_merge_req_fn) |
405 | e->ops->elevator_merge_req_fn(q, rq, next); | 475 | e->ops->elevator_merge_req_fn(q, rq, next); |
406 | 476 | ||
407 | elv_rqhash_reposition(q, rq); | 477 | elv_rqhash_reposition(q, rq); |
408 | elv_rqhash_del(q, next); | 478 | elv_rqhash_del(q, next); |
409 | 479 | ||
410 | q->nr_sorted--; | 480 | q->nr_sorted--; |
411 | q->last_merge = rq; | 481 | q->last_merge = rq; |
412 | } | 482 | } |
413 | 483 | ||
414 | void elv_requeue_request(request_queue_t *q, struct request *rq) | 484 | void elv_requeue_request(request_queue_t *q, struct request *rq) |
415 | { | 485 | { |
416 | elevator_t *e = q->elevator; | 486 | elevator_t *e = q->elevator; |
417 | 487 | ||
418 | /* | 488 | /* |
419 | * it already went through dequeue, we need to decrement the | 489 | * it already went through dequeue, we need to decrement the |
420 | * in_flight count again | 490 | * in_flight count again |
421 | */ | 491 | */ |
422 | if (blk_account_rq(rq)) { | 492 | if (blk_account_rq(rq)) { |
423 | q->in_flight--; | 493 | q->in_flight--; |
424 | if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn) | 494 | if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn) |
425 | e->ops->elevator_deactivate_req_fn(q, rq); | 495 | e->ops->elevator_deactivate_req_fn(q, rq); |
426 | } | 496 | } |
427 | 497 | ||
428 | rq->cmd_flags &= ~REQ_STARTED; | 498 | rq->cmd_flags &= ~REQ_STARTED; |
429 | 499 | ||
430 | elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); | 500 | elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); |
431 | } | 501 | } |
432 | 502 | ||
433 | static void elv_drain_elevator(request_queue_t *q) | 503 | static void elv_drain_elevator(request_queue_t *q) |
434 | { | 504 | { |
435 | static int printed; | 505 | static int printed; |
436 | while (q->elevator->ops->elevator_dispatch_fn(q, 1)) | 506 | while (q->elevator->ops->elevator_dispatch_fn(q, 1)) |
437 | ; | 507 | ; |
438 | if (q->nr_sorted == 0) | 508 | if (q->nr_sorted == 0) |
439 | return; | 509 | return; |
440 | if (printed++ < 10) { | 510 | if (printed++ < 10) { |
441 | printk(KERN_ERR "%s: forced dispatching is broken " | 511 | printk(KERN_ERR "%s: forced dispatching is broken " |
442 | "(nr_sorted=%u), please report this\n", | 512 | "(nr_sorted=%u), please report this\n", |
443 | q->elevator->elevator_type->elevator_name, q->nr_sorted); | 513 | q->elevator->elevator_type->elevator_name, q->nr_sorted); |
444 | } | 514 | } |
445 | } | 515 | } |
446 | 516 | ||
447 | void elv_insert(request_queue_t *q, struct request *rq, int where) | 517 | void elv_insert(request_queue_t *q, struct request *rq, int where) |
448 | { | 518 | { |
449 | struct list_head *pos; | 519 | struct list_head *pos; |
450 | unsigned ordseq; | 520 | unsigned ordseq; |
451 | int unplug_it = 1; | 521 | int unplug_it = 1; |
452 | 522 | ||
453 | blk_add_trace_rq(q, rq, BLK_TA_INSERT); | 523 | blk_add_trace_rq(q, rq, BLK_TA_INSERT); |
454 | 524 | ||
455 | rq->q = q; | 525 | rq->q = q; |
456 | 526 | ||
457 | switch (where) { | 527 | switch (where) { |
458 | case ELEVATOR_INSERT_FRONT: | 528 | case ELEVATOR_INSERT_FRONT: |
459 | rq->cmd_flags |= REQ_SOFTBARRIER; | 529 | rq->cmd_flags |= REQ_SOFTBARRIER; |
460 | 530 | ||
461 | list_add(&rq->queuelist, &q->queue_head); | 531 | list_add(&rq->queuelist, &q->queue_head); |
462 | break; | 532 | break; |
463 | 533 | ||
464 | case ELEVATOR_INSERT_BACK: | 534 | case ELEVATOR_INSERT_BACK: |
465 | rq->cmd_flags |= REQ_SOFTBARRIER; | 535 | rq->cmd_flags |= REQ_SOFTBARRIER; |
466 | elv_drain_elevator(q); | 536 | elv_drain_elevator(q); |
467 | list_add_tail(&rq->queuelist, &q->queue_head); | 537 | list_add_tail(&rq->queuelist, &q->queue_head); |
468 | /* | 538 | /* |
469 | * We kick the queue here for the following reasons. | 539 | * We kick the queue here for the following reasons. |
470 | * - The elevator might have returned NULL previously | 540 | * - The elevator might have returned NULL previously |
471 | * to delay requests and returned them now. As the | 541 | * to delay requests and returned them now. As the |
472 | * queue wasn't empty before this request, ll_rw_blk | 542 | * queue wasn't empty before this request, ll_rw_blk |
473 | * won't run the queue on return, resulting in hang. | 543 | * won't run the queue on return, resulting in hang. |
474 | * - Usually, back inserted requests won't be merged | 544 | * - Usually, back inserted requests won't be merged |
475 | * with anything. There's no point in delaying queue | 545 | * with anything. There's no point in delaying queue |
476 | * processing. | 546 | * processing. |
477 | */ | 547 | */ |
478 | blk_remove_plug(q); | 548 | blk_remove_plug(q); |
479 | q->request_fn(q); | 549 | q->request_fn(q); |
480 | break; | 550 | break; |
481 | 551 | ||
482 | case ELEVATOR_INSERT_SORT: | 552 | case ELEVATOR_INSERT_SORT: |
483 | BUG_ON(!blk_fs_request(rq)); | 553 | BUG_ON(!blk_fs_request(rq)); |
484 | rq->cmd_flags |= REQ_SORTED; | 554 | rq->cmd_flags |= REQ_SORTED; |
485 | q->nr_sorted++; | 555 | q->nr_sorted++; |
486 | if (rq_mergeable(rq)) { | 556 | if (rq_mergeable(rq)) { |
487 | elv_rqhash_add(q, rq); | 557 | elv_rqhash_add(q, rq); |
488 | if (!q->last_merge) | 558 | if (!q->last_merge) |
489 | q->last_merge = rq; | 559 | q->last_merge = rq; |
490 | } | 560 | } |
491 | 561 | ||
492 | /* | 562 | /* |
493 | * Some ioscheds (cfq) run q->request_fn directly, so | 563 | * Some ioscheds (cfq) run q->request_fn directly, so |
494 | * rq cannot be accessed after calling | 564 | * rq cannot be accessed after calling |
495 | * elevator_add_req_fn. | 565 | * elevator_add_req_fn. |
496 | */ | 566 | */ |
497 | q->elevator->ops->elevator_add_req_fn(q, rq); | 567 | q->elevator->ops->elevator_add_req_fn(q, rq); |
498 | break; | 568 | break; |
499 | 569 | ||
500 | case ELEVATOR_INSERT_REQUEUE: | 570 | case ELEVATOR_INSERT_REQUEUE: |
501 | /* | 571 | /* |
502 | * If ordered flush isn't in progress, we do front | 572 | * If ordered flush isn't in progress, we do front |
503 | * insertion; otherwise, requests should be requeued | 573 | * insertion; otherwise, requests should be requeued |
504 | * in ordseq order. | 574 | * in ordseq order. |
505 | */ | 575 | */ |
506 | rq->cmd_flags |= REQ_SOFTBARRIER; | 576 | rq->cmd_flags |= REQ_SOFTBARRIER; |
507 | 577 | ||
508 | if (q->ordseq == 0) { | 578 | if (q->ordseq == 0) { |
509 | list_add(&rq->queuelist, &q->queue_head); | 579 | list_add(&rq->queuelist, &q->queue_head); |
510 | break; | 580 | break; |
511 | } | 581 | } |
512 | 582 | ||
513 | ordseq = blk_ordered_req_seq(rq); | 583 | ordseq = blk_ordered_req_seq(rq); |
514 | 584 | ||
515 | list_for_each(pos, &q->queue_head) { | 585 | list_for_each(pos, &q->queue_head) { |
516 | struct request *pos_rq = list_entry_rq(pos); | 586 | struct request *pos_rq = list_entry_rq(pos); |
517 | if (ordseq <= blk_ordered_req_seq(pos_rq)) | 587 | if (ordseq <= blk_ordered_req_seq(pos_rq)) |
518 | break; | 588 | break; |
519 | } | 589 | } |
520 | 590 | ||
521 | list_add_tail(&rq->queuelist, pos); | 591 | list_add_tail(&rq->queuelist, pos); |
522 | /* | 592 | /* |
523 | * most requeues happen because of a busy condition, don't | 593 | * most requeues happen because of a busy condition, don't |
524 | * force unplug of the queue for that case. | 594 | * force unplug of the queue for that case. |
525 | */ | 595 | */ |
526 | unplug_it = 0; | 596 | unplug_it = 0; |
527 | break; | 597 | break; |
528 | 598 | ||
529 | default: | 599 | default: |
530 | printk(KERN_ERR "%s: bad insertion point %d\n", | 600 | printk(KERN_ERR "%s: bad insertion point %d\n", |
531 | __FUNCTION__, where); | 601 | __FUNCTION__, where); |
532 | BUG(); | 602 | BUG(); |
533 | } | 603 | } |
534 | 604 | ||
535 | if (unplug_it && blk_queue_plugged(q)) { | 605 | if (unplug_it && blk_queue_plugged(q)) { |
536 | int nrq = q->rq.count[READ] + q->rq.count[WRITE] | 606 | int nrq = q->rq.count[READ] + q->rq.count[WRITE] |
537 | - q->in_flight; | 607 | - q->in_flight; |
538 | 608 | ||
539 | if (nrq >= q->unplug_thresh) | 609 | if (nrq >= q->unplug_thresh) |
540 | __generic_unplug_device(q); | 610 | __generic_unplug_device(q); |
541 | } | 611 | } |
542 | } | 612 | } |
543 | 613 | ||
544 | void __elv_add_request(request_queue_t *q, struct request *rq, int where, | 614 | void __elv_add_request(request_queue_t *q, struct request *rq, int where, |
545 | int plug) | 615 | int plug) |
546 | { | 616 | { |
547 | if (q->ordcolor) | 617 | if (q->ordcolor) |
548 | rq->cmd_flags |= REQ_ORDERED_COLOR; | 618 | rq->cmd_flags |= REQ_ORDERED_COLOR; |
549 | 619 | ||
550 | if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { | 620 | if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { |
551 | /* | 621 | /* |
552 | * toggle ordered color | 622 | * toggle ordered color |
553 | */ | 623 | */ |
554 | if (blk_barrier_rq(rq)) | 624 | if (blk_barrier_rq(rq)) |
555 | q->ordcolor ^= 1; | 625 | q->ordcolor ^= 1; |
556 | 626 | ||
557 | /* | 627 | /* |
558 | * barriers implicitly indicate back insertion | 628 | * barriers implicitly indicate back insertion |
559 | */ | 629 | */ |
560 | if (where == ELEVATOR_INSERT_SORT) | 630 | if (where == ELEVATOR_INSERT_SORT) |
561 | where = ELEVATOR_INSERT_BACK; | 631 | where = ELEVATOR_INSERT_BACK; |
562 | 632 | ||
563 | /* | 633 | /* |
564 | * this request is scheduling boundary, update | 634 | * this request is scheduling boundary, update |
565 | * end_sector | 635 | * end_sector |
566 | */ | 636 | */ |
567 | if (blk_fs_request(rq)) { | 637 | if (blk_fs_request(rq)) { |
568 | q->end_sector = rq_end_sector(rq); | 638 | q->end_sector = rq_end_sector(rq); |
569 | q->boundary_rq = rq; | 639 | q->boundary_rq = rq; |
570 | } | 640 | } |
571 | } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) | 641 | } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) |
572 | where = ELEVATOR_INSERT_BACK; | 642 | where = ELEVATOR_INSERT_BACK; |
573 | 643 | ||
574 | if (plug) | 644 | if (plug) |
575 | blk_plug_device(q); | 645 | blk_plug_device(q); |
576 | 646 | ||
577 | elv_insert(q, rq, where); | 647 | elv_insert(q, rq, where); |
578 | } | 648 | } |
579 | 649 | ||
650 | EXPORT_SYMBOL(__elv_add_request); | ||
651 | |||
580 | void elv_add_request(request_queue_t *q, struct request *rq, int where, | 652 | void elv_add_request(request_queue_t *q, struct request *rq, int where, |
581 | int plug) | 653 | int plug) |
582 | { | 654 | { |
583 | unsigned long flags; | 655 | unsigned long flags; |
584 | 656 | ||
585 | spin_lock_irqsave(q->queue_lock, flags); | 657 | spin_lock_irqsave(q->queue_lock, flags); |
586 | __elv_add_request(q, rq, where, plug); | 658 | __elv_add_request(q, rq, where, plug); |
587 | spin_unlock_irqrestore(q->queue_lock, flags); | 659 | spin_unlock_irqrestore(q->queue_lock, flags); |
588 | } | 660 | } |
589 | 661 | ||
662 | EXPORT_SYMBOL(elv_add_request); | ||
663 | |||
590 | static inline struct request *__elv_next_request(request_queue_t *q) | 664 | static inline struct request *__elv_next_request(request_queue_t *q) |
591 | { | 665 | { |
592 | struct request *rq; | 666 | struct request *rq; |
593 | 667 | ||
594 | while (1) { | 668 | while (1) { |
595 | while (!list_empty(&q->queue_head)) { | 669 | while (!list_empty(&q->queue_head)) { |
596 | rq = list_entry_rq(q->queue_head.next); | 670 | rq = list_entry_rq(q->queue_head.next); |
597 | if (blk_do_ordered(q, &rq)) | 671 | if (blk_do_ordered(q, &rq)) |
598 | return rq; | 672 | return rq; |
599 | } | 673 | } |
600 | 674 | ||
601 | if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) | 675 | if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) |
602 | return NULL; | 676 | return NULL; |
603 | } | 677 | } |
604 | } | 678 | } |
605 | 679 | ||
606 | struct request *elv_next_request(request_queue_t *q) | 680 | struct request *elv_next_request(request_queue_t *q) |
607 | { | 681 | { |
608 | struct request *rq; | 682 | struct request *rq; |
609 | int ret; | 683 | int ret; |
610 | 684 | ||
611 | while ((rq = __elv_next_request(q)) != NULL) { | 685 | while ((rq = __elv_next_request(q)) != NULL) { |
612 | if (!(rq->cmd_flags & REQ_STARTED)) { | 686 | if (!(rq->cmd_flags & REQ_STARTED)) { |
613 | elevator_t *e = q->elevator; | 687 | elevator_t *e = q->elevator; |
614 | 688 | ||
615 | /* | 689 | /* |
616 | * This is the first time the device driver | 690 | * This is the first time the device driver |
617 | * sees this request (possibly after | 691 | * sees this request (possibly after |
618 | * requeueing). Notify IO scheduler. | 692 | * requeueing). Notify IO scheduler. |
619 | */ | 693 | */ |
620 | if (blk_sorted_rq(rq) && | 694 | if (blk_sorted_rq(rq) && |
621 | e->ops->elevator_activate_req_fn) | 695 | e->ops->elevator_activate_req_fn) |
622 | e->ops->elevator_activate_req_fn(q, rq); | 696 | e->ops->elevator_activate_req_fn(q, rq); |
623 | 697 | ||
624 | /* | 698 | /* |
625 | * just mark as started even if we don't start | 699 | * just mark as started even if we don't start |
626 | * it, a request that has been delayed should | 700 | * it, a request that has been delayed should |
627 | * not be passed by new incoming requests | 701 | * not be passed by new incoming requests |
628 | */ | 702 | */ |
629 | rq->cmd_flags |= REQ_STARTED; | 703 | rq->cmd_flags |= REQ_STARTED; |
630 | blk_add_trace_rq(q, rq, BLK_TA_ISSUE); | 704 | blk_add_trace_rq(q, rq, BLK_TA_ISSUE); |
631 | } | 705 | } |
632 | 706 | ||
633 | if (!q->boundary_rq || q->boundary_rq == rq) { | 707 | if (!q->boundary_rq || q->boundary_rq == rq) { |
634 | q->end_sector = rq_end_sector(rq); | 708 | q->end_sector = rq_end_sector(rq); |
635 | q->boundary_rq = NULL; | 709 | q->boundary_rq = NULL; |
636 | } | 710 | } |
637 | 711 | ||
638 | if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn) | 712 | if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn) |
639 | break; | 713 | break; |
640 | 714 | ||
641 | ret = q->prep_rq_fn(q, rq); | 715 | ret = q->prep_rq_fn(q, rq); |
642 | if (ret == BLKPREP_OK) { | 716 | if (ret == BLKPREP_OK) { |
643 | break; | 717 | break; |
644 | } else if (ret == BLKPREP_DEFER) { | 718 | } else if (ret == BLKPREP_DEFER) { |
645 | /* | 719 | /* |
646 | * the request may have been (partially) prepped. | 720 | * the request may have been (partially) prepped. |
647 | * we need to keep this request in the front to | 721 | * we need to keep this request in the front to |
648 | * avoid resource deadlock. REQ_STARTED will | 722 | * avoid resource deadlock. REQ_STARTED will |
649 | * prevent other fs requests from passing this one. | 723 | * prevent other fs requests from passing this one. |
650 | */ | 724 | */ |
651 | rq = NULL; | 725 | rq = NULL; |
652 | break; | 726 | break; |
653 | } else if (ret == BLKPREP_KILL) { | 727 | } else if (ret == BLKPREP_KILL) { |
654 | int nr_bytes = rq->hard_nr_sectors << 9; | 728 | int nr_bytes = rq->hard_nr_sectors << 9; |
655 | 729 | ||
656 | if (!nr_bytes) | 730 | if (!nr_bytes) |
657 | nr_bytes = rq->data_len; | 731 | nr_bytes = rq->data_len; |
658 | 732 | ||
659 | blkdev_dequeue_request(rq); | 733 | blkdev_dequeue_request(rq); |
660 | rq->cmd_flags |= REQ_QUIET; | 734 | rq->cmd_flags |= REQ_QUIET; |
661 | end_that_request_chunk(rq, 0, nr_bytes); | 735 | end_that_request_chunk(rq, 0, nr_bytes); |
662 | end_that_request_last(rq, 0); | 736 | end_that_request_last(rq, 0); |
663 | } else { | 737 | } else { |
664 | printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, | 738 | printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, |
665 | ret); | 739 | ret); |
666 | break; | 740 | break; |
667 | } | 741 | } |
668 | } | 742 | } |
669 | 743 | ||
670 | return rq; | 744 | return rq; |
671 | } | 745 | } |
672 | 746 | ||
747 | EXPORT_SYMBOL(elv_next_request); | ||
748 | |||
673 | void elv_dequeue_request(request_queue_t *q, struct request *rq) | 749 | void elv_dequeue_request(request_queue_t *q, struct request *rq) |
674 | { | 750 | { |
675 | BUG_ON(list_empty(&rq->queuelist)); | 751 | BUG_ON(list_empty(&rq->queuelist)); |
676 | BUG_ON(ELV_ON_HASH(rq)); | 752 | BUG_ON(ELV_ON_HASH(rq)); |
677 | 753 | ||
678 | list_del_init(&rq->queuelist); | 754 | list_del_init(&rq->queuelist); |
679 | 755 | ||
680 | /* | 756 | /* |
681 | * the time frame between a request being removed from the lists | 757 | * the time frame between a request being removed from the lists |
682 | * and to it is freed is accounted as io that is in progress at | 758 | * and to it is freed is accounted as io that is in progress at |
683 | * the driver side. | 759 | * the driver side. |
684 | */ | 760 | */ |
685 | if (blk_account_rq(rq)) | 761 | if (blk_account_rq(rq)) |
686 | q->in_flight++; | 762 | q->in_flight++; |
687 | } | 763 | } |
688 | 764 | ||
765 | EXPORT_SYMBOL(elv_dequeue_request); | ||
766 | |||
689 | int elv_queue_empty(request_queue_t *q) | 767 | int elv_queue_empty(request_queue_t *q) |
690 | { | 768 | { |
691 | elevator_t *e = q->elevator; | 769 | elevator_t *e = q->elevator; |
692 | 770 | ||
693 | if (!list_empty(&q->queue_head)) | 771 | if (!list_empty(&q->queue_head)) |
694 | return 0; | 772 | return 0; |
695 | 773 | ||
696 | if (e->ops->elevator_queue_empty_fn) | 774 | if (e->ops->elevator_queue_empty_fn) |
697 | return e->ops->elevator_queue_empty_fn(q); | 775 | return e->ops->elevator_queue_empty_fn(q); |
698 | 776 | ||
699 | return 1; | 777 | return 1; |
700 | } | 778 | } |
701 | 779 | ||
780 | EXPORT_SYMBOL(elv_queue_empty); | ||
781 | |||
702 | struct request *elv_latter_request(request_queue_t *q, struct request *rq) | 782 | struct request *elv_latter_request(request_queue_t *q, struct request *rq) |
703 | { | 783 | { |
704 | elevator_t *e = q->elevator; | 784 | elevator_t *e = q->elevator; |
705 | 785 | ||
706 | if (e->ops->elevator_latter_req_fn) | 786 | if (e->ops->elevator_latter_req_fn) |
707 | return e->ops->elevator_latter_req_fn(q, rq); | 787 | return e->ops->elevator_latter_req_fn(q, rq); |
708 | return NULL; | 788 | return NULL; |
709 | } | 789 | } |
710 | 790 | ||
711 | struct request *elv_former_request(request_queue_t *q, struct request *rq) | 791 | struct request *elv_former_request(request_queue_t *q, struct request *rq) |
712 | { | 792 | { |
713 | elevator_t *e = q->elevator; | 793 | elevator_t *e = q->elevator; |
714 | 794 | ||
715 | if (e->ops->elevator_former_req_fn) | 795 | if (e->ops->elevator_former_req_fn) |
716 | return e->ops->elevator_former_req_fn(q, rq); | 796 | return e->ops->elevator_former_req_fn(q, rq); |
717 | return NULL; | 797 | return NULL; |
718 | } | 798 | } |
719 | 799 | ||
720 | int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio, | 800 | int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio, |
721 | gfp_t gfp_mask) | 801 | gfp_t gfp_mask) |
722 | { | 802 | { |
723 | elevator_t *e = q->elevator; | 803 | elevator_t *e = q->elevator; |
724 | 804 | ||
725 | if (e->ops->elevator_set_req_fn) | 805 | if (e->ops->elevator_set_req_fn) |
726 | return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask); | 806 | return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask); |
727 | 807 | ||
728 | rq->elevator_private = NULL; | 808 | rq->elevator_private = NULL; |
729 | return 0; | 809 | return 0; |
730 | } | 810 | } |
731 | 811 | ||
732 | void elv_put_request(request_queue_t *q, struct request *rq) | 812 | void elv_put_request(request_queue_t *q, struct request *rq) |
733 | { | 813 | { |
734 | elevator_t *e = q->elevator; | 814 | elevator_t *e = q->elevator; |
735 | 815 | ||
736 | if (e->ops->elevator_put_req_fn) | 816 | if (e->ops->elevator_put_req_fn) |
737 | e->ops->elevator_put_req_fn(q, rq); | 817 | e->ops->elevator_put_req_fn(q, rq); |
738 | } | 818 | } |
739 | 819 | ||
740 | int elv_may_queue(request_queue_t *q, int rw, struct bio *bio) | 820 | int elv_may_queue(request_queue_t *q, int rw, struct bio *bio) |
741 | { | 821 | { |
742 | elevator_t *e = q->elevator; | 822 | elevator_t *e = q->elevator; |
743 | 823 | ||
744 | if (e->ops->elevator_may_queue_fn) | 824 | if (e->ops->elevator_may_queue_fn) |
745 | return e->ops->elevator_may_queue_fn(q, rw, bio); | 825 | return e->ops->elevator_may_queue_fn(q, rw, bio); |
746 | 826 | ||
747 | return ELV_MQUEUE_MAY; | 827 | return ELV_MQUEUE_MAY; |
748 | } | 828 | } |
749 | 829 | ||
750 | void elv_completed_request(request_queue_t *q, struct request *rq) | 830 | void elv_completed_request(request_queue_t *q, struct request *rq) |
751 | { | 831 | { |
752 | elevator_t *e = q->elevator; | 832 | elevator_t *e = q->elevator; |
753 | 833 | ||
754 | /* | 834 | /* |
755 | * request is released from the driver, io must be done | 835 | * request is released from the driver, io must be done |
756 | */ | 836 | */ |
757 | if (blk_account_rq(rq)) { | 837 | if (blk_account_rq(rq)) { |
758 | q->in_flight--; | 838 | q->in_flight--; |
759 | if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) | 839 | if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) |
760 | e->ops->elevator_completed_req_fn(q, rq); | 840 | e->ops->elevator_completed_req_fn(q, rq); |
761 | } | 841 | } |
762 | 842 | ||
763 | /* | 843 | /* |
764 | * Check if the queue is waiting for fs requests to be | 844 | * Check if the queue is waiting for fs requests to be |
765 | * drained for flush sequence. | 845 | * drained for flush sequence. |
766 | */ | 846 | */ |
767 | if (unlikely(q->ordseq)) { | 847 | if (unlikely(q->ordseq)) { |
768 | struct request *first_rq = list_entry_rq(q->queue_head.next); | 848 | struct request *first_rq = list_entry_rq(q->queue_head.next); |
769 | if (q->in_flight == 0 && | 849 | if (q->in_flight == 0 && |
770 | blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && | 850 | blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && |
771 | blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { | 851 | blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { |
772 | blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); | 852 | blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); |
773 | q->request_fn(q); | 853 | q->request_fn(q); |
774 | } | 854 | } |
775 | } | 855 | } |
776 | } | 856 | } |
777 | 857 | ||
778 | #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) | 858 | #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) |
779 | 859 | ||
780 | static ssize_t | 860 | static ssize_t |
781 | elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | 861 | elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) |
782 | { | 862 | { |
783 | elevator_t *e = container_of(kobj, elevator_t, kobj); | 863 | elevator_t *e = container_of(kobj, elevator_t, kobj); |
784 | struct elv_fs_entry *entry = to_elv(attr); | 864 | struct elv_fs_entry *entry = to_elv(attr); |
785 | ssize_t error; | 865 | ssize_t error; |
786 | 866 | ||
787 | if (!entry->show) | 867 | if (!entry->show) |
788 | return -EIO; | 868 | return -EIO; |
789 | 869 | ||
790 | mutex_lock(&e->sysfs_lock); | 870 | mutex_lock(&e->sysfs_lock); |
791 | error = e->ops ? entry->show(e, page) : -ENOENT; | 871 | error = e->ops ? entry->show(e, page) : -ENOENT; |
792 | mutex_unlock(&e->sysfs_lock); | 872 | mutex_unlock(&e->sysfs_lock); |
793 | return error; | 873 | return error; |
794 | } | 874 | } |
795 | 875 | ||
796 | static ssize_t | 876 | static ssize_t |
797 | elv_attr_store(struct kobject *kobj, struct attribute *attr, | 877 | elv_attr_store(struct kobject *kobj, struct attribute *attr, |
798 | const char *page, size_t length) | 878 | const char *page, size_t length) |
799 | { | 879 | { |
800 | elevator_t *e = container_of(kobj, elevator_t, kobj); | 880 | elevator_t *e = container_of(kobj, elevator_t, kobj); |
801 | struct elv_fs_entry *entry = to_elv(attr); | 881 | struct elv_fs_entry *entry = to_elv(attr); |
802 | ssize_t error; | 882 | ssize_t error; |
803 | 883 | ||
804 | if (!entry->store) | 884 | if (!entry->store) |
805 | return -EIO; | 885 | return -EIO; |
806 | 886 | ||
807 | mutex_lock(&e->sysfs_lock); | 887 | mutex_lock(&e->sysfs_lock); |
808 | error = e->ops ? entry->store(e, page, length) : -ENOENT; | 888 | error = e->ops ? entry->store(e, page, length) : -ENOENT; |
809 | mutex_unlock(&e->sysfs_lock); | 889 | mutex_unlock(&e->sysfs_lock); |
810 | return error; | 890 | return error; |
811 | } | 891 | } |
812 | 892 | ||
813 | static struct sysfs_ops elv_sysfs_ops = { | 893 | static struct sysfs_ops elv_sysfs_ops = { |
814 | .show = elv_attr_show, | 894 | .show = elv_attr_show, |
815 | .store = elv_attr_store, | 895 | .store = elv_attr_store, |
816 | }; | 896 | }; |
817 | 897 | ||
818 | static struct kobj_type elv_ktype = { | 898 | static struct kobj_type elv_ktype = { |
819 | .sysfs_ops = &elv_sysfs_ops, | 899 | .sysfs_ops = &elv_sysfs_ops, |
820 | .release = elevator_release, | 900 | .release = elevator_release, |
821 | }; | 901 | }; |
822 | 902 | ||
823 | int elv_register_queue(struct request_queue *q) | 903 | int elv_register_queue(struct request_queue *q) |
824 | { | 904 | { |
825 | elevator_t *e = q->elevator; | 905 | elevator_t *e = q->elevator; |
826 | int error; | 906 | int error; |
827 | 907 | ||
828 | e->kobj.parent = &q->kobj; | 908 | e->kobj.parent = &q->kobj; |
829 | 909 | ||
830 | error = kobject_add(&e->kobj); | 910 | error = kobject_add(&e->kobj); |
831 | if (!error) { | 911 | if (!error) { |
832 | struct elv_fs_entry *attr = e->elevator_type->elevator_attrs; | 912 | struct elv_fs_entry *attr = e->elevator_type->elevator_attrs; |
833 | if (attr) { | 913 | if (attr) { |
834 | while (attr->attr.name) { | 914 | while (attr->attr.name) { |
835 | if (sysfs_create_file(&e->kobj, &attr->attr)) | 915 | if (sysfs_create_file(&e->kobj, &attr->attr)) |
836 | break; | 916 | break; |
837 | attr++; | 917 | attr++; |
838 | } | 918 | } |
839 | } | 919 | } |
840 | kobject_uevent(&e->kobj, KOBJ_ADD); | 920 | kobject_uevent(&e->kobj, KOBJ_ADD); |
841 | } | 921 | } |
842 | return error; | 922 | return error; |
843 | } | 923 | } |
844 | 924 | ||
845 | static void __elv_unregister_queue(elevator_t *e) | 925 | static void __elv_unregister_queue(elevator_t *e) |
846 | { | 926 | { |
847 | kobject_uevent(&e->kobj, KOBJ_REMOVE); | 927 | kobject_uevent(&e->kobj, KOBJ_REMOVE); |
848 | kobject_del(&e->kobj); | 928 | kobject_del(&e->kobj); |
849 | } | 929 | } |
850 | 930 | ||
851 | void elv_unregister_queue(struct request_queue *q) | 931 | void elv_unregister_queue(struct request_queue *q) |
852 | { | 932 | { |
853 | if (q) | 933 | if (q) |
854 | __elv_unregister_queue(q->elevator); | 934 | __elv_unregister_queue(q->elevator); |
855 | } | 935 | } |
856 | 936 | ||
857 | int elv_register(struct elevator_type *e) | 937 | int elv_register(struct elevator_type *e) |
858 | { | 938 | { |
859 | spin_lock_irq(&elv_list_lock); | 939 | spin_lock_irq(&elv_list_lock); |
860 | BUG_ON(elevator_find(e->elevator_name)); | 940 | BUG_ON(elevator_find(e->elevator_name)); |
861 | list_add_tail(&e->list, &elv_list); | 941 | list_add_tail(&e->list, &elv_list); |
862 | spin_unlock_irq(&elv_list_lock); | 942 | spin_unlock_irq(&elv_list_lock); |
863 | 943 | ||
864 | printk(KERN_INFO "io scheduler %s registered", e->elevator_name); | 944 | printk(KERN_INFO "io scheduler %s registered", e->elevator_name); |
865 | if (!strcmp(e->elevator_name, chosen_elevator) || | 945 | if (!strcmp(e->elevator_name, chosen_elevator) || |
866 | (!*chosen_elevator && | 946 | (!*chosen_elevator && |
867 | !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) | 947 | !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) |
868 | printk(" (default)"); | 948 | printk(" (default)"); |
869 | printk("\n"); | 949 | printk("\n"); |
870 | return 0; | 950 | return 0; |
871 | } | 951 | } |
872 | EXPORT_SYMBOL_GPL(elv_register); | 952 | EXPORT_SYMBOL_GPL(elv_register); |
873 | 953 | ||
874 | void elv_unregister(struct elevator_type *e) | 954 | void elv_unregister(struct elevator_type *e) |
875 | { | 955 | { |
876 | struct task_struct *g, *p; | 956 | struct task_struct *g, *p; |
877 | 957 | ||
878 | /* | 958 | /* |
879 | * Iterate every thread in the process to remove the io contexts. | 959 | * Iterate every thread in the process to remove the io contexts. |
880 | */ | 960 | */ |
881 | if (e->ops.trim) { | 961 | if (e->ops.trim) { |
882 | read_lock(&tasklist_lock); | 962 | read_lock(&tasklist_lock); |
883 | do_each_thread(g, p) { | 963 | do_each_thread(g, p) { |
884 | task_lock(p); | 964 | task_lock(p); |
885 | if (p->io_context) | 965 | if (p->io_context) |
886 | e->ops.trim(p->io_context); | 966 | e->ops.trim(p->io_context); |
887 | task_unlock(p); | 967 | task_unlock(p); |
888 | } while_each_thread(g, p); | 968 | } while_each_thread(g, p); |
889 | read_unlock(&tasklist_lock); | 969 | read_unlock(&tasklist_lock); |
890 | } | 970 | } |
891 | 971 | ||
892 | spin_lock_irq(&elv_list_lock); | 972 | spin_lock_irq(&elv_list_lock); |
893 | list_del_init(&e->list); | 973 | list_del_init(&e->list); |
894 | spin_unlock_irq(&elv_list_lock); | 974 | spin_unlock_irq(&elv_list_lock); |
895 | } | 975 | } |
896 | EXPORT_SYMBOL_GPL(elv_unregister); | 976 | EXPORT_SYMBOL_GPL(elv_unregister); |
897 | 977 | ||
898 | /* | 978 | /* |
899 | * switch to new_e io scheduler. be careful not to introduce deadlocks - | 979 | * switch to new_e io scheduler. be careful not to introduce deadlocks - |
900 | * we don't free the old io scheduler, before we have allocated what we | 980 | * we don't free the old io scheduler, before we have allocated what we |
901 | * need for the new one. this way we have a chance of going back to the old | 981 | * need for the new one. this way we have a chance of going back to the old |
902 | * one, if the new one fails init for some reason. | 982 | * one, if the new one fails init for some reason. |
903 | */ | 983 | */ |
904 | static int elevator_switch(request_queue_t *q, struct elevator_type *new_e) | 984 | static int elevator_switch(request_queue_t *q, struct elevator_type *new_e) |
905 | { | 985 | { |
906 | elevator_t *old_elevator, *e; | 986 | elevator_t *old_elevator, *e; |
907 | void *data; | 987 | void *data; |
908 | 988 | ||
909 | /* | 989 | /* |
910 | * Allocate new elevator | 990 | * Allocate new elevator |
911 | */ | 991 | */ |
912 | e = elevator_alloc(new_e); | 992 | e = elevator_alloc(new_e); |
913 | if (!e) | 993 | if (!e) |
914 | return 0; | 994 | return 0; |
915 | 995 | ||
916 | data = elevator_init_queue(q, e); | 996 | data = elevator_init_queue(q, e); |
917 | if (!data) { | 997 | if (!data) { |
918 | kobject_put(&e->kobj); | 998 | kobject_put(&e->kobj); |
919 | return 0; | 999 | return 0; |
920 | } | 1000 | } |
921 | 1001 | ||
922 | /* | 1002 | /* |
923 | * Turn on BYPASS and drain all requests w/ elevator private data | 1003 | * Turn on BYPASS and drain all requests w/ elevator private data |
924 | */ | 1004 | */ |
925 | spin_lock_irq(q->queue_lock); | 1005 | spin_lock_irq(q->queue_lock); |
926 | 1006 | ||
927 | set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 1007 | set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
928 | 1008 | ||
929 | elv_drain_elevator(q); | 1009 | elv_drain_elevator(q); |
930 | 1010 | ||
931 | while (q->rq.elvpriv) { | 1011 | while (q->rq.elvpriv) { |
932 | blk_remove_plug(q); | 1012 | blk_remove_plug(q); |
933 | q->request_fn(q); | 1013 | q->request_fn(q); |
934 | spin_unlock_irq(q->queue_lock); | 1014 | spin_unlock_irq(q->queue_lock); |
935 | msleep(10); | 1015 | msleep(10); |
936 | spin_lock_irq(q->queue_lock); | 1016 | spin_lock_irq(q->queue_lock); |
937 | elv_drain_elevator(q); | 1017 | elv_drain_elevator(q); |
938 | } | 1018 | } |
939 | 1019 | ||
940 | /* | 1020 | /* |
941 | * Remember old elevator. | 1021 | * Remember old elevator. |
942 | */ | 1022 | */ |
943 | old_elevator = q->elevator; | 1023 | old_elevator = q->elevator; |
944 | 1024 | ||
945 | /* | 1025 | /* |
946 | * attach and start new elevator | 1026 | * attach and start new elevator |
947 | */ | 1027 | */ |
948 | elevator_attach(q, e, data); | 1028 | elevator_attach(q, e, data); |
949 | 1029 | ||
950 | spin_unlock_irq(q->queue_lock); | 1030 | spin_unlock_irq(q->queue_lock); |
951 | 1031 | ||
952 | __elv_unregister_queue(old_elevator); | 1032 | __elv_unregister_queue(old_elevator); |
953 | 1033 | ||
954 | if (elv_register_queue(q)) | 1034 | if (elv_register_queue(q)) |
955 | goto fail_register; | 1035 | goto fail_register; |
956 | 1036 | ||
957 | /* | 1037 | /* |
958 | * finally exit old elevator and turn off BYPASS. | 1038 | * finally exit old elevator and turn off BYPASS. |
959 | */ | 1039 | */ |
960 | elevator_exit(old_elevator); | 1040 | elevator_exit(old_elevator); |
961 | clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 1041 | clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
962 | return 1; | 1042 | return 1; |
963 | 1043 | ||
964 | fail_register: | 1044 | fail_register: |
965 | /* | 1045 | /* |
966 | * switch failed, exit the new io scheduler and reattach the old | 1046 | * switch failed, exit the new io scheduler and reattach the old |
967 | * one again (along with re-adding the sysfs dir) | 1047 | * one again (along with re-adding the sysfs dir) |
968 | */ | 1048 | */ |
969 | elevator_exit(e); | 1049 | elevator_exit(e); |
970 | q->elevator = old_elevator; | 1050 | q->elevator = old_elevator; |
971 | elv_register_queue(q); | 1051 | elv_register_queue(q); |
972 | clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 1052 | clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
973 | return 0; | 1053 | return 0; |
974 | } | 1054 | } |
975 | 1055 | ||
976 | ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count) | 1056 | ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count) |
977 | { | 1057 | { |
978 | char elevator_name[ELV_NAME_MAX]; | 1058 | char elevator_name[ELV_NAME_MAX]; |
979 | size_t len; | 1059 | size_t len; |
980 | struct elevator_type *e; | 1060 | struct elevator_type *e; |
981 | 1061 | ||
982 | elevator_name[sizeof(elevator_name) - 1] = '\0'; | 1062 | elevator_name[sizeof(elevator_name) - 1] = '\0'; |
983 | strncpy(elevator_name, name, sizeof(elevator_name) - 1); | 1063 | strncpy(elevator_name, name, sizeof(elevator_name) - 1); |
984 | len = strlen(elevator_name); | 1064 | len = strlen(elevator_name); |
985 | 1065 | ||
986 | if (len && elevator_name[len - 1] == '\n') | 1066 | if (len && elevator_name[len - 1] == '\n') |
987 | elevator_name[len - 1] = '\0'; | 1067 | elevator_name[len - 1] = '\0'; |
988 | 1068 | ||
989 | e = elevator_get(elevator_name); | 1069 | e = elevator_get(elevator_name); |
990 | if (!e) { | 1070 | if (!e) { |
991 | printk(KERN_ERR "elevator: type %s not found\n", elevator_name); | 1071 | printk(KERN_ERR "elevator: type %s not found\n", elevator_name); |
992 | return -EINVAL; | 1072 | return -EINVAL; |
993 | } | 1073 | } |
994 | 1074 | ||
995 | if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { | 1075 | if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { |
996 | elevator_put(e); | 1076 | elevator_put(e); |
997 | return count; | 1077 | return count; |
998 | } | 1078 | } |
999 | 1079 | ||
1000 | if (!elevator_switch(q, e)) | 1080 | if (!elevator_switch(q, e)) |
1001 | printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name); | 1081 | printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name); |
1002 | return count; | 1082 | return count; |
1003 | } | 1083 | } |
1004 | 1084 | ||
1005 | ssize_t elv_iosched_show(request_queue_t *q, char *name) | 1085 | ssize_t elv_iosched_show(request_queue_t *q, char *name) |
1006 | { | 1086 | { |
1007 | elevator_t *e = q->elevator; | 1087 | elevator_t *e = q->elevator; |
1008 | struct elevator_type *elv = e->elevator_type; | 1088 | struct elevator_type *elv = e->elevator_type; |
1009 | struct list_head *entry; | 1089 | struct list_head *entry; |
1010 | int len = 0; | 1090 | int len = 0; |
1011 | 1091 | ||
1012 | spin_lock_irq(q->queue_lock); | 1092 | spin_lock_irq(q->queue_lock); |
1013 | list_for_each(entry, &elv_list) { | 1093 | list_for_each(entry, &elv_list) { |
1014 | struct elevator_type *__e; | 1094 | struct elevator_type *__e; |
1015 | 1095 | ||
1016 | __e = list_entry(entry, struct elevator_type, list); | 1096 | __e = list_entry(entry, struct elevator_type, list); |
1017 | if (!strcmp(elv->elevator_name, __e->elevator_name)) | 1097 | if (!strcmp(elv->elevator_name, __e->elevator_name)) |
1018 | len += sprintf(name+len, "[%s] ", elv->elevator_name); | 1098 | len += sprintf(name+len, "[%s] ", elv->elevator_name); |
1019 | else | 1099 | else |
1020 | len += sprintf(name+len, "%s ", __e->elevator_name); | 1100 | len += sprintf(name+len, "%s ", __e->elevator_name); |
1021 | } | 1101 | } |
1022 | spin_unlock_irq(q->queue_lock); | 1102 | spin_unlock_irq(q->queue_lock); |
1023 | 1103 | ||
1024 | len += sprintf(len+name, "\n"); | 1104 | len += sprintf(len+name, "\n"); |
1025 | return len; | 1105 | return len; |
1026 | } | 1106 | } |
1027 | 1107 | ||
1028 | EXPORT_SYMBOL(elv_dispatch_sort); | 1108 | struct request *elv_rb_former_request(request_queue_t *q, struct request *rq) |
1029 | EXPORT_SYMBOL(elv_add_request); | 1109 | { |
1030 | EXPORT_SYMBOL(__elv_add_request); | 1110 | struct rb_node *rbprev = rb_prev(&rq->rb_node); |
1031 | EXPORT_SYMBOL(elv_next_request); | 1111 | |
1032 | EXPORT_SYMBOL(elv_dequeue_request); | 1112 | if (rbprev) |
1033 | EXPORT_SYMBOL(elv_queue_empty); | 1113 | return rb_entry_rq(rbprev); |
1034 | EXPORT_SYMBOL(elevator_exit); | 1114 | |
1035 | EXPORT_SYMBOL(elevator_init); | 1115 | return NULL; |
1116 | } | ||
1117 | |||
1118 | EXPORT_SYMBOL(elv_rb_former_request); | ||
1119 | |||
1120 | struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq) | ||
1121 | { | ||
1122 | struct rb_node *rbnext = rb_next(&rq->rb_node); | ||
1123 | |||
1124 | if (rbnext) | ||
1125 | return rb_entry_rq(rbnext); | ||
1126 | |||
1127 | return NULL; | ||
1128 | } | ||
1129 | |||
1130 | EXPORT_SYMBOL(elv_rb_latter_request); | ||
1036 | 1131 |
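The two helpers added at the end of elevator.c only walk a request's rb_node with rb_prev()/rb_next(), so an IO scheduler that keeps its pending requests in a sector-sorted rbtree no longer needs to open-code its former/latter request lookups. A minimal sketch of how a scheduler could wire them up (illustrative only, not part of this patch; the "foo" scheduler name and the omitted hooks are made up, while elevator_former_req_fn/elevator_latter_req_fn are the existing elevator_ops callbacks):

	/* sketch only: assumes <linux/elevator.h> and <linux/module.h>; all other hooks omitted */
	static struct elevator_type iosched_foo = {
		.ops = {
			/* reuse the generic rbtree walkers from the elevator core */
			.elevator_former_req_fn	= elv_rb_former_request,
			.elevator_latter_req_fn	= elv_rb_latter_request,
			/* ... the scheduler's own merge/add/dispatch hooks ... */
		},
		.elevator_name	= "foo",
		.elevator_owner	= THIS_MODULE,
	};

rq_init() in ll_rw_blk.c below clears rq->rb_node with RB_CLEAR_NODE(), so every request carries the node these helpers traverse.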
block/ll_rw_blk.c
1 | /* | 1 | /* |
2 | * Copyright (C) 1991, 1992 Linus Torvalds | 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics | 3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics |
4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> | 5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> |
6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000 | 6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000 |
7 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 | 7 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 |
8 | */ | 8 | */ |
9 | 9 | ||
10 | /* | 10 | /* |
11 | * This handles all read/write requests to block devices | 11 | * This handles all read/write requests to block devices |
12 | */ | 12 | */ |
13 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/backing-dev.h> | 15 | #include <linux/backing-dev.h> |
16 | #include <linux/bio.h> | 16 | #include <linux/bio.h> |
17 | #include <linux/blkdev.h> | 17 | #include <linux/blkdev.h> |
18 | #include <linux/highmem.h> | 18 | #include <linux/highmem.h> |
19 | #include <linux/mm.h> | 19 | #include <linux/mm.h> |
20 | #include <linux/kernel_stat.h> | 20 | #include <linux/kernel_stat.h> |
21 | #include <linux/string.h> | 21 | #include <linux/string.h> |
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ | 23 | #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ |
24 | #include <linux/completion.h> | 24 | #include <linux/completion.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/swap.h> | 26 | #include <linux/swap.h> |
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/interrupt.h> | 28 | #include <linux/interrupt.h> |
29 | #include <linux/cpu.h> | 29 | #include <linux/cpu.h> |
30 | #include <linux/blktrace_api.h> | 30 | #include <linux/blktrace_api.h> |
31 | 31 | ||
32 | /* | 32 | /* |
33 | * for max sense size | 33 | * for max sense size |
34 | */ | 34 | */ |
35 | #include <scsi/scsi_cmnd.h> | 35 | #include <scsi/scsi_cmnd.h> |
36 | 36 | ||
37 | static void blk_unplug_work(void *data); | 37 | static void blk_unplug_work(void *data); |
38 | static void blk_unplug_timeout(unsigned long data); | 38 | static void blk_unplug_timeout(unsigned long data); |
39 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); | 39 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); |
40 | static void init_request_from_bio(struct request *req, struct bio *bio); | 40 | static void init_request_from_bio(struct request *req, struct bio *bio); |
41 | static int __make_request(request_queue_t *q, struct bio *bio); | 41 | static int __make_request(request_queue_t *q, struct bio *bio); |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * For the allocated request tables | 44 | * For the allocated request tables |
45 | */ | 45 | */ |
46 | static kmem_cache_t *request_cachep; | 46 | static kmem_cache_t *request_cachep; |
47 | 47 | ||
48 | /* | 48 | /* |
49 | * For queue allocation | 49 | * For queue allocation |
50 | */ | 50 | */ |
51 | static kmem_cache_t *requestq_cachep; | 51 | static kmem_cache_t *requestq_cachep; |
52 | 52 | ||
53 | /* | 53 | /* |
54 | * For io context allocations | 54 | * For io context allocations |
55 | */ | 55 | */ |
56 | static kmem_cache_t *iocontext_cachep; | 56 | static kmem_cache_t *iocontext_cachep; |
57 | 57 | ||
58 | static wait_queue_head_t congestion_wqh[2] = { | 58 | static wait_queue_head_t congestion_wqh[2] = { |
59 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), | 59 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), |
60 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) | 60 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) |
61 | }; | 61 | }; |
62 | 62 | ||
63 | /* | 63 | /* |
64 | * Controlling structure to kblockd | 64 | * Controlling structure to kblockd |
65 | */ | 65 | */ |
66 | static struct workqueue_struct *kblockd_workqueue; | 66 | static struct workqueue_struct *kblockd_workqueue; |
67 | 67 | ||
68 | unsigned long blk_max_low_pfn, blk_max_pfn; | 68 | unsigned long blk_max_low_pfn, blk_max_pfn; |
69 | 69 | ||
70 | EXPORT_SYMBOL(blk_max_low_pfn); | 70 | EXPORT_SYMBOL(blk_max_low_pfn); |
71 | EXPORT_SYMBOL(blk_max_pfn); | 71 | EXPORT_SYMBOL(blk_max_pfn); |
72 | 72 | ||
73 | static DEFINE_PER_CPU(struct list_head, blk_cpu_done); | 73 | static DEFINE_PER_CPU(struct list_head, blk_cpu_done); |
74 | 74 | ||
75 | /* Amount of time in which a process may batch requests */ | 75 | /* Amount of time in which a process may batch requests */ |
76 | #define BLK_BATCH_TIME (HZ/50UL) | 76 | #define BLK_BATCH_TIME (HZ/50UL) |
77 | 77 | ||
78 | /* Number of requests a "batching" process may submit */ | 78 | /* Number of requests a "batching" process may submit */ |
79 | #define BLK_BATCH_REQ 32 | 79 | #define BLK_BATCH_REQ 32 |
80 | 80 | ||
81 | /* | 81 | /* |
82 | * Return the threshold (number of used requests) at which the queue is | 82 | * Return the threshold (number of used requests) at which the queue is |
83 | * considered to be congested. It includes a little hysteresis to keep the | 83 | * considered to be congested. It includes a little hysteresis to keep the |
84 | * context switch rate down. | 84 | * context switch rate down. |
85 | */ | 85 | */ |
86 | static inline int queue_congestion_on_threshold(struct request_queue *q) | 86 | static inline int queue_congestion_on_threshold(struct request_queue *q) |
87 | { | 87 | { |
88 | return q->nr_congestion_on; | 88 | return q->nr_congestion_on; |
89 | } | 89 | } |
90 | 90 | ||
91 | /* | 91 | /* |
92 | * The threshold at which a queue is considered to be uncongested | 92 | * The threshold at which a queue is considered to be uncongested |
93 | */ | 93 | */ |
94 | static inline int queue_congestion_off_threshold(struct request_queue *q) | 94 | static inline int queue_congestion_off_threshold(struct request_queue *q) |
95 | { | 95 | { |
96 | return q->nr_congestion_off; | 96 | return q->nr_congestion_off; |
97 | } | 97 | } |
98 | 98 | ||
99 | static void blk_queue_congestion_threshold(struct request_queue *q) | 99 | static void blk_queue_congestion_threshold(struct request_queue *q) |
100 | { | 100 | { |
101 | int nr; | 101 | int nr; |
102 | 102 | ||
103 | nr = q->nr_requests - (q->nr_requests / 8) + 1; | 103 | nr = q->nr_requests - (q->nr_requests / 8) + 1; |
104 | if (nr > q->nr_requests) | 104 | if (nr > q->nr_requests) |
105 | nr = q->nr_requests; | 105 | nr = q->nr_requests; |
106 | q->nr_congestion_on = nr; | 106 | q->nr_congestion_on = nr; |
107 | 107 | ||
108 | nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; | 108 | nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; |
109 | if (nr < 1) | 109 | if (nr < 1) |
110 | nr = 1; | 110 | nr = 1; |
111 | q->nr_congestion_off = nr; | 111 | q->nr_congestion_off = nr; |
112 | } | 112 | } |
113 | 113 | ||
114 | /* | 114 | /* |
115 | * A queue has just exited congestion. Note this in the global counter of | 115 | * A queue has just exited congestion. Note this in the global counter of |
116 | * congested queues, and wake up anyone who was waiting for requests to be | 116 | * congested queues, and wake up anyone who was waiting for requests to be |
117 | * put back. | 117 | * put back. |
118 | */ | 118 | */ |
119 | static void clear_queue_congested(request_queue_t *q, int rw) | 119 | static void clear_queue_congested(request_queue_t *q, int rw) |
120 | { | 120 | { |
121 | enum bdi_state bit; | 121 | enum bdi_state bit; |
122 | wait_queue_head_t *wqh = &congestion_wqh[rw]; | 122 | wait_queue_head_t *wqh = &congestion_wqh[rw]; |
123 | 123 | ||
124 | bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; | 124 | bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; |
125 | clear_bit(bit, &q->backing_dev_info.state); | 125 | clear_bit(bit, &q->backing_dev_info.state); |
126 | smp_mb__after_clear_bit(); | 126 | smp_mb__after_clear_bit(); |
127 | if (waitqueue_active(wqh)) | 127 | if (waitqueue_active(wqh)) |
128 | wake_up(wqh); | 128 | wake_up(wqh); |
129 | } | 129 | } |
130 | 130 | ||
131 | /* | 131 | /* |
132 | * A queue has just entered congestion. Flag that in the queue's VM-visible | 132 | * A queue has just entered congestion. Flag that in the queue's VM-visible |
133 | * state flags and increment the global counter of congested queues. | 133 | * state flags and increment the global counter of congested queues. |
134 | */ | 134 | */ |
135 | static void set_queue_congested(request_queue_t *q, int rw) | 135 | static void set_queue_congested(request_queue_t *q, int rw) |
136 | { | 136 | { |
137 | enum bdi_state bit; | 137 | enum bdi_state bit; |
138 | 138 | ||
139 | bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; | 139 | bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; |
140 | set_bit(bit, &q->backing_dev_info.state); | 140 | set_bit(bit, &q->backing_dev_info.state); |
141 | } | 141 | } |
142 | 142 | ||
143 | /** | 143 | /** |
144 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info | 144 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info |
145 | * @bdev: device | 145 | * @bdev: device |
146 | * | 146 | * |
147 | * Locates the passed device's request queue and returns the address of its | 147 | * Locates the passed device's request queue and returns the address of its |
148 | * backing_dev_info | 148 | * backing_dev_info |
149 | * | 149 | * |
150 | * Will return NULL if the request queue cannot be located. | 150 | * Will return NULL if the request queue cannot be located. |
151 | */ | 151 | */ |
152 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) | 152 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) |
153 | { | 153 | { |
154 | struct backing_dev_info *ret = NULL; | 154 | struct backing_dev_info *ret = NULL; |
155 | request_queue_t *q = bdev_get_queue(bdev); | 155 | request_queue_t *q = bdev_get_queue(bdev); |
156 | 156 | ||
157 | if (q) | 157 | if (q) |
158 | ret = &q->backing_dev_info; | 158 | ret = &q->backing_dev_info; |
159 | return ret; | 159 | return ret; |
160 | } | 160 | } |
161 | 161 | ||
162 | EXPORT_SYMBOL(blk_get_backing_dev_info); | 162 | EXPORT_SYMBOL(blk_get_backing_dev_info); |
163 | 163 | ||
164 | void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data) | 164 | void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data) |
165 | { | 165 | { |
166 | q->activity_fn = fn; | 166 | q->activity_fn = fn; |
167 | q->activity_data = data; | 167 | q->activity_data = data; |
168 | } | 168 | } |
169 | 169 | ||
170 | EXPORT_SYMBOL(blk_queue_activity_fn); | 170 | EXPORT_SYMBOL(blk_queue_activity_fn); |
171 | 171 | ||
172 | /** | 172 | /** |
173 | * blk_queue_prep_rq - set a prepare_request function for queue | 173 | * blk_queue_prep_rq - set a prepare_request function for queue |
174 | * @q: queue | 174 | * @q: queue |
175 | * @pfn: prepare_request function | 175 | * @pfn: prepare_request function |
176 | * | 176 | * |
177 | * It's possible for a queue to register a prepare_request callback which | 177 | * It's possible for a queue to register a prepare_request callback which |
178 | * is invoked before the request is handed to the request_fn. The goal of | 178 | * is invoked before the request is handed to the request_fn. The goal of |
179 | * the function is to prepare a request for I/O; it can be used to build a | 179 | * the function is to prepare a request for I/O; it can be used to build a |
180 | * cdb from the request data for instance. | 180 | * cdb from the request data for instance. |
181 | * | 181 | * |
182 | */ | 182 | */ |
183 | void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn) | 183 | void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn) |
184 | { | 184 | { |
185 | q->prep_rq_fn = pfn; | 185 | q->prep_rq_fn = pfn; |
186 | } | 186 | } |
187 | 187 | ||
188 | EXPORT_SYMBOL(blk_queue_prep_rq); | 188 | EXPORT_SYMBOL(blk_queue_prep_rq); |
189 | 189 | ||
190 | /** | 190 | /** |
191 | * blk_queue_merge_bvec - set a merge_bvec function for queue | 191 | * blk_queue_merge_bvec - set a merge_bvec function for queue |
192 | * @q: queue | 192 | * @q: queue |
193 | * @mbfn: merge_bvec_fn | 193 | * @mbfn: merge_bvec_fn |
194 | * | 194 | * |
195 | * Usually queues have static limitations on the max sectors or segments that | 195 | * Usually queues have static limitations on the max sectors or segments that |
196 | * we can put in a request. Stacking drivers may have some settings that | 196 | * we can put in a request. Stacking drivers may have some settings that |
197 | * are dynamic, and thus we have to query the queue whether it is ok to | 197 | * are dynamic, and thus we have to query the queue whether it is ok to |
198 | * add a new bio_vec to a bio at a given offset or not. If the block device | 198 | * add a new bio_vec to a bio at a given offset or not. If the block device |
199 | * has such limitations, it needs to register a merge_bvec_fn to control | 199 | * has such limitations, it needs to register a merge_bvec_fn to control |
200 | * the size of bio's sent to it. Note that a block device *must* allow a | 200 | * the size of bio's sent to it. Note that a block device *must* allow a |
201 | * single page to be added to an empty bio. The block device driver may want | 201 | * single page to be added to an empty bio. The block device driver may want |
202 | * to use the bio_split() function to deal with these bio's. By default | 202 | * to use the bio_split() function to deal with these bio's. By default |
203 | * no merge_bvec_fn is defined for a queue, and only the fixed limits are | 203 | * no merge_bvec_fn is defined for a queue, and only the fixed limits are |
204 | * honored. | 204 | * honored. |
205 | */ | 205 | */ |
206 | void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn) | 206 | void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn) |
207 | { | 207 | { |
208 | q->merge_bvec_fn = mbfn; | 208 | q->merge_bvec_fn = mbfn; |
209 | } | 209 | } |
210 | 210 | ||
211 | EXPORT_SYMBOL(blk_queue_merge_bvec); | 211 | EXPORT_SYMBOL(blk_queue_merge_bvec); |
212 | 212 | ||
213 | void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn) | 213 | void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn) |
214 | { | 214 | { |
215 | q->softirq_done_fn = fn; | 215 | q->softirq_done_fn = fn; |
216 | } | 216 | } |
217 | 217 | ||
218 | EXPORT_SYMBOL(blk_queue_softirq_done); | 218 | EXPORT_SYMBOL(blk_queue_softirq_done); |
219 | 219 | ||
220 | /** | 220 | /** |
221 | * blk_queue_make_request - define an alternate make_request function for a device | 221 | * blk_queue_make_request - define an alternate make_request function for a device |
222 | * @q: the request queue for the device to be affected | 222 | * @q: the request queue for the device to be affected |
223 | * @mfn: the alternate make_request function | 223 | * @mfn: the alternate make_request function |
224 | * | 224 | * |
225 | * Description: | 225 | * Description: |
226 | * The normal way for &struct bios to be passed to a device | 226 | * The normal way for &struct bios to be passed to a device |
227 | * driver is for them to be collected into requests on a request | 227 | * driver is for them to be collected into requests on a request |
228 | * queue, and then to allow the device driver to select requests | 228 | * queue, and then to allow the device driver to select requests |
229 | * off that queue when it is ready. This works well for many block | 229 | * off that queue when it is ready. This works well for many block |
230 | * devices. However some block devices (typically virtual devices | 230 | * devices. However some block devices (typically virtual devices |
231 | * such as md or lvm) do not benefit from the processing on the | 231 | * such as md or lvm) do not benefit from the processing on the |
232 | * request queue, and are served best by having the requests passed | 232 | * request queue, and are served best by having the requests passed |
233 | * directly to them. This can be achieved by providing a function | 233 | * directly to them. This can be achieved by providing a function |
234 | * to blk_queue_make_request(). | 234 | * to blk_queue_make_request(). |
235 | * | 235 | * |
236 | * Caveat: | 236 | * Caveat: |
237 | * The driver that does this *must* be able to deal appropriately | 237 | * The driver that does this *must* be able to deal appropriately |
238 | * with buffers in "highmemory". This can be accomplished by either calling | 238 | * with buffers in "highmemory". This can be accomplished by either calling |
239 | * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling | 239 | * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling |
240 | * blk_queue_bounce() to create a buffer in normal memory. | 240 | * blk_queue_bounce() to create a buffer in normal memory. |
241 | **/ | 241 | **/ |
242 | void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) | 242 | void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) |
243 | { | 243 | { |
244 | /* | 244 | /* |
245 | * set defaults | 245 | * set defaults |
246 | */ | 246 | */ |
247 | q->nr_requests = BLKDEV_MAX_RQ; | 247 | q->nr_requests = BLKDEV_MAX_RQ; |
248 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); | 248 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); |
249 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); | 249 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); |
250 | q->make_request_fn = mfn; | 250 | q->make_request_fn = mfn; |
251 | q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 251 | q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
252 | q->backing_dev_info.state = 0; | 252 | q->backing_dev_info.state = 0; |
253 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; | 253 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; |
254 | blk_queue_max_sectors(q, SAFE_MAX_SECTORS); | 254 | blk_queue_max_sectors(q, SAFE_MAX_SECTORS); |
255 | blk_queue_hardsect_size(q, 512); | 255 | blk_queue_hardsect_size(q, 512); |
256 | blk_queue_dma_alignment(q, 511); | 256 | blk_queue_dma_alignment(q, 511); |
257 | blk_queue_congestion_threshold(q); | 257 | blk_queue_congestion_threshold(q); |
258 | q->nr_batching = BLK_BATCH_REQ; | 258 | q->nr_batching = BLK_BATCH_REQ; |
259 | 259 | ||
260 | q->unplug_thresh = 4; /* hmm */ | 260 | q->unplug_thresh = 4; /* hmm */ |
261 | q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ | 261 | q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ |
262 | if (q->unplug_delay == 0) | 262 | if (q->unplug_delay == 0) |
263 | q->unplug_delay = 1; | 263 | q->unplug_delay = 1; |
264 | 264 | ||
265 | INIT_WORK(&q->unplug_work, blk_unplug_work, q); | 265 | INIT_WORK(&q->unplug_work, blk_unplug_work, q); |
266 | 266 | ||
267 | q->unplug_timer.function = blk_unplug_timeout; | 267 | q->unplug_timer.function = blk_unplug_timeout; |
268 | q->unplug_timer.data = (unsigned long)q; | 268 | q->unplug_timer.data = (unsigned long)q; |
269 | 269 | ||
270 | /* | 270 | /* |
271 | * by default assume old behaviour and bounce for any highmem page | 271 | * by default assume old behaviour and bounce for any highmem page |
272 | */ | 272 | */ |
273 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); | 273 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); |
274 | 274 | ||
275 | blk_queue_activity_fn(q, NULL, NULL); | 275 | blk_queue_activity_fn(q, NULL, NULL); |
276 | } | 276 | } |
277 | 277 | ||
278 | EXPORT_SYMBOL(blk_queue_make_request); | 278 | EXPORT_SYMBOL(blk_queue_make_request); |
279 | 279 | ||
280 | static inline void rq_init(request_queue_t *q, struct request *rq) | 280 | static inline void rq_init(request_queue_t *q, struct request *rq) |
281 | { | 281 | { |
282 | INIT_LIST_HEAD(&rq->queuelist); | 282 | INIT_LIST_HEAD(&rq->queuelist); |
283 | INIT_LIST_HEAD(&rq->donelist); | 283 | INIT_LIST_HEAD(&rq->donelist); |
284 | INIT_HLIST_NODE(&rq->hash); | ||
285 | 284 | ||
286 | rq->errors = 0; | 285 | rq->errors = 0; |
287 | rq->rq_status = RQ_ACTIVE; | 286 | rq->rq_status = RQ_ACTIVE; |
288 | rq->bio = rq->biotail = NULL; | 287 | rq->bio = rq->biotail = NULL; |
288 | INIT_HLIST_NODE(&rq->hash); | ||
289 | RB_CLEAR_NODE(&rq->rb_node); | ||
289 | rq->ioprio = 0; | 290 | rq->ioprio = 0; |
290 | rq->buffer = NULL; | 291 | rq->buffer = NULL; |
291 | rq->ref_count = 1; | 292 | rq->ref_count = 1; |
292 | rq->q = q; | 293 | rq->q = q; |
293 | rq->waiting = NULL; | 294 | rq->waiting = NULL; |
294 | rq->special = NULL; | 295 | rq->special = NULL; |
295 | rq->data_len = 0; | 296 | rq->data_len = 0; |
296 | rq->data = NULL; | 297 | rq->data = NULL; |
297 | rq->nr_phys_segments = 0; | 298 | rq->nr_phys_segments = 0; |
298 | rq->sense = NULL; | 299 | rq->sense = NULL; |
299 | rq->end_io = NULL; | 300 | rq->end_io = NULL; |
300 | rq->end_io_data = NULL; | 301 | rq->end_io_data = NULL; |
301 | rq->completion_data = NULL; | 302 | rq->completion_data = NULL; |
302 | } | 303 | } |
303 | 304 | ||
304 | /** | 305 | /** |
305 | * blk_queue_ordered - does this queue support ordered writes | 306 | * blk_queue_ordered - does this queue support ordered writes |
306 | * @q: the request queue | 307 | * @q: the request queue |
307 | * @ordered: one of QUEUE_ORDERED_* | 308 | * @ordered: one of QUEUE_ORDERED_* |
308 | * @prepare_flush_fn: rq setup helper for cache flush ordered writes | 309 | * @prepare_flush_fn: rq setup helper for cache flush ordered writes |
309 | * | 310 | * |
310 | * Description: | 311 | * Description: |
311 | * For journalled file systems, doing ordered writes on a commit | 312 | * For journalled file systems, doing ordered writes on a commit |
312 | * block instead of explicitly doing wait_on_buffer (which is bad | 313 | * block instead of explicitly doing wait_on_buffer (which is bad |
313 | * for performance) can be a big win. Block drivers supporting this | 314 | * for performance) can be a big win. Block drivers supporting this |
314 | * feature should call this function and indicate so. | 315 | * feature should call this function and indicate so. |
315 | * | 316 | * |
316 | **/ | 317 | **/ |
317 | int blk_queue_ordered(request_queue_t *q, unsigned ordered, | 318 | int blk_queue_ordered(request_queue_t *q, unsigned ordered, |
318 | prepare_flush_fn *prepare_flush_fn) | 319 | prepare_flush_fn *prepare_flush_fn) |
319 | { | 320 | { |
320 | if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && | 321 | if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && |
321 | prepare_flush_fn == NULL) { | 322 | prepare_flush_fn == NULL) { |
322 | printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); | 323 | printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); |
323 | return -EINVAL; | 324 | return -EINVAL; |
324 | } | 325 | } |
325 | 326 | ||
326 | if (ordered != QUEUE_ORDERED_NONE && | 327 | if (ordered != QUEUE_ORDERED_NONE && |
327 | ordered != QUEUE_ORDERED_DRAIN && | 328 | ordered != QUEUE_ORDERED_DRAIN && |
328 | ordered != QUEUE_ORDERED_DRAIN_FLUSH && | 329 | ordered != QUEUE_ORDERED_DRAIN_FLUSH && |
329 | ordered != QUEUE_ORDERED_DRAIN_FUA && | 330 | ordered != QUEUE_ORDERED_DRAIN_FUA && |
330 | ordered != QUEUE_ORDERED_TAG && | 331 | ordered != QUEUE_ORDERED_TAG && |
331 | ordered != QUEUE_ORDERED_TAG_FLUSH && | 332 | ordered != QUEUE_ORDERED_TAG_FLUSH && |
332 | ordered != QUEUE_ORDERED_TAG_FUA) { | 333 | ordered != QUEUE_ORDERED_TAG_FUA) { |
333 | printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); | 334 | printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); |
334 | return -EINVAL; | 335 | return -EINVAL; |
335 | } | 336 | } |
336 | 337 | ||
337 | q->ordered = ordered; | 338 | q->ordered = ordered; |
338 | q->next_ordered = ordered; | 339 | q->next_ordered = ordered; |
339 | q->prepare_flush_fn = prepare_flush_fn; | 340 | q->prepare_flush_fn = prepare_flush_fn; |
340 | 341 | ||
341 | return 0; | 342 | return 0; |
342 | } | 343 | } |
343 | 344 | ||
344 | EXPORT_SYMBOL(blk_queue_ordered); | 345 | EXPORT_SYMBOL(blk_queue_ordered); |
345 | 346 | ||
346 | /** | 347 | /** |
347 | * blk_queue_issue_flush_fn - set function for issuing a flush | 348 | * blk_queue_issue_flush_fn - set function for issuing a flush |
348 | * @q: the request queue | 349 | * @q: the request queue |
349 | * @iff: the function to be called issuing the flush | 350 | * @iff: the function to be called issuing the flush |
350 | * | 351 | * |
351 | * Description: | 352 | * Description: |
352 | * If a driver supports issuing a flush command, the support is notified | 353 | * If a driver supports issuing a flush command, the support is notified |
353 | * to the block layer by defining it through this call. | 354 | * to the block layer by defining it through this call. |
354 | * | 355 | * |
355 | **/ | 356 | **/ |
356 | void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff) | 357 | void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff) |
357 | { | 358 | { |
358 | q->issue_flush_fn = iff; | 359 | q->issue_flush_fn = iff; |
359 | } | 360 | } |
360 | 361 | ||
361 | EXPORT_SYMBOL(blk_queue_issue_flush_fn); | 362 | EXPORT_SYMBOL(blk_queue_issue_flush_fn); |
362 | 363 | ||
363 | /* | 364 | /* |
364 | * Cache flushing for ordered writes handling | 365 | * Cache flushing for ordered writes handling |
365 | */ | 366 | */ |
366 | inline unsigned blk_ordered_cur_seq(request_queue_t *q) | 367 | inline unsigned blk_ordered_cur_seq(request_queue_t *q) |
367 | { | 368 | { |
368 | if (!q->ordseq) | 369 | if (!q->ordseq) |
369 | return 0; | 370 | return 0; |
370 | return 1 << ffz(q->ordseq); | 371 | return 1 << ffz(q->ordseq); |
371 | } | 372 | } |
372 | 373 | ||
373 | unsigned blk_ordered_req_seq(struct request *rq) | 374 | unsigned blk_ordered_req_seq(struct request *rq) |
374 | { | 375 | { |
375 | request_queue_t *q = rq->q; | 376 | request_queue_t *q = rq->q; |
376 | 377 | ||
377 | BUG_ON(q->ordseq == 0); | 378 | BUG_ON(q->ordseq == 0); |
378 | 379 | ||
379 | if (rq == &q->pre_flush_rq) | 380 | if (rq == &q->pre_flush_rq) |
380 | return QUEUE_ORDSEQ_PREFLUSH; | 381 | return QUEUE_ORDSEQ_PREFLUSH; |
381 | if (rq == &q->bar_rq) | 382 | if (rq == &q->bar_rq) |
382 | return QUEUE_ORDSEQ_BAR; | 383 | return QUEUE_ORDSEQ_BAR; |
383 | if (rq == &q->post_flush_rq) | 384 | if (rq == &q->post_flush_rq) |
384 | return QUEUE_ORDSEQ_POSTFLUSH; | 385 | return QUEUE_ORDSEQ_POSTFLUSH; |
385 | 386 | ||
386 | if ((rq->cmd_flags & REQ_ORDERED_COLOR) == | 387 | if ((rq->cmd_flags & REQ_ORDERED_COLOR) == |
387 | (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) | 388 | (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) |
388 | return QUEUE_ORDSEQ_DRAIN; | 389 | return QUEUE_ORDSEQ_DRAIN; |
389 | else | 390 | else |
390 | return QUEUE_ORDSEQ_DONE; | 391 | return QUEUE_ORDSEQ_DONE; |
391 | } | 392 | } |
392 | 393 | ||
393 | void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error) | 394 | void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error) |
394 | { | 395 | { |
395 | struct request *rq; | 396 | struct request *rq; |
396 | int uptodate; | 397 | int uptodate; |
397 | 398 | ||
398 | if (error && !q->orderr) | 399 | if (error && !q->orderr) |
399 | q->orderr = error; | 400 | q->orderr = error; |
400 | 401 | ||
401 | BUG_ON(q->ordseq & seq); | 402 | BUG_ON(q->ordseq & seq); |
402 | q->ordseq |= seq; | 403 | q->ordseq |= seq; |
403 | 404 | ||
404 | if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) | 405 | if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) |
405 | return; | 406 | return; |
406 | 407 | ||
407 | /* | 408 | /* |
408 | * Okay, sequence complete. | 409 | * Okay, sequence complete. |
409 | */ | 410 | */ |
410 | rq = q->orig_bar_rq; | 411 | rq = q->orig_bar_rq; |
411 | uptodate = q->orderr ? q->orderr : 1; | 412 | uptodate = q->orderr ? q->orderr : 1; |
412 | 413 | ||
413 | q->ordseq = 0; | 414 | q->ordseq = 0; |
414 | 415 | ||
415 | end_that_request_first(rq, uptodate, rq->hard_nr_sectors); | 416 | end_that_request_first(rq, uptodate, rq->hard_nr_sectors); |
416 | end_that_request_last(rq, uptodate); | 417 | end_that_request_last(rq, uptodate); |
417 | } | 418 | } |
418 | 419 | ||
419 | static void pre_flush_end_io(struct request *rq, int error) | 420 | static void pre_flush_end_io(struct request *rq, int error) |
420 | { | 421 | { |
421 | elv_completed_request(rq->q, rq); | 422 | elv_completed_request(rq->q, rq); |
422 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); | 423 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); |
423 | } | 424 | } |
424 | 425 | ||
425 | static void bar_end_io(struct request *rq, int error) | 426 | static void bar_end_io(struct request *rq, int error) |
426 | { | 427 | { |
427 | elv_completed_request(rq->q, rq); | 428 | elv_completed_request(rq->q, rq); |
428 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); | 429 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); |
429 | } | 430 | } |
430 | 431 | ||
431 | static void post_flush_end_io(struct request *rq, int error) | 432 | static void post_flush_end_io(struct request *rq, int error) |
432 | { | 433 | { |
433 | elv_completed_request(rq->q, rq); | 434 | elv_completed_request(rq->q, rq); |
434 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); | 435 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); |
435 | } | 436 | } |
436 | 437 | ||
437 | static void queue_flush(request_queue_t *q, unsigned which) | 438 | static void queue_flush(request_queue_t *q, unsigned which) |
438 | { | 439 | { |
439 | struct request *rq; | 440 | struct request *rq; |
440 | rq_end_io_fn *end_io; | 441 | rq_end_io_fn *end_io; |
441 | 442 | ||
442 | if (which == QUEUE_ORDERED_PREFLUSH) { | 443 | if (which == QUEUE_ORDERED_PREFLUSH) { |
443 | rq = &q->pre_flush_rq; | 444 | rq = &q->pre_flush_rq; |
444 | end_io = pre_flush_end_io; | 445 | end_io = pre_flush_end_io; |
445 | } else { | 446 | } else { |
446 | rq = &q->post_flush_rq; | 447 | rq = &q->post_flush_rq; |
447 | end_io = post_flush_end_io; | 448 | end_io = post_flush_end_io; |
448 | } | 449 | } |
449 | 450 | ||
450 | rq->cmd_flags = REQ_HARDBARRIER; | 451 | rq->cmd_flags = REQ_HARDBARRIER; |
451 | rq_init(q, rq); | 452 | rq_init(q, rq); |
452 | rq->elevator_private = NULL; | 453 | rq->elevator_private = NULL; |
453 | rq->rq_disk = q->bar_rq.rq_disk; | 454 | rq->rq_disk = q->bar_rq.rq_disk; |
454 | rq->rl = NULL; | 455 | rq->rl = NULL; |
455 | rq->end_io = end_io; | 456 | rq->end_io = end_io; |
456 | q->prepare_flush_fn(q, rq); | 457 | q->prepare_flush_fn(q, rq); |
457 | 458 | ||
458 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); | 459 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); |
459 | } | 460 | } |
460 | 461 | ||
461 | static inline struct request *start_ordered(request_queue_t *q, | 462 | static inline struct request *start_ordered(request_queue_t *q, |
462 | struct request *rq) | 463 | struct request *rq) |
463 | { | 464 | { |
464 | q->bi_size = 0; | 465 | q->bi_size = 0; |
465 | q->orderr = 0; | 466 | q->orderr = 0; |
466 | q->ordered = q->next_ordered; | 467 | q->ordered = q->next_ordered; |
467 | q->ordseq |= QUEUE_ORDSEQ_STARTED; | 468 | q->ordseq |= QUEUE_ORDSEQ_STARTED; |
468 | 469 | ||
469 | /* | 470 | /* |
470 | * Prep proxy barrier request. | 471 | * Prep proxy barrier request. |
471 | */ | 472 | */ |
472 | blkdev_dequeue_request(rq); | 473 | blkdev_dequeue_request(rq); |
473 | q->orig_bar_rq = rq; | 474 | q->orig_bar_rq = rq; |
474 | rq = &q->bar_rq; | 475 | rq = &q->bar_rq; |
475 | rq->cmd_flags = 0; | 476 | rq->cmd_flags = 0; |
476 | rq_init(q, rq); | 477 | rq_init(q, rq); |
477 | if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) | 478 | if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) |
478 | rq->cmd_flags |= REQ_RW; | 479 | rq->cmd_flags |= REQ_RW; |
479 | rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; | 480 | rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; |
480 | rq->elevator_private = NULL; | 481 | rq->elevator_private = NULL; |
481 | rq->rl = NULL; | 482 | rq->rl = NULL; |
482 | init_request_from_bio(rq, q->orig_bar_rq->bio); | 483 | init_request_from_bio(rq, q->orig_bar_rq->bio); |
483 | rq->end_io = bar_end_io; | 484 | rq->end_io = bar_end_io; |
484 | 485 | ||
485 | /* | 486 | /* |
486 | * Queue ordered sequence. As we stack them at the head, we | 487 | * Queue ordered sequence. As we stack them at the head, we |
487 | * need to queue in reverse order. Note that we rely on that | 488 | * need to queue in reverse order. Note that we rely on that |
488 | * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs | 489 | * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs |
489 | * request gets in between the ordered sequence. | 490 | * request gets in between the ordered sequence. |
490 | */ | 491 | */ |
491 | if (q->ordered & QUEUE_ORDERED_POSTFLUSH) | 492 | if (q->ordered & QUEUE_ORDERED_POSTFLUSH) |
492 | queue_flush(q, QUEUE_ORDERED_POSTFLUSH); | 493 | queue_flush(q, QUEUE_ORDERED_POSTFLUSH); |
493 | else | 494 | else |
494 | q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; | 495 | q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; |
495 | 496 | ||
496 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); | 497 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); |
497 | 498 | ||
498 | if (q->ordered & QUEUE_ORDERED_PREFLUSH) { | 499 | if (q->ordered & QUEUE_ORDERED_PREFLUSH) { |
499 | queue_flush(q, QUEUE_ORDERED_PREFLUSH); | 500 | queue_flush(q, QUEUE_ORDERED_PREFLUSH); |
500 | rq = &q->pre_flush_rq; | 501 | rq = &q->pre_flush_rq; |
501 | } else | 502 | } else |
502 | q->ordseq |= QUEUE_ORDSEQ_PREFLUSH; | 503 | q->ordseq |= QUEUE_ORDSEQ_PREFLUSH; |
503 | 504 | ||
504 | if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0) | 505 | if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0) |
505 | q->ordseq |= QUEUE_ORDSEQ_DRAIN; | 506 | q->ordseq |= QUEUE_ORDSEQ_DRAIN; |
506 | else | 507 | else |
507 | rq = NULL; | 508 | rq = NULL; |
508 | 509 | ||
509 | return rq; | 510 | return rq; |
510 | } | 511 | } |
511 | 512 | ||
512 | int blk_do_ordered(request_queue_t *q, struct request **rqp) | 513 | int blk_do_ordered(request_queue_t *q, struct request **rqp) |
513 | { | 514 | { |
514 | struct request *rq = *rqp; | 515 | struct request *rq = *rqp; |
515 | int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); | 516 | int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); |
516 | 517 | ||
517 | if (!q->ordseq) { | 518 | if (!q->ordseq) { |
518 | if (!is_barrier) | 519 | if (!is_barrier) |
519 | return 1; | 520 | return 1; |
520 | 521 | ||
521 | if (q->next_ordered != QUEUE_ORDERED_NONE) { | 522 | if (q->next_ordered != QUEUE_ORDERED_NONE) { |
522 | *rqp = start_ordered(q, rq); | 523 | *rqp = start_ordered(q, rq); |
523 | return 1; | 524 | return 1; |
524 | } else { | 525 | } else { |
525 | /* | 526 | /* |
526 | * This can happen when the queue switches to | 527 | * This can happen when the queue switches to |
527 | * ORDERED_NONE while this request is on it. | 528 | * ORDERED_NONE while this request is on it. |
528 | */ | 529 | */ |
529 | blkdev_dequeue_request(rq); | 530 | blkdev_dequeue_request(rq); |
530 | end_that_request_first(rq, -EOPNOTSUPP, | 531 | end_that_request_first(rq, -EOPNOTSUPP, |
531 | rq->hard_nr_sectors); | 532 | rq->hard_nr_sectors); |
532 | end_that_request_last(rq, -EOPNOTSUPP); | 533 | end_that_request_last(rq, -EOPNOTSUPP); |
533 | *rqp = NULL; | 534 | *rqp = NULL; |
534 | return 0; | 535 | return 0; |
535 | } | 536 | } |
536 | } | 537 | } |
537 | 538 | ||
538 | /* | 539 | /* |
539 | * Ordered sequence in progress | 540 | * Ordered sequence in progress |
540 | */ | 541 | */ |
541 | 542 | ||
542 | /* Special requests are not subject to ordering rules. */ | 543 | /* Special requests are not subject to ordering rules. */ |
543 | if (!blk_fs_request(rq) && | 544 | if (!blk_fs_request(rq) && |
544 | rq != &q->pre_flush_rq && rq != &q->post_flush_rq) | 545 | rq != &q->pre_flush_rq && rq != &q->post_flush_rq) |
545 | return 1; | 546 | return 1; |
546 | 547 | ||
547 | if (q->ordered & QUEUE_ORDERED_TAG) { | 548 | if (q->ordered & QUEUE_ORDERED_TAG) { |
548 | /* Ordered by tag. Blocking the next barrier is enough. */ | 549 | /* Ordered by tag. Blocking the next barrier is enough. */ |
549 | if (is_barrier && rq != &q->bar_rq) | 550 | if (is_barrier && rq != &q->bar_rq) |
550 | *rqp = NULL; | 551 | *rqp = NULL; |
551 | } else { | 552 | } else { |
552 | /* Ordered by draining. Wait for turn. */ | 553 | /* Ordered by draining. Wait for turn. */ |
553 | WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); | 554 | WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); |
554 | if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) | 555 | if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) |
555 | *rqp = NULL; | 556 | *rqp = NULL; |
556 | } | 557 | } |
557 | 558 | ||
558 | return 1; | 559 | return 1; |
559 | } | 560 | } |
560 | 561 | ||
561 | static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error) | 562 | static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error) |
562 | { | 563 | { |
563 | request_queue_t *q = bio->bi_private; | 564 | request_queue_t *q = bio->bi_private; |
564 | struct bio_vec *bvec; | 565 | struct bio_vec *bvec; |
565 | int i; | 566 | int i; |
566 | 567 | ||
567 | /* | 568 | /* |
568 | * This is dry run, restore bio_sector and size. We'll finish | 569 | * This is dry run, restore bio_sector and size. We'll finish |
569 | * this request again with the original bi_end_io after an | 570 | * this request again with the original bi_end_io after an |
570 | * error occurs or post flush is complete. | 571 | * error occurs or post flush is complete. |
571 | */ | 572 | */ |
572 | q->bi_size += bytes; | 573 | q->bi_size += bytes; |
573 | 574 | ||
574 | if (bio->bi_size) | 575 | if (bio->bi_size) |
575 | return 1; | 576 | return 1; |
576 | 577 | ||
577 | /* Rewind bvec's */ | 578 | /* Rewind bvec's */ |
578 | bio->bi_idx = 0; | 579 | bio->bi_idx = 0; |
579 | bio_for_each_segment(bvec, bio, i) { | 580 | bio_for_each_segment(bvec, bio, i) { |
580 | bvec->bv_len += bvec->bv_offset; | 581 | bvec->bv_len += bvec->bv_offset; |
581 | bvec->bv_offset = 0; | 582 | bvec->bv_offset = 0; |
582 | } | 583 | } |
583 | 584 | ||
584 | /* Reset bio */ | 585 | /* Reset bio */ |
585 | set_bit(BIO_UPTODATE, &bio->bi_flags); | 586 | set_bit(BIO_UPTODATE, &bio->bi_flags); |
586 | bio->bi_size = q->bi_size; | 587 | bio->bi_size = q->bi_size; |
587 | bio->bi_sector -= (q->bi_size >> 9); | 588 | bio->bi_sector -= (q->bi_size >> 9); |
588 | q->bi_size = 0; | 589 | q->bi_size = 0; |
589 | 590 | ||
590 | return 0; | 591 | return 0; |
591 | } | 592 | } |
592 | 593 | ||
593 | static inline int ordered_bio_endio(struct request *rq, struct bio *bio, | 594 | static inline int ordered_bio_endio(struct request *rq, struct bio *bio, |
594 | unsigned int nbytes, int error) | 595 | unsigned int nbytes, int error) |
595 | { | 596 | { |
596 | request_queue_t *q = rq->q; | 597 | request_queue_t *q = rq->q; |
597 | bio_end_io_t *endio; | 598 | bio_end_io_t *endio; |
598 | void *private; | 599 | void *private; |
599 | 600 | ||
600 | if (&q->bar_rq != rq) | 601 | if (&q->bar_rq != rq) |
601 | return 0; | 602 | return 0; |
602 | 603 | ||
603 | /* | 604 | /* |
604 | * Okay, this is the barrier request in progress, dry finish it. | 605 | * Okay, this is the barrier request in progress, dry finish it. |
605 | */ | 606 | */ |
606 | if (error && !q->orderr) | 607 | if (error && !q->orderr) |
607 | q->orderr = error; | 608 | q->orderr = error; |
608 | 609 | ||
609 | endio = bio->bi_end_io; | 610 | endio = bio->bi_end_io; |
610 | private = bio->bi_private; | 611 | private = bio->bi_private; |
611 | bio->bi_end_io = flush_dry_bio_endio; | 612 | bio->bi_end_io = flush_dry_bio_endio; |
612 | bio->bi_private = q; | 613 | bio->bi_private = q; |
613 | 614 | ||
614 | bio_endio(bio, nbytes, error); | 615 | bio_endio(bio, nbytes, error); |
615 | 616 | ||
616 | bio->bi_end_io = endio; | 617 | bio->bi_end_io = endio; |
617 | bio->bi_private = private; | 618 | bio->bi_private = private; |
618 | 619 | ||
619 | return 1; | 620 | return 1; |
620 | } | 621 | } |
621 | 622 | ||
622 | /** | 623 | /** |
623 | * blk_queue_bounce_limit - set bounce buffer limit for queue | 624 | * blk_queue_bounce_limit - set bounce buffer limit for queue |
624 | * @q: the request queue for the device | 625 | * @q: the request queue for the device |
625 | * @dma_addr: bus address limit | 626 | * @dma_addr: bus address limit |
626 | * | 627 | * |
627 | * Description: | 628 | * Description: |
628 | * Different hardware can have different requirements as to what pages | 629 | * Different hardware can have different requirements as to what pages |
629 | * it can do I/O directly to. A low level driver can call | 630 | * it can do I/O directly to. A low level driver can call |
630 | * blk_queue_bounce_limit to have lower memory pages allocated as bounce | 631 | * blk_queue_bounce_limit to have lower memory pages allocated as bounce |
631 | * buffers for doing I/O to pages residing above @page. | 632 | * buffers for doing I/O to pages residing above @page. |
632 | **/ | 633 | **/ |
633 | void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) | 634 | void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) |
634 | { | 635 | { |
635 | unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; | 636 | unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; |
636 | int dma = 0; | 637 | int dma = 0; |
637 | 638 | ||
638 | q->bounce_gfp = GFP_NOIO; | 639 | q->bounce_gfp = GFP_NOIO; |
639 | #if BITS_PER_LONG == 64 | 640 | #if BITS_PER_LONG == 64 |
640 | /* Assume anything <= 4GB can be handled by IOMMU. | 641 | /* Assume anything <= 4GB can be handled by IOMMU. |
641 | Actually some IOMMUs can handle everything, but I don't | 642 | Actually some IOMMUs can handle everything, but I don't |
642 | know of a way to test this here. */ | 643 | know of a way to test this here. */ |
643 | if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) | 644 | if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) |
644 | dma = 1; | 645 | dma = 1; |
645 | q->bounce_pfn = max_low_pfn; | 646 | q->bounce_pfn = max_low_pfn; |
646 | #else | 647 | #else |
647 | if (bounce_pfn < blk_max_low_pfn) | 648 | if (bounce_pfn < blk_max_low_pfn) |
648 | dma = 1; | 649 | dma = 1; |
649 | q->bounce_pfn = bounce_pfn; | 650 | q->bounce_pfn = bounce_pfn; |
650 | #endif | 651 | #endif |
651 | if (dma) { | 652 | if (dma) { |
652 | init_emergency_isa_pool(); | 653 | init_emergency_isa_pool(); |
653 | q->bounce_gfp = GFP_NOIO | GFP_DMA; | 654 | q->bounce_gfp = GFP_NOIO | GFP_DMA; |
654 | q->bounce_pfn = bounce_pfn; | 655 | q->bounce_pfn = bounce_pfn; |
655 | } | 656 | } |
656 | } | 657 | } |
657 | 658 | ||
658 | EXPORT_SYMBOL(blk_queue_bounce_limit); | 659 | EXPORT_SYMBOL(blk_queue_bounce_limit); |
659 | 660 | ||
660 | /** | 661 | /** |
661 | * blk_queue_max_sectors - set max sectors for a request for this queue | 662 | * blk_queue_max_sectors - set max sectors for a request for this queue |
662 | * @q: the request queue for the device | 663 | * @q: the request queue for the device |
663 | * @max_sectors: max sectors in the usual 512b unit | 664 | * @max_sectors: max sectors in the usual 512b unit |
664 | * | 665 | * |
665 | * Description: | 666 | * Description: |
666 | * Enables a low level driver to set an upper limit on the size of | 667 | * Enables a low level driver to set an upper limit on the size of |
667 | * received requests. | 668 | * received requests. |
668 | **/ | 669 | **/ |
669 | void blk_queue_max_sectors(request_queue_t *q, unsigned int max_sectors) | 670 | void blk_queue_max_sectors(request_queue_t *q, unsigned int max_sectors) |
670 | { | 671 | { |
671 | if ((max_sectors << 9) < PAGE_CACHE_SIZE) { | 672 | if ((max_sectors << 9) < PAGE_CACHE_SIZE) { |
672 | max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); | 673 | max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); |
673 | printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); | 674 | printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); |
674 | } | 675 | } |
675 | 676 | ||
676 | if (BLK_DEF_MAX_SECTORS > max_sectors) | 677 | if (BLK_DEF_MAX_SECTORS > max_sectors) |
677 | q->max_hw_sectors = q->max_sectors = max_sectors; | 678 | q->max_hw_sectors = q->max_sectors = max_sectors; |
678 | else { | 679 | else { |
679 | q->max_sectors = BLK_DEF_MAX_SECTORS; | 680 | q->max_sectors = BLK_DEF_MAX_SECTORS; |
680 | q->max_hw_sectors = max_sectors; | 681 | q->max_hw_sectors = max_sectors; |
681 | } | 682 | } |
682 | } | 683 | } |
683 | 684 | ||
684 | EXPORT_SYMBOL(blk_queue_max_sectors); | 685 | EXPORT_SYMBOL(blk_queue_max_sectors); |
685 | 686 | ||
686 | /** | 687 | /** |
687 | * blk_queue_max_phys_segments - set max phys segments for a request for this queue | 688 | * blk_queue_max_phys_segments - set max phys segments for a request for this queue |
688 | * @q: the request queue for the device | 689 | * @q: the request queue for the device |
689 | * @max_segments: max number of segments | 690 | * @max_segments: max number of segments |
690 | * | 691 | * |
691 | * Description: | 692 | * Description: |
692 | * Enables a low level driver to set an upper limit on the number of | 693 | * Enables a low level driver to set an upper limit on the number of |
693 | * physical data segments in a request. This would be the largest sized | 694 | * physical data segments in a request. This would be the largest sized |
694 | * scatter list the driver could handle. | 695 | * scatter list the driver could handle. |
695 | **/ | 696 | **/ |
696 | void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments) | 697 | void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments) |
697 | { | 698 | { |
698 | if (!max_segments) { | 699 | if (!max_segments) { |
699 | max_segments = 1; | 700 | max_segments = 1; |
700 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); | 701 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); |
701 | } | 702 | } |
702 | 703 | ||
703 | q->max_phys_segments = max_segments; | 704 | q->max_phys_segments = max_segments; |
704 | } | 705 | } |
705 | 706 | ||
706 | EXPORT_SYMBOL(blk_queue_max_phys_segments); | 707 | EXPORT_SYMBOL(blk_queue_max_phys_segments); |
707 | 708 | ||
708 | /** | 709 | /** |
709 | * blk_queue_max_hw_segments - set max hw segments for a request for this queue | 710 | * blk_queue_max_hw_segments - set max hw segments for a request for this queue |
710 | * @q: the request queue for the device | 711 | * @q: the request queue for the device |
711 | * @max_segments: max number of segments | 712 | * @max_segments: max number of segments |
712 | * | 713 | * |
713 | * Description: | 714 | * Description: |
714 | * Enables a low level driver to set an upper limit on the number of | 715 | * Enables a low level driver to set an upper limit on the number of |
715 | * hw data segments in a request. This would be the largest number of | 716 | * hw data segments in a request. This would be the largest number of |
716 | * address/length pairs the host adapter can actually give at once | 717 | * address/length pairs the host adapter can actually give at once |
717 | * to the device. | 718 | * to the device. |
718 | **/ | 719 | **/ |
719 | void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments) | 720 | void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments) |
720 | { | 721 | { |
721 | if (!max_segments) { | 722 | if (!max_segments) { |
722 | max_segments = 1; | 723 | max_segments = 1; |
723 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); | 724 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); |
724 | } | 725 | } |
725 | 726 | ||
726 | q->max_hw_segments = max_segments; | 727 | q->max_hw_segments = max_segments; |
727 | } | 728 | } |
728 | 729 | ||
729 | EXPORT_SYMBOL(blk_queue_max_hw_segments); | 730 | EXPORT_SYMBOL(blk_queue_max_hw_segments); |
730 | 731 | ||
731 | /** | 732 | /** |
732 | * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg | 733 | * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg |
733 | * @q: the request queue for the device | 734 | * @q: the request queue for the device |
734 | * @max_size: max size of segment in bytes | 735 | * @max_size: max size of segment in bytes |
735 | * | 736 | * |
736 | * Description: | 737 | * Description: |
737 | * Enables a low level driver to set an upper limit on the size of a | 738 | * Enables a low level driver to set an upper limit on the size of a |
738 | * coalesced segment | 739 | * coalesced segment |
739 | **/ | 740 | **/ |
740 | void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size) | 741 | void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size) |
741 | { | 742 | { |
742 | if (max_size < PAGE_CACHE_SIZE) { | 743 | if (max_size < PAGE_CACHE_SIZE) { |
743 | max_size = PAGE_CACHE_SIZE; | 744 | max_size = PAGE_CACHE_SIZE; |
744 | printk("%s: set to minimum %d\n", __FUNCTION__, max_size); | 745 | printk("%s: set to minimum %d\n", __FUNCTION__, max_size); |
745 | } | 746 | } |
746 | 747 | ||
747 | q->max_segment_size = max_size; | 748 | q->max_segment_size = max_size; |
748 | } | 749 | } |
749 | 750 | ||
750 | EXPORT_SYMBOL(blk_queue_max_segment_size); | 751 | EXPORT_SYMBOL(blk_queue_max_segment_size); |
751 | 752 | ||
752 | /** | 753 | /** |
753 | * blk_queue_hardsect_size - set hardware sector size for the queue | 754 | * blk_queue_hardsect_size - set hardware sector size for the queue |
754 | * @q: the request queue for the device | 755 | * @q: the request queue for the device |
755 | * @size: the hardware sector size, in bytes | 756 | * @size: the hardware sector size, in bytes |
756 | * | 757 | * |
757 | * Description: | 758 | * Description: |
758 | * This should typically be set to the lowest possible sector size | 759 | * This should typically be set to the lowest possible sector size |
759 | * that the hardware can operate on without resorting to internal | 760 | * that the hardware can operate on without resorting to internal |
760 | * read-modify-write operations. Usually the default | 761 | * read-modify-write operations. Usually the default |
761 | * of 512 covers most hardware. | 762 | * of 512 covers most hardware. |
762 | **/ | 763 | **/ |
763 | void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) | 764 | void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) |
764 | { | 765 | { |
765 | q->hardsect_size = size; | 766 | q->hardsect_size = size; |
766 | } | 767 | } |
767 | 768 | ||
768 | EXPORT_SYMBOL(blk_queue_hardsect_size); | 769 | EXPORT_SYMBOL(blk_queue_hardsect_size); |
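For context, a low level driver applies limits like these once while it sets up its request queue, before the disk goes live. A minimal sketch, assuming a hypothetical driver; my_request_fn and the limit values are illustrative, not taken from this file:

#include <linux/blkdev.h>
#include <linux/spinlock.h>

void my_request_fn(request_queue_t *q);		/* the driver's request handler (hypothetical) */

static DEFINE_SPINLOCK(my_lock);
static request_queue_t *my_queue;

static int my_setup_queue(void)
{
	my_queue = blk_init_queue(my_request_fn, &my_lock);
	if (!my_queue)
		return -ENOMEM;

	blk_queue_max_hw_segments(my_queue, 128);	/* controller sg table size */
	blk_queue_max_phys_segments(my_queue, 128);
	blk_queue_max_segment_size(my_queue, 65536);	/* cap one coalesced segment at 64k */
	blk_queue_hardsect_size(my_queue, 512);		/* device uses 512 byte sectors */

	return 0;
}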
769 | 770 | ||
770 | /* | 771 | /* |
771 | * Returns the minimum that is _not_ zero, unless both are zero. | 772 | * Returns the minimum that is _not_ zero, unless both are zero. |
772 | */ | 773 | */ |
773 | #define min_not_zero(l, r) ((l) == 0 ? (r) : ((r) == 0 ? (l) : min(l, r))) | 774 | #define min_not_zero(l, r) ((l) == 0 ? (r) : ((r) == 0 ? (l) : min(l, r))) |
774 | 775 | ||
775 | /** | 776 | /** |
776 | * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers | 777 | * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers |
777 | * @t: the stacking driver (top) | 778 | * @t: the stacking driver (top) |
778 | * @b: the underlying device (bottom) | 779 | * @b: the underlying device (bottom) |
779 | **/ | 780 | **/ |
780 | void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) | 781 | void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) |
781 | { | 782 | { |
782 | /* zero is "infinity" */ | 783 | /* zero is "infinity" */ |
783 | t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); | 784 | t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); |
784 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); | 785 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); |
785 | 786 | ||
786 | t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); | 787 | t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); |
787 | t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); | 788 | t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); |
788 | t->max_segment_size = min(t->max_segment_size,b->max_segment_size); | 789 | t->max_segment_size = min(t->max_segment_size,b->max_segment_size); |
789 | t->hardsect_size = max(t->hardsect_size,b->hardsect_size); | 790 | t->hardsect_size = max(t->hardsect_size,b->hardsect_size); |
790 | if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) | 791 | if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) |
791 | clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags); | 792 | clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags); |
792 | } | 793 | } |
793 | 794 | ||
794 | EXPORT_SYMBOL(blk_queue_stack_limits); | 795 | EXPORT_SYMBOL(blk_queue_stack_limits); |
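A stacking driver (in the md/dm mould) would typically fold in each member device's limits while building its top-level queue. A rough sketch, assuming a hypothetical array of already-opened member block devices:

#include <linux/blkdev.h>
#include <linux/fs.h>

/* Hypothetical helper: shrink the top queue's limits to what every member allows. */
static void my_stack_limits(request_queue_t *top, struct block_device **members, int nr)
{
	int i;

	for (i = 0; i < nr; i++)
		blk_queue_stack_limits(top, bdev_get_queue(members[i]));
}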
795 | 796 | ||
796 | /** | 797 | /** |
797 | * blk_queue_segment_boundary - set boundary rules for segment merging | 798 | * blk_queue_segment_boundary - set boundary rules for segment merging |
798 | * @q: the request queue for the device | 799 | * @q: the request queue for the device |
799 | * @mask: the memory boundary mask | 800 | * @mask: the memory boundary mask |
800 | **/ | 801 | **/ |
801 | void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask) | 802 | void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask) |
802 | { | 803 | { |
803 | if (mask < PAGE_CACHE_SIZE - 1) { | 804 | if (mask < PAGE_CACHE_SIZE - 1) { |
804 | mask = PAGE_CACHE_SIZE - 1; | 805 | mask = PAGE_CACHE_SIZE - 1; |
805 | printk("%s: set to minimum %lx\n", __FUNCTION__, mask); | 806 | printk("%s: set to minimum %lx\n", __FUNCTION__, mask); |
806 | } | 807 | } |
807 | 808 | ||
808 | q->seg_boundary_mask = mask; | 809 | q->seg_boundary_mask = mask; |
809 | } | 810 | } |
810 | 811 | ||
811 | EXPORT_SYMBOL(blk_queue_segment_boundary); | 812 | EXPORT_SYMBOL(blk_queue_segment_boundary); |
812 | 813 | ||
813 | /** | 814 | /** |
814 | * blk_queue_dma_alignment - set dma length and memory alignment | 815 | * blk_queue_dma_alignment - set dma length and memory alignment |
815 | * @q: the request queue for the device | 816 | * @q: the request queue for the device |
816 | * @mask: alignment mask | 817 | * @mask: alignment mask |
817 | * | 818 | * |
818 | * Description: | 819 | * Description: |
819 | * set required memory and length alignment for direct dma transactions. | 820 | * set required memory and length alignment for direct dma transactions. |
820 | * this is used when building direct io requests for the queue. | 821 | * this is used when building direct io requests for the queue. |
821 | * | 822 | * |
822 | **/ | 823 | **/ |
823 | void blk_queue_dma_alignment(request_queue_t *q, int mask) | 824 | void blk_queue_dma_alignment(request_queue_t *q, int mask) |
824 | { | 825 | { |
825 | q->dma_alignment = mask; | 826 | q->dma_alignment = mask; |
826 | } | 827 | } |
827 | 828 | ||
828 | EXPORT_SYMBOL(blk_queue_dma_alignment); | 829 | EXPORT_SYMBOL(blk_queue_dma_alignment); |
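As a concrete illustration, a driver whose DMA engine cannot cross a 4 KB boundary within a segment and needs 4 byte aligned buffers for direct I/O might set the following masks (the values are examples only):

#include <linux/blkdev.h>

static void my_set_dma_limits(request_queue_t *q)
{
	blk_queue_segment_boundary(q, 0xfff);	/* a segment may not straddle a 4 KB boundary */
	blk_queue_dma_alignment(q, 0x3);	/* addresses and lengths must be 4 byte aligned */
}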
829 | 830 | ||
830 | /** | 831 | /** |
831 | * blk_queue_find_tag - find a request by its tag and queue | 832 | * blk_queue_find_tag - find a request by its tag and queue |
832 | * @q: The request queue for the device | 833 | * @q: The request queue for the device |
833 | * @tag: The tag of the request | 834 | * @tag: The tag of the request |
834 | * | 835 | * |
835 | * Notes: | 836 | * Notes: |
836 | * Should be used when a device returns a tag and you want to match | 837 | * Should be used when a device returns a tag and you want to match |
837 | * it with a request. | 838 | * it with a request. |
838 | * | 839 | * |
839 | * no locks need be held. | 840 | * no locks need be held. |
840 | **/ | 841 | **/ |
841 | struct request *blk_queue_find_tag(request_queue_t *q, int tag) | 842 | struct request *blk_queue_find_tag(request_queue_t *q, int tag) |
842 | { | 843 | { |
843 | struct blk_queue_tag *bqt = q->queue_tags; | 844 | struct blk_queue_tag *bqt = q->queue_tags; |
844 | 845 | ||
845 | if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) | 846 | if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) |
846 | return NULL; | 847 | return NULL; |
847 | 848 | ||
848 | return bqt->tag_index[tag]; | 849 | return bqt->tag_index[tag]; |
849 | } | 850 | } |
850 | 851 | ||
851 | EXPORT_SYMBOL(blk_queue_find_tag); | 852 | EXPORT_SYMBOL(blk_queue_find_tag); |
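The usual caller is a driver's completion interrupt: the hardware reports which tag finished and the driver looks the request back up. A hedged sketch, where my_read_completed_tag() stands in for reading the controller's completion register:

#include <linux/kernel.h>
#include <linux/blkdev.h>

int my_read_completed_tag(void);	/* hypothetical hardware access */

static void my_handle_completion(request_queue_t *q)
{
	int tag = my_read_completed_tag();
	struct request *rq = blk_queue_find_tag(q, tag);

	if (!rq) {
		printk(KERN_ERR "my: completion for unknown tag %d\n", tag);
		return;
	}
	/* ... finish the transfer described by rq ... */
}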
852 | 853 | ||
853 | /** | 854 | /** |
854 | * __blk_free_tags - release a given set of tag maintenance info | 855 | * __blk_free_tags - release a given set of tag maintenance info |
855 | * @bqt: the tag map to free | 856 | * @bqt: the tag map to free |
856 | * | 857 | * |
857 | * Tries to free the specified @bqt@. Returns true if it was | 858 | * Tries to free the specified @bqt@. Returns true if it was |
858 | * actually freed and false if there are still references using it | 859 | * actually freed and false if there are still references using it |
859 | */ | 860 | */ |
860 | static int __blk_free_tags(struct blk_queue_tag *bqt) | 861 | static int __blk_free_tags(struct blk_queue_tag *bqt) |
861 | { | 862 | { |
862 | int retval; | 863 | int retval; |
863 | 864 | ||
864 | retval = atomic_dec_and_test(&bqt->refcnt); | 865 | retval = atomic_dec_and_test(&bqt->refcnt); |
865 | if (retval) { | 866 | if (retval) { |
866 | BUG_ON(bqt->busy); | 867 | BUG_ON(bqt->busy); |
867 | BUG_ON(!list_empty(&bqt->busy_list)); | 868 | BUG_ON(!list_empty(&bqt->busy_list)); |
868 | 869 | ||
869 | kfree(bqt->tag_index); | 870 | kfree(bqt->tag_index); |
870 | bqt->tag_index = NULL; | 871 | bqt->tag_index = NULL; |
871 | 872 | ||
872 | kfree(bqt->tag_map); | 873 | kfree(bqt->tag_map); |
873 | bqt->tag_map = NULL; | 874 | bqt->tag_map = NULL; |
874 | 875 | ||
875 | kfree(bqt); | 876 | kfree(bqt); |
876 | 877 | ||
877 | } | 878 | } |
878 | 879 | ||
879 | return retval; | 880 | return retval; |
880 | } | 881 | } |
881 | 882 | ||
882 | /** | 883 | /** |
883 | * __blk_queue_free_tags - release tag maintenance info | 884 | * __blk_queue_free_tags - release tag maintenance info |
884 | * @q: the request queue for the device | 885 | * @q: the request queue for the device |
885 | * | 886 | * |
886 | * Notes: | 887 | * Notes: |
887 | * blk_cleanup_queue() will take care of calling this function, if tagging | 888 | * blk_cleanup_queue() will take care of calling this function, if tagging |
888 | * has been used. So there's no need to call this directly. | 889 | * has been used. So there's no need to call this directly. |
889 | **/ | 890 | **/ |
890 | static void __blk_queue_free_tags(request_queue_t *q) | 891 | static void __blk_queue_free_tags(request_queue_t *q) |
891 | { | 892 | { |
892 | struct blk_queue_tag *bqt = q->queue_tags; | 893 | struct blk_queue_tag *bqt = q->queue_tags; |
893 | 894 | ||
894 | if (!bqt) | 895 | if (!bqt) |
895 | return; | 896 | return; |
896 | 897 | ||
897 | __blk_free_tags(bqt); | 898 | __blk_free_tags(bqt); |
898 | 899 | ||
899 | q->queue_tags = NULL; | 900 | q->queue_tags = NULL; |
900 | q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); | 901 | q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); |
901 | } | 902 | } |
902 | 903 | ||
903 | 904 | ||
904 | /** | 905 | /** |
905 | * blk_free_tags - release a given set of tag maintenance info | 906 | * blk_free_tags - release a given set of tag maintenance info |
906 | * @bqt: the tag map to free | 907 | * @bqt: the tag map to free |
907 | * | 908 | * |
908 | * For an externally managed @bqt@, this frees the map. Callers of this | 909 | * For an externally managed @bqt@, this frees the map. Callers of this |
909 | * function must guarantee to have released all the queues that | 910 | * function must guarantee to have released all the queues that |
910 | * might have been using this tag map. | 911 | * might have been using this tag map. |
911 | */ | 912 | */ |
912 | void blk_free_tags(struct blk_queue_tag *bqt) | 913 | void blk_free_tags(struct blk_queue_tag *bqt) |
913 | { | 914 | { |
914 | if (unlikely(!__blk_free_tags(bqt))) | 915 | if (unlikely(!__blk_free_tags(bqt))) |
915 | BUG(); | 916 | BUG(); |
916 | } | 917 | } |
917 | EXPORT_SYMBOL(blk_free_tags); | 918 | EXPORT_SYMBOL(blk_free_tags); |
918 | 919 | ||
919 | /** | 920 | /** |
920 | * blk_queue_free_tags - release tag maintenance info | 921 | * blk_queue_free_tags - release tag maintenance info |
921 | * @q: the request queue for the device | 922 | * @q: the request queue for the device |
922 | * | 923 | * |
923 | * Notes: | 924 | * Notes: |
924 | * This is used to disable tagged queuing on a device, yet leave the | 925 | * This is used to disable tagged queuing on a device, yet leave the |
925 | * queue in function. | 926 | * queue in function. |
926 | **/ | 927 | **/ |
927 | void blk_queue_free_tags(request_queue_t *q) | 928 | void blk_queue_free_tags(request_queue_t *q) |
928 | { | 929 | { |
929 | clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); | 930 | clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); |
930 | } | 931 | } |
931 | 932 | ||
932 | EXPORT_SYMBOL(blk_queue_free_tags); | 933 | EXPORT_SYMBOL(blk_queue_free_tags); |
933 | 934 | ||
934 | static int | 935 | static int |
935 | init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) | 936 | init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) |
936 | { | 937 | { |
937 | struct request **tag_index; | 938 | struct request **tag_index; |
938 | unsigned long *tag_map; | 939 | unsigned long *tag_map; |
939 | int nr_ulongs; | 940 | int nr_ulongs; |
940 | 941 | ||
941 | if (q && depth > q->nr_requests * 2) { | 942 | if (q && depth > q->nr_requests * 2) { |
942 | depth = q->nr_requests * 2; | 943 | depth = q->nr_requests * 2; |
943 | printk(KERN_ERR "%s: adjusted depth to %d\n", | 944 | printk(KERN_ERR "%s: adjusted depth to %d\n", |
944 | __FUNCTION__, depth); | 945 | __FUNCTION__, depth); |
945 | } | 946 | } |
946 | 947 | ||
947 | tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC); | 948 | tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC); |
948 | if (!tag_index) | 949 | if (!tag_index) |
949 | goto fail; | 950 | goto fail; |
950 | 951 | ||
951 | nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; | 952 | nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; |
952 | tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); | 953 | tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); |
953 | if (!tag_map) | 954 | if (!tag_map) |
954 | goto fail; | 955 | goto fail; |
955 | 956 | ||
956 | tags->real_max_depth = depth; | 957 | tags->real_max_depth = depth; |
957 | tags->max_depth = depth; | 958 | tags->max_depth = depth; |
958 | tags->tag_index = tag_index; | 959 | tags->tag_index = tag_index; |
959 | tags->tag_map = tag_map; | 960 | tags->tag_map = tag_map; |
960 | 961 | ||
961 | return 0; | 962 | return 0; |
962 | fail: | 963 | fail: |
963 | kfree(tag_index); | 964 | kfree(tag_index); |
964 | return -ENOMEM; | 965 | return -ENOMEM; |
965 | } | 966 | } |
966 | 967 | ||
967 | static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, | 968 | static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, |
968 | int depth) | 969 | int depth) |
969 | { | 970 | { |
970 | struct blk_queue_tag *tags; | 971 | struct blk_queue_tag *tags; |
971 | 972 | ||
972 | tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); | 973 | tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); |
973 | if (!tags) | 974 | if (!tags) |
974 | goto fail; | 975 | goto fail; |
975 | 976 | ||
976 | if (init_tag_map(q, tags, depth)) | 977 | if (init_tag_map(q, tags, depth)) |
977 | goto fail; | 978 | goto fail; |
978 | 979 | ||
979 | INIT_LIST_HEAD(&tags->busy_list); | 980 | INIT_LIST_HEAD(&tags->busy_list); |
980 | tags->busy = 0; | 981 | tags->busy = 0; |
981 | atomic_set(&tags->refcnt, 1); | 982 | atomic_set(&tags->refcnt, 1); |
982 | return tags; | 983 | return tags; |
983 | fail: | 984 | fail: |
984 | kfree(tags); | 985 | kfree(tags); |
985 | return NULL; | 986 | return NULL; |
986 | } | 987 | } |
987 | 988 | ||
988 | /** | 989 | /** |
989 | * blk_init_tags - initialize the tag info for an external tag map | 990 | * blk_init_tags - initialize the tag info for an external tag map |
990 | * @depth: the maximum queue depth supported | 991 | * @depth: the maximum queue depth supported |
991 | * @tags: the tag to use | 992 | * @tags: the tag to use |
992 | **/ | 993 | **/ |
993 | struct blk_queue_tag *blk_init_tags(int depth) | 994 | struct blk_queue_tag *blk_init_tags(int depth) |
994 | { | 995 | { |
995 | return __blk_queue_init_tags(NULL, depth); | 996 | return __blk_queue_init_tags(NULL, depth); |
996 | } | 997 | } |
997 | EXPORT_SYMBOL(blk_init_tags); | 998 | EXPORT_SYMBOL(blk_init_tags); |
998 | 999 | ||
999 | /** | 1000 | /** |
1000 | * blk_queue_init_tags - initialize the queue tag info | 1001 | * blk_queue_init_tags - initialize the queue tag info |
1001 | * @q: the request queue for the device | 1002 | * @q: the request queue for the device |
1002 | * @depth: the maximum queue depth supported | 1003 | * @depth: the maximum queue depth supported |
1003 | * @tags: the tag to use | 1004 | * @tags: the tag to use |
1004 | **/ | 1005 | **/ |
1005 | int blk_queue_init_tags(request_queue_t *q, int depth, | 1006 | int blk_queue_init_tags(request_queue_t *q, int depth, |
1006 | struct blk_queue_tag *tags) | 1007 | struct blk_queue_tag *tags) |
1007 | { | 1008 | { |
1008 | int rc; | 1009 | int rc; |
1009 | 1010 | ||
1010 | BUG_ON(tags && q->queue_tags && tags != q->queue_tags); | 1011 | BUG_ON(tags && q->queue_tags && tags != q->queue_tags); |
1011 | 1012 | ||
1012 | if (!tags && !q->queue_tags) { | 1013 | if (!tags && !q->queue_tags) { |
1013 | tags = __blk_queue_init_tags(q, depth); | 1014 | tags = __blk_queue_init_tags(q, depth); |
1014 | 1015 | ||
1015 | if (!tags) | 1016 | if (!tags) |
1016 | goto fail; | 1017 | goto fail; |
1017 | } else if (q->queue_tags) { | 1018 | } else if (q->queue_tags) { |
1018 | if ((rc = blk_queue_resize_tags(q, depth))) | 1019 | if ((rc = blk_queue_resize_tags(q, depth))) |
1019 | return rc; | 1020 | return rc; |
1020 | set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); | 1021 | set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); |
1021 | return 0; | 1022 | return 0; |
1022 | } else | 1023 | } else |
1023 | atomic_inc(&tags->refcnt); | 1024 | atomic_inc(&tags->refcnt); |
1024 | 1025 | ||
1025 | /* | 1026 | /* |
1026 | * assign it, all done | 1027 | * assign it, all done |
1027 | */ | 1028 | */ |
1028 | q->queue_tags = tags; | 1029 | q->queue_tags = tags; |
1029 | q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); | 1030 | q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); |
1030 | return 0; | 1031 | return 0; |
1031 | fail: | 1032 | fail: |
1032 | kfree(tags); | 1033 | kfree(tags); |
1033 | return -ENOMEM; | 1034 | return -ENOMEM; |
1034 | } | 1035 | } |
1035 | 1036 | ||
1036 | EXPORT_SYMBOL(blk_queue_init_tags); | 1037 | EXPORT_SYMBOL(blk_queue_init_tags); |
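One common pattern, sketched here with hypothetical names, is a host adapter sharing a single externally managed tag map across several queues: blk_init_tags() creates the map, each queue attaches to it via blk_queue_init_tags() (which takes a reference), and blk_free_tags() drops the creator's reference once every queue has been cleaned up:

#include <linux/blkdev.h>

struct my_host {
	struct blk_queue_tag *shared_tags;
	request_queue_t *q[2];
};

static int my_enable_tcq(struct my_host *h, int depth)
{
	int i, err;

	h->shared_tags = blk_init_tags(depth);
	if (!h->shared_tags)
		return -ENOMEM;

	for (i = 0; i < 2; i++) {
		err = blk_queue_init_tags(h->q[i], depth, h->shared_tags);
		if (err)
			return err;
	}
	return 0;
}

static void my_disable_tcq(struct my_host *h)
{
	/* legal only after both queues have released the map, e.g. via blk_cleanup_queue() */
	blk_free_tags(h->shared_tags);
}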
1037 | 1038 | ||
1038 | /** | 1039 | /** |
1039 | * blk_queue_resize_tags - change the queueing depth | 1040 | * blk_queue_resize_tags - change the queueing depth |
1040 | * @q: the request queue for the device | 1041 | * @q: the request queue for the device |
1041 | * @new_depth: the new max command queueing depth | 1042 | * @new_depth: the new max command queueing depth |
1042 | * | 1043 | * |
1043 | * Notes: | 1044 | * Notes: |
1044 | * Must be called with the queue lock held. | 1045 | * Must be called with the queue lock held. |
1045 | **/ | 1046 | **/ |
1046 | int blk_queue_resize_tags(request_queue_t *q, int new_depth) | 1047 | int blk_queue_resize_tags(request_queue_t *q, int new_depth) |
1047 | { | 1048 | { |
1048 | struct blk_queue_tag *bqt = q->queue_tags; | 1049 | struct blk_queue_tag *bqt = q->queue_tags; |
1049 | struct request **tag_index; | 1050 | struct request **tag_index; |
1050 | unsigned long *tag_map; | 1051 | unsigned long *tag_map; |
1051 | int max_depth, nr_ulongs; | 1052 | int max_depth, nr_ulongs; |
1052 | 1053 | ||
1053 | if (!bqt) | 1054 | if (!bqt) |
1054 | return -ENXIO; | 1055 | return -ENXIO; |
1055 | 1056 | ||
1056 | /* | 1057 | /* |
1057 | * if we already have a large enough real_max_depth, just | 1058 | * if we already have a large enough real_max_depth, just |
1058 | * adjust max_depth. *NOTE* as requests with tag value | 1059 | * adjust max_depth. *NOTE* as requests with tag value |
1059 | * between new_depth and real_max_depth can be in-flight, tag | 1060 | * between new_depth and real_max_depth can be in-flight, tag |
1060 | * map can not be shrunk blindly here. | 1061 | * map can not be shrunk blindly here. |
1061 | */ | 1062 | */ |
1062 | if (new_depth <= bqt->real_max_depth) { | 1063 | if (new_depth <= bqt->real_max_depth) { |
1063 | bqt->max_depth = new_depth; | 1064 | bqt->max_depth = new_depth; |
1064 | return 0; | 1065 | return 0; |
1065 | } | 1066 | } |
1066 | 1067 | ||
1067 | /* | 1068 | /* |
1068 | * Currently cannot replace a shared tag map with a new | 1069 | * Currently cannot replace a shared tag map with a new |
1069 | * one, so error out if this is the case | 1070 | * one, so error out if this is the case |
1070 | */ | 1071 | */ |
1071 | if (atomic_read(&bqt->refcnt) != 1) | 1072 | if (atomic_read(&bqt->refcnt) != 1) |
1072 | return -EBUSY; | 1073 | return -EBUSY; |
1073 | 1074 | ||
1074 | /* | 1075 | /* |
1075 | * save the old state info, so we can copy it back | 1076 | * save the old state info, so we can copy it back |
1076 | */ | 1077 | */ |
1077 | tag_index = bqt->tag_index; | 1078 | tag_index = bqt->tag_index; |
1078 | tag_map = bqt->tag_map; | 1079 | tag_map = bqt->tag_map; |
1079 | max_depth = bqt->real_max_depth; | 1080 | max_depth = bqt->real_max_depth; |
1080 | 1081 | ||
1081 | if (init_tag_map(q, bqt, new_depth)) | 1082 | if (init_tag_map(q, bqt, new_depth)) |
1082 | return -ENOMEM; | 1083 | return -ENOMEM; |
1083 | 1084 | ||
1084 | memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); | 1085 | memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); |
1085 | nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; | 1086 | nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; |
1086 | memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); | 1087 | memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); |
1087 | 1088 | ||
1088 | kfree(tag_index); | 1089 | kfree(tag_index); |
1089 | kfree(tag_map); | 1090 | kfree(tag_map); |
1090 | return 0; | 1091 | return 0; |
1091 | } | 1092 | } |
1092 | 1093 | ||
1093 | EXPORT_SYMBOL(blk_queue_resize_tags); | 1094 | EXPORT_SYMBOL(blk_queue_resize_tags); |
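If the device later advertises a different queue depth, the driver adjusts the map under the queue lock, as the note above requires. A minimal hedged sketch with a hypothetical entry point:

#include <linux/blkdev.h>

static int my_change_queue_depth(request_queue_t *q, int new_depth)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(q->queue_lock, flags);
	ret = blk_queue_resize_tags(q, new_depth);
	spin_unlock_irqrestore(q->queue_lock, flags);

	return ret;
}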
1094 | 1095 | ||
1095 | /** | 1096 | /** |
1096 | * blk_queue_end_tag - end tag operations for a request | 1097 | * blk_queue_end_tag - end tag operations for a request |
1097 | * @q: the request queue for the device | 1098 | * @q: the request queue for the device |
1098 | * @rq: the request that has completed | 1099 | * @rq: the request that has completed |
1099 | * | 1100 | * |
1100 | * Description: | 1101 | * Description: |
1101 | * Typically called when end_that_request_first() returns 0, meaning | 1102 | * Typically called when end_that_request_first() returns 0, meaning |
1102 | * all transfers have been done for a request. It's important to call | 1103 | * all transfers have been done for a request. It's important to call |
1103 | * this function before end_that_request_last(), as that will put the | 1104 | * this function before end_that_request_last(), as that will put the |
1104 | * request back on the free list thus corrupting the internal tag list. | 1105 | * request back on the free list thus corrupting the internal tag list. |
1105 | * | 1106 | * |
1106 | * Notes: | 1107 | * Notes: |
1107 | * queue lock must be held. | 1108 | * queue lock must be held. |
1108 | **/ | 1109 | **/ |
1109 | void blk_queue_end_tag(request_queue_t *q, struct request *rq) | 1110 | void blk_queue_end_tag(request_queue_t *q, struct request *rq) |
1110 | { | 1111 | { |
1111 | struct blk_queue_tag *bqt = q->queue_tags; | 1112 | struct blk_queue_tag *bqt = q->queue_tags; |
1112 | int tag = rq->tag; | 1113 | int tag = rq->tag; |
1113 | 1114 | ||
1114 | BUG_ON(tag == -1); | 1115 | BUG_ON(tag == -1); |
1115 | 1116 | ||
1116 | if (unlikely(tag >= bqt->real_max_depth)) | 1117 | if (unlikely(tag >= bqt->real_max_depth)) |
1117 | /* | 1118 | /* |
1118 | * This can happen after tag depth has been reduced. | 1119 | * This can happen after tag depth has been reduced. |
1119 | * FIXME: how about a warning or info message here? | 1120 | * FIXME: how about a warning or info message here? |
1120 | */ | 1121 | */ |
1121 | return; | 1122 | return; |
1122 | 1123 | ||
1123 | if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) { | 1124 | if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) { |
1124 | printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", | 1125 | printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", |
1125 | __FUNCTION__, tag); | 1126 | __FUNCTION__, tag); |
1126 | return; | 1127 | return; |
1127 | } | 1128 | } |
1128 | 1129 | ||
1129 | list_del_init(&rq->queuelist); | 1130 | list_del_init(&rq->queuelist); |
1130 | rq->cmd_flags &= ~REQ_QUEUED; | 1131 | rq->cmd_flags &= ~REQ_QUEUED; |
1131 | rq->tag = -1; | 1132 | rq->tag = -1; |
1132 | 1133 | ||
1133 | if (unlikely(bqt->tag_index[tag] == NULL)) | 1134 | if (unlikely(bqt->tag_index[tag] == NULL)) |
1134 | printk(KERN_ERR "%s: tag %d is missing\n", | 1135 | printk(KERN_ERR "%s: tag %d is missing\n", |
1135 | __FUNCTION__, tag); | 1136 | __FUNCTION__, tag); |
1136 | 1137 | ||
1137 | bqt->tag_index[tag] = NULL; | 1138 | bqt->tag_index[tag] = NULL; |
1138 | bqt->busy--; | 1139 | bqt->busy--; |
1139 | } | 1140 | } |
1140 | 1141 | ||
1141 | EXPORT_SYMBOL(blk_queue_end_tag); | 1142 | EXPORT_SYMBOL(blk_queue_end_tag); |
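Putting the ordering rule above into code, a tagged driver's completion path (called with the queue lock held) might look roughly like this; everything other than the block layer helpers is hypothetical:

#include <linux/blkdev.h>

static void my_finish_request(request_queue_t *q, struct request *rq, int uptodate)
{
	/* returns 0 once all sectors of the request have been completed */
	if (end_that_request_first(rq, uptodate, rq->hard_nr_sectors))
		return;		/* partial completion, more to come */

	/* release the tag before the request goes back to the free list */
	if (blk_queue_tagged(q))
		blk_queue_end_tag(q, rq);

	end_that_request_last(rq, uptodate);
}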
1142 | 1143 | ||
1143 | /** | 1144 | /** |
1144 | * blk_queue_start_tag - find a free tag and assign it | 1145 | * blk_queue_start_tag - find a free tag and assign it |
1145 | * @q: the request queue for the device | 1146 | * @q: the request queue for the device |
1146 | * @rq: the block request that needs tagging | 1147 | * @rq: the block request that needs tagging |
1147 | * | 1148 | * |
1148 | * Description: | 1149 | * Description: |
1149 | * This can either be used as a stand-alone helper, or possibly be | 1150 | * This can either be used as a stand-alone helper, or possibly be |
1150 | * assigned as the queue &prep_rq_fn (in which case &struct request | 1151 | * assigned as the queue &prep_rq_fn (in which case &struct request |
1151 | * automagically gets a tag assigned). Note that this function | 1152 | * automagically gets a tag assigned). Note that this function |
1152 | * assumes that any type of request can be queued! if this is not | 1153 | * assumes that any type of request can be queued! if this is not |
1153 | * true for your device, you must check the request type before | 1154 | * true for your device, you must check the request type before |
1154 | * calling this function. The request will also be removed from | 1155 | * calling this function. The request will also be removed from |
1155 | * the request queue, so it's the drivers responsibility to readd | 1156 | * the request queue, so it's the drivers responsibility to readd |
1156 | * it if it should need to be restarted for some reason. | 1157 | * it if it should need to be restarted for some reason. |
1157 | * | 1158 | * |
1158 | * Notes: | 1159 | * Notes: |
1159 | * queue lock must be held. | 1160 | * queue lock must be held. |
1160 | **/ | 1161 | **/ |
1161 | int blk_queue_start_tag(request_queue_t *q, struct request *rq) | 1162 | int blk_queue_start_tag(request_queue_t *q, struct request *rq) |
1162 | { | 1163 | { |
1163 | struct blk_queue_tag *bqt = q->queue_tags; | 1164 | struct blk_queue_tag *bqt = q->queue_tags; |
1164 | int tag; | 1165 | int tag; |
1165 | 1166 | ||
1166 | if (unlikely((rq->cmd_flags & REQ_QUEUED))) { | 1167 | if (unlikely((rq->cmd_flags & REQ_QUEUED))) { |
1167 | printk(KERN_ERR | 1168 | printk(KERN_ERR |
1168 | "%s: request %p for device [%s] already tagged %d", | 1169 | "%s: request %p for device [%s] already tagged %d", |
1169 | __FUNCTION__, rq, | 1170 | __FUNCTION__, rq, |
1170 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); | 1171 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); |
1171 | BUG(); | 1172 | BUG(); |
1172 | } | 1173 | } |
1173 | 1174 | ||
1174 | tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); | 1175 | tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); |
1175 | if (tag >= bqt->max_depth) | 1176 | if (tag >= bqt->max_depth) |
1176 | return 1; | 1177 | return 1; |
1177 | 1178 | ||
1178 | __set_bit(tag, bqt->tag_map); | 1179 | __set_bit(tag, bqt->tag_map); |
1179 | 1180 | ||
1180 | rq->cmd_flags |= REQ_QUEUED; | 1181 | rq->cmd_flags |= REQ_QUEUED; |
1181 | rq->tag = tag; | 1182 | rq->tag = tag; |
1182 | bqt->tag_index[tag] = rq; | 1183 | bqt->tag_index[tag] = rq; |
1183 | blkdev_dequeue_request(rq); | 1184 | blkdev_dequeue_request(rq); |
1184 | list_add(&rq->queuelist, &bqt->busy_list); | 1185 | list_add(&rq->queuelist, &bqt->busy_list); |
1185 | bqt->busy++; | 1186 | bqt->busy++; |
1186 | return 0; | 1187 | return 0; |
1187 | } | 1188 | } |
1188 | 1189 | ||
1189 | EXPORT_SYMBOL(blk_queue_start_tag); | 1190 | EXPORT_SYMBOL(blk_queue_start_tag); |
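A request_fn built on top of this helper usually just keeps tagging and dispatching until the tag map runs dry. A hedged sketch, with my_queue_to_hw() standing in for the actual hardware submission (the queue lock is held when request_fn runs):

#include <linux/blkdev.h>

void my_queue_to_hw(struct request *rq);	/* hypothetical hardware submit */

static void my_request_fn(request_queue_t *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		/* nonzero means no free tag; stop and retry on a later run */
		if (blk_queue_start_tag(q, rq))
			break;

		my_queue_to_hw(rq);	/* blk_queue_start_tag already dequeued rq */
	}
}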
1190 | 1191 | ||
1191 | /** | 1192 | /** |
1192 | * blk_queue_invalidate_tags - invalidate all pending tags | 1193 | * blk_queue_invalidate_tags - invalidate all pending tags |
1193 | * @q: the request queue for the device | 1194 | * @q: the request queue for the device |
1194 | * | 1195 | * |
1195 | * Description: | 1196 | * Description: |
1196 | * Hardware conditions may dictate a need to stop all pending requests. | 1197 | * Hardware conditions may dictate a need to stop all pending requests. |
1197 | * In this case, we will safely clear the block side of the tag queue and | 1198 | * In this case, we will safely clear the block side of the tag queue and |
1198 | * re-add all requests to the request queue in the right order. | 1199 | * re-add all requests to the request queue in the right order. |
1199 | * | 1200 | * |
1200 | * Notes: | 1201 | * Notes: |
1201 | * queue lock must be held. | 1202 | * queue lock must be held. |
1202 | **/ | 1203 | **/ |
1203 | void blk_queue_invalidate_tags(request_queue_t *q) | 1204 | void blk_queue_invalidate_tags(request_queue_t *q) |
1204 | { | 1205 | { |
1205 | struct blk_queue_tag *bqt = q->queue_tags; | 1206 | struct blk_queue_tag *bqt = q->queue_tags; |
1206 | struct list_head *tmp, *n; | 1207 | struct list_head *tmp, *n; |
1207 | struct request *rq; | 1208 | struct request *rq; |
1208 | 1209 | ||
1209 | list_for_each_safe(tmp, n, &bqt->busy_list) { | 1210 | list_for_each_safe(tmp, n, &bqt->busy_list) { |
1210 | rq = list_entry_rq(tmp); | 1211 | rq = list_entry_rq(tmp); |
1211 | 1212 | ||
1212 | if (rq->tag == -1) { | 1213 | if (rq->tag == -1) { |
1213 | printk(KERN_ERR | 1214 | printk(KERN_ERR |
1214 | "%s: bad tag found on list\n", __FUNCTION__); | 1215 | "%s: bad tag found on list\n", __FUNCTION__); |
1215 | list_del_init(&rq->queuelist); | 1216 | list_del_init(&rq->queuelist); |
1216 | rq->cmd_flags &= ~REQ_QUEUED; | 1217 | rq->cmd_flags &= ~REQ_QUEUED; |
1217 | } else | 1218 | } else |
1218 | blk_queue_end_tag(q, rq); | 1219 | blk_queue_end_tag(q, rq); |
1219 | 1220 | ||
1220 | rq->cmd_flags &= ~REQ_STARTED; | 1221 | rq->cmd_flags &= ~REQ_STARTED; |
1221 | __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); | 1222 | __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); |
1222 | } | 1223 | } |
1223 | } | 1224 | } |
1224 | 1225 | ||
1225 | EXPORT_SYMBOL(blk_queue_invalidate_tags); | 1226 | EXPORT_SYMBOL(blk_queue_invalidate_tags); |
1226 | 1227 | ||
1227 | void blk_dump_rq_flags(struct request *rq, char *msg) | 1228 | void blk_dump_rq_flags(struct request *rq, char *msg) |
1228 | { | 1229 | { |
1229 | int bit; | 1230 | int bit; |
1230 | 1231 | ||
1231 | printk("%s: dev %s: type=%x, flags=%x\n", msg, | 1232 | printk("%s: dev %s: type=%x, flags=%x\n", msg, |
1232 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, | 1233 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, |
1233 | rq->cmd_flags); | 1234 | rq->cmd_flags); |
1234 | 1235 | ||
1235 | printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, | 1236 | printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, |
1236 | rq->nr_sectors, | 1237 | rq->nr_sectors, |
1237 | rq->current_nr_sectors); | 1238 | rq->current_nr_sectors); |
1238 | printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); | 1239 | printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); |
1239 | 1240 | ||
1240 | if (blk_pc_request(rq)) { | 1241 | if (blk_pc_request(rq)) { |
1241 | printk("cdb: "); | 1242 | printk("cdb: "); |
1242 | for (bit = 0; bit < sizeof(rq->cmd); bit++) | 1243 | for (bit = 0; bit < sizeof(rq->cmd); bit++) |
1243 | printk("%02x ", rq->cmd[bit]); | 1244 | printk("%02x ", rq->cmd[bit]); |
1244 | printk("\n"); | 1245 | printk("\n"); |
1245 | } | 1246 | } |
1246 | } | 1247 | } |
1247 | 1248 | ||
1248 | EXPORT_SYMBOL(blk_dump_rq_flags); | 1249 | EXPORT_SYMBOL(blk_dump_rq_flags); |
1249 | 1250 | ||
1250 | void blk_recount_segments(request_queue_t *q, struct bio *bio) | 1251 | void blk_recount_segments(request_queue_t *q, struct bio *bio) |
1251 | { | 1252 | { |
1252 | struct bio_vec *bv, *bvprv = NULL; | 1253 | struct bio_vec *bv, *bvprv = NULL; |
1253 | int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster; | 1254 | int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster; |
1254 | int high, highprv = 1; | 1255 | int high, highprv = 1; |
1255 | 1256 | ||
1256 | if (unlikely(!bio->bi_io_vec)) | 1257 | if (unlikely(!bio->bi_io_vec)) |
1257 | return; | 1258 | return; |
1258 | 1259 | ||
1259 | cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); | 1260 | cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); |
1260 | hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0; | 1261 | hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0; |
1261 | bio_for_each_segment(bv, bio, i) { | 1262 | bio_for_each_segment(bv, bio, i) { |
1262 | /* | 1263 | /* |
1263 | * the trick here is making sure that a high page is never | 1264 | * the trick here is making sure that a high page is never |
1264 | * considered part of another segment, since that might | 1265 | * considered part of another segment, since that might |
1265 | * change with the bounce page. | 1266 | * change with the bounce page. |
1266 | */ | 1267 | */ |
1267 | high = page_to_pfn(bv->bv_page) >= q->bounce_pfn; | 1268 | high = page_to_pfn(bv->bv_page) >= q->bounce_pfn; |
1268 | if (high || highprv) | 1269 | if (high || highprv) |
1269 | goto new_hw_segment; | 1270 | goto new_hw_segment; |
1270 | if (cluster) { | 1271 | if (cluster) { |
1271 | if (seg_size + bv->bv_len > q->max_segment_size) | 1272 | if (seg_size + bv->bv_len > q->max_segment_size) |
1272 | goto new_segment; | 1273 | goto new_segment; |
1273 | if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) | 1274 | if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) |
1274 | goto new_segment; | 1275 | goto new_segment; |
1275 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) | 1276 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) |
1276 | goto new_segment; | 1277 | goto new_segment; |
1277 | if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) | 1278 | if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) |
1278 | goto new_hw_segment; | 1279 | goto new_hw_segment; |
1279 | 1280 | ||
1280 | seg_size += bv->bv_len; | 1281 | seg_size += bv->bv_len; |
1281 | hw_seg_size += bv->bv_len; | 1282 | hw_seg_size += bv->bv_len; |
1282 | bvprv = bv; | 1283 | bvprv = bv; |
1283 | continue; | 1284 | continue; |
1284 | } | 1285 | } |
1285 | new_segment: | 1286 | new_segment: |
1286 | if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && | 1287 | if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && |
1287 | !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) { | 1288 | !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) { |
1288 | hw_seg_size += bv->bv_len; | 1289 | hw_seg_size += bv->bv_len; |
1289 | } else { | 1290 | } else { |
1290 | new_hw_segment: | 1291 | new_hw_segment: |
1291 | if (hw_seg_size > bio->bi_hw_front_size) | 1292 | if (hw_seg_size > bio->bi_hw_front_size) |
1292 | bio->bi_hw_front_size = hw_seg_size; | 1293 | bio->bi_hw_front_size = hw_seg_size; |
1293 | hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; | 1294 | hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; |
1294 | nr_hw_segs++; | 1295 | nr_hw_segs++; |
1295 | } | 1296 | } |
1296 | 1297 | ||
1297 | nr_phys_segs++; | 1298 | nr_phys_segs++; |
1298 | bvprv = bv; | 1299 | bvprv = bv; |
1299 | seg_size = bv->bv_len; | 1300 | seg_size = bv->bv_len; |
1300 | highprv = high; | 1301 | highprv = high; |
1301 | } | 1302 | } |
1302 | if (hw_seg_size > bio->bi_hw_back_size) | 1303 | if (hw_seg_size > bio->bi_hw_back_size) |
1303 | bio->bi_hw_back_size = hw_seg_size; | 1304 | bio->bi_hw_back_size = hw_seg_size; |
1304 | if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size) | 1305 | if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size) |
1305 | bio->bi_hw_front_size = hw_seg_size; | 1306 | bio->bi_hw_front_size = hw_seg_size; |
1306 | bio->bi_phys_segments = nr_phys_segs; | 1307 | bio->bi_phys_segments = nr_phys_segs; |
1307 | bio->bi_hw_segments = nr_hw_segs; | 1308 | bio->bi_hw_segments = nr_hw_segs; |
1308 | bio->bi_flags |= (1 << BIO_SEG_VALID); | 1309 | bio->bi_flags |= (1 << BIO_SEG_VALID); |
1309 | } | 1310 | } |
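The loop above boils down to one idea: adjacent vectors collapse into a single segment as long as they stay physically contiguous and the running segment stays under the queue's limits. A self-contained userspace illustration of that idea (not the kernel code itself):

#include <stdio.h>

struct chunk { unsigned long start; unsigned int len; };

static int count_segments(const struct chunk *c, int n, unsigned int max_seg)
{
	unsigned int seg_size = 0;
	int i, nsegs = 0;

	for (i = 0; i < n; i++) {
		if (nsegs &&
		    c[i].start == c[i - 1].start + c[i - 1].len &&
		    seg_size + c[i].len <= max_seg) {
			seg_size += c[i].len;	/* extend the current segment */
			continue;
		}
		seg_size = c[i].len;		/* start a new segment */
		nsegs++;
	}
	return nsegs;
}

int main(void)
{
	struct chunk c[] = {
		{ 0x1000, 0x1000 }, { 0x2000, 0x1000 },	/* contiguous: one segment */
		{ 0x8000, 0x200 },			/* gap: a second segment */
	};

	printf("%d segments\n", count_segments(c, 3, 0x10000));	/* prints 2 */
	return 0;
}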
1310 | 1311 | ||
1311 | 1312 | ||
1312 | static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio, | 1313 | static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio, |
1313 | struct bio *nxt) | 1314 | struct bio *nxt) |
1314 | { | 1315 | { |
1315 | if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) | 1316 | if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) |
1316 | return 0; | 1317 | return 0; |
1317 | 1318 | ||
1318 | if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) | 1319 | if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) |
1319 | return 0; | 1320 | return 0; |
1320 | if (bio->bi_size + nxt->bi_size > q->max_segment_size) | 1321 | if (bio->bi_size + nxt->bi_size > q->max_segment_size) |
1321 | return 0; | 1322 | return 0; |
1322 | 1323 | ||
1323 | /* | 1324 | /* |
1324 | * bio and nxt are contiguous in memory, check if the queue allows | 1325 | * bio and nxt are contiguous in memory, check if the queue allows |
1325 | * these two to be merged into one | 1326 | * these two to be merged into one |
1326 | */ | 1327 | */ |
1327 | if (BIO_SEG_BOUNDARY(q, bio, nxt)) | 1328 | if (BIO_SEG_BOUNDARY(q, bio, nxt)) |
1328 | return 1; | 1329 | return 1; |
1329 | 1330 | ||
1330 | return 0; | 1331 | return 0; |
1331 | } | 1332 | } |
1332 | 1333 | ||
1333 | static int blk_hw_contig_segment(request_queue_t *q, struct bio *bio, | 1334 | static int blk_hw_contig_segment(request_queue_t *q, struct bio *bio, |
1334 | struct bio *nxt) | 1335 | struct bio *nxt) |
1335 | { | 1336 | { |
1336 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | 1337 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) |
1337 | blk_recount_segments(q, bio); | 1338 | blk_recount_segments(q, bio); |
1338 | if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID))) | 1339 | if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID))) |
1339 | blk_recount_segments(q, nxt); | 1340 | blk_recount_segments(q, nxt); |
1340 | if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || | 1341 | if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || |
1341 | BIOVEC_VIRT_OVERSIZE(bio->bi_hw_front_size + bio->bi_hw_back_size)) | 1342 | BIOVEC_VIRT_OVERSIZE(bio->bi_hw_front_size + bio->bi_hw_back_size)) |
1342 | return 0; | 1343 | return 0; |
1343 | if (bio->bi_size + nxt->bi_size > q->max_segment_size) | 1344 | if (bio->bi_size + nxt->bi_size > q->max_segment_size) |
1344 | return 0; | 1345 | return 0; |
1345 | 1346 | ||
1346 | return 1; | 1347 | return 1; |
1347 | } | 1348 | } |
1348 | 1349 | ||
1349 | /* | 1350 | /* |
1350 | * map a request to scatterlist, return number of sg entries setup. Caller | 1351 | * map a request to scatterlist, return number of sg entries setup. Caller |
1351 | * must make sure sg can hold rq->nr_phys_segments entries | 1352 | * must make sure sg can hold rq->nr_phys_segments entries |
1352 | */ | 1353 | */ |
1353 | int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg) | 1354 | int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg) |
1354 | { | 1355 | { |
1355 | struct bio_vec *bvec, *bvprv; | 1356 | struct bio_vec *bvec, *bvprv; |
1356 | struct bio *bio; | 1357 | struct bio *bio; |
1357 | int nsegs, i, cluster; | 1358 | int nsegs, i, cluster; |
1358 | 1359 | ||
1359 | nsegs = 0; | 1360 | nsegs = 0; |
1360 | cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); | 1361 | cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); |
1361 | 1362 | ||
1362 | /* | 1363 | /* |
1363 | * for each bio in rq | 1364 | * for each bio in rq |
1364 | */ | 1365 | */ |
1365 | bvprv = NULL; | 1366 | bvprv = NULL; |
1366 | rq_for_each_bio(bio, rq) { | 1367 | rq_for_each_bio(bio, rq) { |
1367 | /* | 1368 | /* |
1368 | * for each segment in bio | 1369 | * for each segment in bio |
1369 | */ | 1370 | */ |
1370 | bio_for_each_segment(bvec, bio, i) { | 1371 | bio_for_each_segment(bvec, bio, i) { |
1371 | int nbytes = bvec->bv_len; | 1372 | int nbytes = bvec->bv_len; |
1372 | 1373 | ||
1373 | if (bvprv && cluster) { | 1374 | if (bvprv && cluster) { |
1374 | if (sg[nsegs - 1].length + nbytes > q->max_segment_size) | 1375 | if (sg[nsegs - 1].length + nbytes > q->max_segment_size) |
1375 | goto new_segment; | 1376 | goto new_segment; |
1376 | 1377 | ||
1377 | if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) | 1378 | if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) |
1378 | goto new_segment; | 1379 | goto new_segment; |
1379 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) | 1380 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) |
1380 | goto new_segment; | 1381 | goto new_segment; |
1381 | 1382 | ||
1382 | sg[nsegs - 1].length += nbytes; | 1383 | sg[nsegs - 1].length += nbytes; |
1383 | } else { | 1384 | } else { |
1384 | new_segment: | 1385 | new_segment: |
1385 | memset(&sg[nsegs],0,sizeof(struct scatterlist)); | 1386 | memset(&sg[nsegs],0,sizeof(struct scatterlist)); |
1386 | sg[nsegs].page = bvec->bv_page; | 1387 | sg[nsegs].page = bvec->bv_page; |
1387 | sg[nsegs].length = nbytes; | 1388 | sg[nsegs].length = nbytes; |
1388 | sg[nsegs].offset = bvec->bv_offset; | 1389 | sg[nsegs].offset = bvec->bv_offset; |
1389 | 1390 | ||
1390 | nsegs++; | 1391 | nsegs++; |
1391 | } | 1392 | } |
1392 | bvprv = bvec; | 1393 | bvprv = bvec; |
1393 | } /* segments in bio */ | 1394 | } /* segments in bio */ |
1394 | } /* bios in rq */ | 1395 | } /* bios in rq */ |
1395 | 1396 | ||
1396 | return nsegs; | 1397 | return nsegs; |
1397 | } | 1398 | } |
1398 | 1399 | ||
1399 | EXPORT_SYMBOL(blk_rq_map_sg); | 1400 | EXPORT_SYMBOL(blk_rq_map_sg); |
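A driver typically sizes the scatterlist from the queue limits it set earlier, lets blk_rq_map_sg() fill it, and hands the result to the DMA API. A hedged sketch (MY_MAX_SEGS and my_map_request() are hypothetical):

#include <linux/blkdev.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>

#define MY_MAX_SEGS 128		/* must match the blk_queue_max_*_segments() setting */

static int my_map_request(struct pci_dev *pdev, request_queue_t *q,
			  struct request *rq, struct scatterlist *sg)
{
	int nsegs, mapped;

	nsegs = blk_rq_map_sg(q, rq, sg);	/* never more than rq->nr_phys_segments */
	mapped = pci_map_sg(pdev, sg, nsegs,
			    rq_data_dir(rq) == WRITE ?
					PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);

	return mapped;	/* number of DMA descriptors to program */
}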
1400 | 1401 | ||
1401 | /* | 1402 | /* |
1402 | * the standard queue merge functions, can be overridden with device | 1403 | * the standard queue merge functions, can be overridden with device |
1403 | * specific ones if so desired | 1404 | * specific ones if so desired |
1404 | */ | 1405 | */ |
1405 | 1406 | ||
1406 | static inline int ll_new_mergeable(request_queue_t *q, | 1407 | static inline int ll_new_mergeable(request_queue_t *q, |
1407 | struct request *req, | 1408 | struct request *req, |
1408 | struct bio *bio) | 1409 | struct bio *bio) |
1409 | { | 1410 | { |
1410 | int nr_phys_segs = bio_phys_segments(q, bio); | 1411 | int nr_phys_segs = bio_phys_segments(q, bio); |
1411 | 1412 | ||
1412 | if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { | 1413 | if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { |
1413 | req->cmd_flags |= REQ_NOMERGE; | 1414 | req->cmd_flags |= REQ_NOMERGE; |
1414 | if (req == q->last_merge) | 1415 | if (req == q->last_merge) |
1415 | q->last_merge = NULL; | 1416 | q->last_merge = NULL; |
1416 | return 0; | 1417 | return 0; |
1417 | } | 1418 | } |
1418 | 1419 | ||
1419 | /* | 1420 | /* |
1420 | * A hw segment is just getting larger, bump just the phys | 1421 | * A hw segment is just getting larger, bump just the phys |
1421 | * counter. | 1422 | * counter. |
1422 | */ | 1423 | */ |
1423 | req->nr_phys_segments += nr_phys_segs; | 1424 | req->nr_phys_segments += nr_phys_segs; |
1424 | return 1; | 1425 | return 1; |
1425 | } | 1426 | } |
1426 | 1427 | ||
1427 | static inline int ll_new_hw_segment(request_queue_t *q, | 1428 | static inline int ll_new_hw_segment(request_queue_t *q, |
1428 | struct request *req, | 1429 | struct request *req, |
1429 | struct bio *bio) | 1430 | struct bio *bio) |
1430 | { | 1431 | { |
1431 | int nr_hw_segs = bio_hw_segments(q, bio); | 1432 | int nr_hw_segs = bio_hw_segments(q, bio); |
1432 | int nr_phys_segs = bio_phys_segments(q, bio); | 1433 | int nr_phys_segs = bio_phys_segments(q, bio); |
1433 | 1434 | ||
1434 | if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments | 1435 | if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments |
1435 | || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { | 1436 | || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { |
1436 | req->cmd_flags |= REQ_NOMERGE; | 1437 | req->cmd_flags |= REQ_NOMERGE; |
1437 | if (req == q->last_merge) | 1438 | if (req == q->last_merge) |
1438 | q->last_merge = NULL; | 1439 | q->last_merge = NULL; |
1439 | return 0; | 1440 | return 0; |
1440 | } | 1441 | } |
1441 | 1442 | ||
1442 | /* | 1443 | /* |
1443 | * This will form the start of a new hw segment. Bump both | 1444 | * This will form the start of a new hw segment. Bump both |
1444 | * counters. | 1445 | * counters. |
1445 | */ | 1446 | */ |
1446 | req->nr_hw_segments += nr_hw_segs; | 1447 | req->nr_hw_segments += nr_hw_segs; |
1447 | req->nr_phys_segments += nr_phys_segs; | 1448 | req->nr_phys_segments += nr_phys_segs; |
1448 | return 1; | 1449 | return 1; |
1449 | } | 1450 | } |
1450 | 1451 | ||
1451 | static int ll_back_merge_fn(request_queue_t *q, struct request *req, | 1452 | static int ll_back_merge_fn(request_queue_t *q, struct request *req, |
1452 | struct bio *bio) | 1453 | struct bio *bio) |
1453 | { | 1454 | { |
1454 | unsigned short max_sectors; | 1455 | unsigned short max_sectors; |
1455 | int len; | 1456 | int len; |
1456 | 1457 | ||
1457 | if (unlikely(blk_pc_request(req))) | 1458 | if (unlikely(blk_pc_request(req))) |
1458 | max_sectors = q->max_hw_sectors; | 1459 | max_sectors = q->max_hw_sectors; |
1459 | else | 1460 | else |
1460 | max_sectors = q->max_sectors; | 1461 | max_sectors = q->max_sectors; |
1461 | 1462 | ||
1462 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { | 1463 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { |
1463 | req->cmd_flags |= REQ_NOMERGE; | 1464 | req->cmd_flags |= REQ_NOMERGE; |
1464 | if (req == q->last_merge) | 1465 | if (req == q->last_merge) |
1465 | q->last_merge = NULL; | 1466 | q->last_merge = NULL; |
1466 | return 0; | 1467 | return 0; |
1467 | } | 1468 | } |
1468 | if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID))) | 1469 | if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID))) |
1469 | blk_recount_segments(q, req->biotail); | 1470 | blk_recount_segments(q, req->biotail); |
1470 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | 1471 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) |
1471 | blk_recount_segments(q, bio); | 1472 | blk_recount_segments(q, bio); |
1472 | len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; | 1473 | len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; |
1473 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) && | 1474 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) && |
1474 | !BIOVEC_VIRT_OVERSIZE(len)) { | 1475 | !BIOVEC_VIRT_OVERSIZE(len)) { |
1475 | int mergeable = ll_new_mergeable(q, req, bio); | 1476 | int mergeable = ll_new_mergeable(q, req, bio); |
1476 | 1477 | ||
1477 | if (mergeable) { | 1478 | if (mergeable) { |
1478 | if (req->nr_hw_segments == 1) | 1479 | if (req->nr_hw_segments == 1) |
1479 | req->bio->bi_hw_front_size = len; | 1480 | req->bio->bi_hw_front_size = len; |
1480 | if (bio->bi_hw_segments == 1) | 1481 | if (bio->bi_hw_segments == 1) |
1481 | bio->bi_hw_back_size = len; | 1482 | bio->bi_hw_back_size = len; |
1482 | } | 1483 | } |
1483 | return mergeable; | 1484 | return mergeable; |
1484 | } | 1485 | } |
1485 | 1486 | ||
1486 | return ll_new_hw_segment(q, req, bio); | 1487 | return ll_new_hw_segment(q, req, bio); |
1487 | } | 1488 | } |
1488 | 1489 | ||
1489 | static int ll_front_merge_fn(request_queue_t *q, struct request *req, | 1490 | static int ll_front_merge_fn(request_queue_t *q, struct request *req, |
1490 | struct bio *bio) | 1491 | struct bio *bio) |
1491 | { | 1492 | { |
1492 | unsigned short max_sectors; | 1493 | unsigned short max_sectors; |
1493 | int len; | 1494 | int len; |
1494 | 1495 | ||
1495 | if (unlikely(blk_pc_request(req))) | 1496 | if (unlikely(blk_pc_request(req))) |
1496 | max_sectors = q->max_hw_sectors; | 1497 | max_sectors = q->max_hw_sectors; |
1497 | else | 1498 | else |
1498 | max_sectors = q->max_sectors; | 1499 | max_sectors = q->max_sectors; |
1499 | 1500 | ||
1500 | 1501 | ||
1501 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { | 1502 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { |
1502 | req->cmd_flags |= REQ_NOMERGE; | 1503 | req->cmd_flags |= REQ_NOMERGE; |
1503 | if (req == q->last_merge) | 1504 | if (req == q->last_merge) |
1504 | q->last_merge = NULL; | 1505 | q->last_merge = NULL; |
1505 | return 0; | 1506 | return 0; |
1506 | } | 1507 | } |
1507 | len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; | 1508 | len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; |
1508 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | 1509 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) |
1509 | blk_recount_segments(q, bio); | 1510 | blk_recount_segments(q, bio); |
1510 | if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID))) | 1511 | if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID))) |
1511 | blk_recount_segments(q, req->bio); | 1512 | blk_recount_segments(q, req->bio); |
1512 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && | 1513 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && |
1513 | !BIOVEC_VIRT_OVERSIZE(len)) { | 1514 | !BIOVEC_VIRT_OVERSIZE(len)) { |
1514 | int mergeable = ll_new_mergeable(q, req, bio); | 1515 | int mergeable = ll_new_mergeable(q, req, bio); |
1515 | 1516 | ||
1516 | if (mergeable) { | 1517 | if (mergeable) { |
1517 | if (bio->bi_hw_segments == 1) | 1518 | if (bio->bi_hw_segments == 1) |
1518 | bio->bi_hw_front_size = len; | 1519 | bio->bi_hw_front_size = len; |
1519 | if (req->nr_hw_segments == 1) | 1520 | if (req->nr_hw_segments == 1) |
1520 | req->biotail->bi_hw_back_size = len; | 1521 | req->biotail->bi_hw_back_size = len; |
1521 | } | 1522 | } |
1522 | return mergeable; | 1523 | return mergeable; |
1523 | } | 1524 | } |
1524 | 1525 | ||
1525 | return ll_new_hw_segment(q, req, bio); | 1526 | return ll_new_hw_segment(q, req, bio); |
1526 | } | 1527 | } |
1527 | 1528 | ||
1528 | static int ll_merge_requests_fn(request_queue_t *q, struct request *req, | 1529 | static int ll_merge_requests_fn(request_queue_t *q, struct request *req, |
1529 | struct request *next) | 1530 | struct request *next) |
1530 | { | 1531 | { |
1531 | int total_phys_segments; | 1532 | int total_phys_segments; |
1532 | int total_hw_segments; | 1533 | int total_hw_segments; |
1533 | 1534 | ||
1534 | /* | 1535 | /* |
1535 | * First check if either of the requests is a re-queued | 1536 | * First check if either of the requests is a re-queued |
1536 | * request. We can't merge them if so. | 1537 | * request. We can't merge them if so. |
1537 | */ | 1538 | */ |
1538 | if (req->special || next->special) | 1539 | if (req->special || next->special) |
1539 | return 0; | 1540 | return 0; |
1540 | 1541 | ||
1541 | /* | 1542 | /* |
1542 | * Will it become too large? | 1543 | * Will it become too large? |
1543 | */ | 1544 | */ |
1544 | if ((req->nr_sectors + next->nr_sectors) > q->max_sectors) | 1545 | if ((req->nr_sectors + next->nr_sectors) > q->max_sectors) |
1545 | return 0; | 1546 | return 0; |
1546 | 1547 | ||
1547 | total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; | 1548 | total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; |
1548 | if (blk_phys_contig_segment(q, req->biotail, next->bio)) | 1549 | if (blk_phys_contig_segment(q, req->biotail, next->bio)) |
1549 | total_phys_segments--; | 1550 | total_phys_segments--; |
1550 | 1551 | ||
1551 | if (total_phys_segments > q->max_phys_segments) | 1552 | if (total_phys_segments > q->max_phys_segments) |
1552 | return 0; | 1553 | return 0; |
1553 | 1554 | ||
1554 | total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; | 1555 | total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; |
1555 | if (blk_hw_contig_segment(q, req->biotail, next->bio)) { | 1556 | if (blk_hw_contig_segment(q, req->biotail, next->bio)) { |
1556 | int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size; | 1557 | int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size; |
1557 | /* | 1558 | /* |
1558 | * propagate the combined length to the end of the requests | 1559 | * propagate the combined length to the end of the requests |
1559 | */ | 1560 | */ |
1560 | if (req->nr_hw_segments == 1) | 1561 | if (req->nr_hw_segments == 1) |
1561 | req->bio->bi_hw_front_size = len; | 1562 | req->bio->bi_hw_front_size = len; |
1562 | if (next->nr_hw_segments == 1) | 1563 | if (next->nr_hw_segments == 1) |
1563 | next->biotail->bi_hw_back_size = len; | 1564 | next->biotail->bi_hw_back_size = len; |
1564 | total_hw_segments--; | 1565 | total_hw_segments--; |
1565 | } | 1566 | } |
1566 | 1567 | ||
1567 | if (total_hw_segments > q->max_hw_segments) | 1568 | if (total_hw_segments > q->max_hw_segments) |
1568 | return 0; | 1569 | return 0; |
1569 | 1570 | ||
1570 | /* Merge is OK... */ | 1571 | /* Merge is OK... */ |
1571 | req->nr_phys_segments = total_phys_segments; | 1572 | req->nr_phys_segments = total_phys_segments; |
1572 | req->nr_hw_segments = total_hw_segments; | 1573 | req->nr_hw_segments = total_hw_segments; |
1573 | return 1; | 1574 | return 1; |
1574 | } | 1575 | } |
1575 | 1576 | ||
1576 | /* | 1577 | /* |
1577 | * "plug" the device if there are no outstanding requests: this will | 1578 | * "plug" the device if there are no outstanding requests: this will |
1578 | * force the transfer to start only after we have put all the requests | 1579 | * force the transfer to start only after we have put all the requests |
1579 | * on the list. | 1580 | * on the list. |
1580 | * | 1581 | * |
1581 | * This is called with interrupts off and no requests on the queue and | 1582 | * This is called with interrupts off and no requests on the queue and |
1582 | * with the queue lock held. | 1583 | * with the queue lock held. |
1583 | */ | 1584 | */ |
1584 | void blk_plug_device(request_queue_t *q) | 1585 | void blk_plug_device(request_queue_t *q) |
1585 | { | 1586 | { |
1586 | WARN_ON(!irqs_disabled()); | 1587 | WARN_ON(!irqs_disabled()); |
1587 | 1588 | ||
1588 | /* | 1589 | /* |
1589 | * don't plug a stopped queue, it must be paired with blk_start_queue() | 1590 | * don't plug a stopped queue, it must be paired with blk_start_queue() |
1590 | * which will restart the queueing | 1591 | * which will restart the queueing |
1591 | */ | 1592 | */ |
1592 | if (blk_queue_stopped(q)) | 1593 | if (blk_queue_stopped(q)) |
1593 | return; | 1594 | return; |
1594 | 1595 | ||
1595 | if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { | 1596 | if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { |
1596 | mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); | 1597 | mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); |
1597 | blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); | 1598 | blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); |
1598 | } | 1599 | } |
1599 | } | 1600 | } |
1600 | 1601 | ||
1601 | EXPORT_SYMBOL(blk_plug_device); | 1602 | EXPORT_SYMBOL(blk_plug_device); |
1602 | 1603 | ||
1603 | /* | 1604 | /* |
1604 | * remove the queue from the plugged list, if present. called with | 1605 | * remove the queue from the plugged list, if present. called with |
1605 | * queue lock held and interrupts disabled. | 1606 | * queue lock held and interrupts disabled. |
1606 | */ | 1607 | */ |
1607 | int blk_remove_plug(request_queue_t *q) | 1608 | int blk_remove_plug(request_queue_t *q) |
1608 | { | 1609 | { |
1609 | WARN_ON(!irqs_disabled()); | 1610 | WARN_ON(!irqs_disabled()); |
1610 | 1611 | ||
1611 | if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) | 1612 | if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) |
1612 | return 0; | 1613 | return 0; |
1613 | 1614 | ||
1614 | del_timer(&q->unplug_timer); | 1615 | del_timer(&q->unplug_timer); |
1615 | return 1; | 1616 | return 1; |
1616 | } | 1617 | } |
1617 | 1618 | ||
1618 | EXPORT_SYMBOL(blk_remove_plug); | 1619 | EXPORT_SYMBOL(blk_remove_plug); |
1619 | 1620 | ||
1620 | /* | 1621 | /* |
1621 | * remove the plug and let it rip.. | 1622 | * remove the plug and let it rip.. |
1622 | */ | 1623 | */ |
1623 | void __generic_unplug_device(request_queue_t *q) | 1624 | void __generic_unplug_device(request_queue_t *q) |
1624 | { | 1625 | { |
1625 | if (unlikely(blk_queue_stopped(q))) | 1626 | if (unlikely(blk_queue_stopped(q))) |
1626 | return; | 1627 | return; |
1627 | 1628 | ||
1628 | if (!blk_remove_plug(q)) | 1629 | if (!blk_remove_plug(q)) |
1629 | return; | 1630 | return; |
1630 | 1631 | ||
1631 | q->request_fn(q); | 1632 | q->request_fn(q); |
1632 | } | 1633 | } |
1633 | EXPORT_SYMBOL(__generic_unplug_device); | 1634 | EXPORT_SYMBOL(__generic_unplug_device); |
1634 | 1635 | ||
1635 | /** | 1636 | /** |
1636 | * generic_unplug_device - fire a request queue | 1637 | * generic_unplug_device - fire a request queue |
1637 | * @q: The &request_queue_t in question | 1638 | * @q: The &request_queue_t in question |
1638 | * | 1639 | * |
1639 | * Description: | 1640 | * Description: |
1640 | * Linux uses plugging to build bigger request queues before letting | 1641 | * Linux uses plugging to build bigger request queues before letting |
1641 | * the device have at them. If a queue is plugged, the I/O scheduler | 1642 | * the device have at them. If a queue is plugged, the I/O scheduler |
1642 | * is still adding and merging requests on the queue. Once the queue | 1643 | * is still adding and merging requests on the queue. Once the queue |
1643 | * gets unplugged, the request_fn defined for the queue is invoked and | 1644 | * gets unplugged, the request_fn defined for the queue is invoked and |
1644 | * transfers started. | 1645 | * transfers started. |
1645 | **/ | 1646 | **/ |
1646 | void generic_unplug_device(request_queue_t *q) | 1647 | void generic_unplug_device(request_queue_t *q) |
1647 | { | 1648 | { |
1648 | spin_lock_irq(q->queue_lock); | 1649 | spin_lock_irq(q->queue_lock); |
1649 | __generic_unplug_device(q); | 1650 | __generic_unplug_device(q); |
1650 | spin_unlock_irq(q->queue_lock); | 1651 | spin_unlock_irq(q->queue_lock); |
1651 | } | 1652 | } |
1652 | EXPORT_SYMBOL(generic_unplug_device); | 1653 | EXPORT_SYMBOL(generic_unplug_device); |
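A minimal sketch of the plug/unplug contract documented above, not part of this diff: a caller that has just queued a batch of bios and wants the transfer to start right away can unplug the queue itself instead of waiting for the unplug timer. The helper name my_submit_batch is an assumption for illustration only.

	static void my_submit_batch(request_queue_t *q, int rw,
				    struct bio **bios, int nr)
	{
		int i;

		for (i = 0; i < nr; i++)
			submit_bio(rw, bios[i]);

		/*
		 * submit_bio() only queues the I/O; the device may stay
		 * plugged until the unplug timer fires.  Unplugging here
		 * takes the queue lock, removes the plug and runs request_fn.
		 */
		generic_unplug_device(q);
	}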
1653 | 1654 | ||
1654 | static void blk_backing_dev_unplug(struct backing_dev_info *bdi, | 1655 | static void blk_backing_dev_unplug(struct backing_dev_info *bdi, |
1655 | struct page *page) | 1656 | struct page *page) |
1656 | { | 1657 | { |
1657 | request_queue_t *q = bdi->unplug_io_data; | 1658 | request_queue_t *q = bdi->unplug_io_data; |
1658 | 1659 | ||
1659 | /* | 1660 | /* |
1660 | * devices don't necessarily have an ->unplug_fn defined | 1661 | * devices don't necessarily have an ->unplug_fn defined |
1661 | */ | 1662 | */ |
1662 | if (q->unplug_fn) { | 1663 | if (q->unplug_fn) { |
1663 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, | 1664 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, |
1664 | q->rq.count[READ] + q->rq.count[WRITE]); | 1665 | q->rq.count[READ] + q->rq.count[WRITE]); |
1665 | 1666 | ||
1666 | q->unplug_fn(q); | 1667 | q->unplug_fn(q); |
1667 | } | 1668 | } |
1668 | } | 1669 | } |
1669 | 1670 | ||
1670 | static void blk_unplug_work(void *data) | 1671 | static void blk_unplug_work(void *data) |
1671 | { | 1672 | { |
1672 | request_queue_t *q = data; | 1673 | request_queue_t *q = data; |
1673 | 1674 | ||
1674 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, | 1675 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, |
1675 | q->rq.count[READ] + q->rq.count[WRITE]); | 1676 | q->rq.count[READ] + q->rq.count[WRITE]); |
1676 | 1677 | ||
1677 | q->unplug_fn(q); | 1678 | q->unplug_fn(q); |
1678 | } | 1679 | } |
1679 | 1680 | ||
1680 | static void blk_unplug_timeout(unsigned long data) | 1681 | static void blk_unplug_timeout(unsigned long data) |
1681 | { | 1682 | { |
1682 | request_queue_t *q = (request_queue_t *)data; | 1683 | request_queue_t *q = (request_queue_t *)data; |
1683 | 1684 | ||
1684 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, | 1685 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, |
1685 | q->rq.count[READ] + q->rq.count[WRITE]); | 1686 | q->rq.count[READ] + q->rq.count[WRITE]); |
1686 | 1687 | ||
1687 | kblockd_schedule_work(&q->unplug_work); | 1688 | kblockd_schedule_work(&q->unplug_work); |
1688 | } | 1689 | } |
1689 | 1690 | ||
1690 | /** | 1691 | /** |
1691 | * blk_start_queue - restart a previously stopped queue | 1692 | * blk_start_queue - restart a previously stopped queue |
1692 | * @q: The &request_queue_t in question | 1693 | * @q: The &request_queue_t in question |
1693 | * | 1694 | * |
1694 | * Description: | 1695 | * Description: |
1695 | * blk_start_queue() will clear the stop flag on the queue, and call | 1696 | * blk_start_queue() will clear the stop flag on the queue, and call |
1696 | * the request_fn for the queue if it was in a stopped state when | 1697 | * the request_fn for the queue if it was in a stopped state when |
1697 | * entered. Also see blk_stop_queue(). Queue lock must be held. | 1698 | * entered. Also see blk_stop_queue(). Queue lock must be held. |
1698 | **/ | 1699 | **/ |
1699 | void blk_start_queue(request_queue_t *q) | 1700 | void blk_start_queue(request_queue_t *q) |
1700 | { | 1701 | { |
1701 | WARN_ON(!irqs_disabled()); | 1702 | WARN_ON(!irqs_disabled()); |
1702 | 1703 | ||
1703 | clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); | 1704 | clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); |
1704 | 1705 | ||
1705 | /* | 1706 | /* |
1706 | * one level of recursion is ok and is much faster than kicking | 1707 | * one level of recursion is ok and is much faster than kicking |
1707 | * the unplug handling | 1708 | * the unplug handling |
1708 | */ | 1709 | */ |
1709 | if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { | 1710 | if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { |
1710 | q->request_fn(q); | 1711 | q->request_fn(q); |
1711 | clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); | 1712 | clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); |
1712 | } else { | 1713 | } else { |
1713 | blk_plug_device(q); | 1714 | blk_plug_device(q); |
1714 | kblockd_schedule_work(&q->unplug_work); | 1715 | kblockd_schedule_work(&q->unplug_work); |
1715 | } | 1716 | } |
1716 | } | 1717 | } |
1717 | 1718 | ||
1718 | EXPORT_SYMBOL(blk_start_queue); | 1719 | EXPORT_SYMBOL(blk_start_queue); |
1719 | 1720 | ||
1720 | /** | 1721 | /** |
1721 | * blk_stop_queue - stop a queue | 1722 | * blk_stop_queue - stop a queue |
1722 | * @q: The &request_queue_t in question | 1723 | * @q: The &request_queue_t in question |
1723 | * | 1724 | * |
1724 | * Description: | 1725 | * Description: |
1725 | * The Linux block layer assumes that a block driver will consume all | 1726 | * The Linux block layer assumes that a block driver will consume all |
1726 | * entries on the request queue when the request_fn strategy is called. | 1727 | * entries on the request queue when the request_fn strategy is called. |
1727 | * Often this will not happen, because of hardware limitations (queue | 1728 | * Often this will not happen, because of hardware limitations (queue |
1728 | * depth settings). If a device driver gets a 'queue full' response, | 1729 | * depth settings). If a device driver gets a 'queue full' response, |
1729 | * or if it simply chooses not to queue more I/O at one point, it can | 1730 | * or if it simply chooses not to queue more I/O at one point, it can |
1730 | * call this function to prevent the request_fn from being called until | 1731 | * call this function to prevent the request_fn from being called until |
1731 | * the driver has signalled it's ready to go again. This happens by calling | 1732 | * the driver has signalled it's ready to go again. This happens by calling |
1732 | * blk_start_queue() to restart queue operations. Queue lock must be held. | 1733 | * blk_start_queue() to restart queue operations. Queue lock must be held. |
1733 | **/ | 1734 | **/ |
1734 | void blk_stop_queue(request_queue_t *q) | 1735 | void blk_stop_queue(request_queue_t *q) |
1735 | { | 1736 | { |
1736 | blk_remove_plug(q); | 1737 | blk_remove_plug(q); |
1737 | set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); | 1738 | set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); |
1738 | } | 1739 | } |
1739 | EXPORT_SYMBOL(blk_stop_queue); | 1740 | EXPORT_SYMBOL(blk_stop_queue); |
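A minimal sketch of the stop/start pairing described above, not part of this diff; my_request_fn, my_hw_is_full(), my_hw_issue() and my_hw_done() are assumptions standing in for driver-specific code.

	static void my_request_fn(request_queue_t *q)
	{
		struct request *rq;

		while ((rq = elv_next_request(q)) != NULL) {
			if (my_hw_is_full(q->queuedata)) {
				/* queue lock is already held inside request_fn */
				blk_stop_queue(q);
				break;
			}
			blkdev_dequeue_request(rq);
			my_hw_issue(q->queuedata, rq);
		}
	}

	/* completion path, called with q->queue_lock held and irqs disabled */
	static void my_hw_done(request_queue_t *q)
	{
		blk_start_queue(q);	/* clears STOPPED and re-runs the request_fn */
	}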
1740 | 1741 | ||
1741 | /** | 1742 | /** |
1742 | * blk_sync_queue - cancel any pending callbacks on a queue | 1743 | * blk_sync_queue - cancel any pending callbacks on a queue |
1743 | * @q: the queue | 1744 | * @q: the queue |
1744 | * | 1745 | * |
1745 | * Description: | 1746 | * Description: |
1746 | * The block layer may perform asynchronous callback activity | 1747 | * The block layer may perform asynchronous callback activity |
1747 | * on a queue, such as calling the unplug function after a timeout. | 1748 | * on a queue, such as calling the unplug function after a timeout. |
1748 | * A block device may call blk_sync_queue to ensure that any | 1749 | * A block device may call blk_sync_queue to ensure that any |
1749 | * such activity is cancelled, thus allowing it to release resources | 1750 | * such activity is cancelled, thus allowing it to release resources |
1750 | * the callbacks might use. The caller must already have made sure | 1751 | * the callbacks might use. The caller must already have made sure |
1751 | * that its ->make_request_fn will not re-add plugging prior to calling | 1752 | * that its ->make_request_fn will not re-add plugging prior to calling |
1752 | * this function. | 1753 | * this function. |
1753 | * | 1754 | * |
1754 | */ | 1755 | */ |
1755 | void blk_sync_queue(struct request_queue *q) | 1756 | void blk_sync_queue(struct request_queue *q) |
1756 | { | 1757 | { |
1757 | del_timer_sync(&q->unplug_timer); | 1758 | del_timer_sync(&q->unplug_timer); |
1758 | kblockd_flush(); | 1759 | kblockd_flush(); |
1759 | } | 1760 | } |
1760 | EXPORT_SYMBOL(blk_sync_queue); | 1761 | EXPORT_SYMBOL(blk_sync_queue); |
1761 | 1762 | ||
1762 | /** | 1763 | /** |
1763 | * blk_run_queue - run a single device queue | 1764 | * blk_run_queue - run a single device queue |
1764 | * @q: The queue to run | 1765 | * @q: The queue to run |
1765 | */ | 1766 | */ |
1766 | void blk_run_queue(struct request_queue *q) | 1767 | void blk_run_queue(struct request_queue *q) |
1767 | { | 1768 | { |
1768 | unsigned long flags; | 1769 | unsigned long flags; |
1769 | 1770 | ||
1770 | spin_lock_irqsave(q->queue_lock, flags); | 1771 | spin_lock_irqsave(q->queue_lock, flags); |
1771 | blk_remove_plug(q); | 1772 | blk_remove_plug(q); |
1772 | 1773 | ||
1773 | /* | 1774 | /* |
1774 | * Only recurse once to avoid overrunning the stack, let the unplug | 1775 | * Only recurse once to avoid overrunning the stack, let the unplug |
1775 | * handling reinvoke the handler shortly if we already got there. | 1776 | * handling reinvoke the handler shortly if we already got there. |
1776 | */ | 1777 | */ |
1777 | if (!elv_queue_empty(q)) { | 1778 | if (!elv_queue_empty(q)) { |
1778 | if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { | 1779 | if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { |
1779 | q->request_fn(q); | 1780 | q->request_fn(q); |
1780 | clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); | 1781 | clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); |
1781 | } else { | 1782 | } else { |
1782 | blk_plug_device(q); | 1783 | blk_plug_device(q); |
1783 | kblockd_schedule_work(&q->unplug_work); | 1784 | kblockd_schedule_work(&q->unplug_work); |
1784 | } | 1785 | } |
1785 | } | 1786 | } |
1786 | 1787 | ||
1787 | spin_unlock_irqrestore(q->queue_lock, flags); | 1788 | spin_unlock_irqrestore(q->queue_lock, flags); |
1788 | } | 1789 | } |
1789 | EXPORT_SYMBOL(blk_run_queue); | 1790 | EXPORT_SYMBOL(blk_run_queue); |
1790 | 1791 | ||
1791 | /** | 1792 | /** |
1792 | * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed | 1793 | * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed |
1793 | * @kobj: the kobj belonging to the request queue to be released | 1794 | * @kobj: the kobj belonging to the request queue to be released |
1794 | * | 1795 | * |
1795 | * Description: | 1796 | * Description: |
1796 | * blk_cleanup_queue is the pair to blk_init_queue() or | 1797 | * blk_cleanup_queue is the pair to blk_init_queue() or |
1797 | * blk_queue_make_request(). It should be called when a request queue is | 1798 | * blk_queue_make_request(). It should be called when a request queue is |
1798 | * being released; typically when a block device is being de-registered. | 1799 | * being released; typically when a block device is being de-registered. |
1799 | * Currently, its primary task is to free all the &struct request | 1800 | * Currently, its primary task is to free all the &struct request |
1800 | * structures that were allocated to the queue and the queue itself. | 1801 | * structures that were allocated to the queue and the queue itself. |
1801 | * | 1802 | * |
1802 | * Caveat: | 1803 | * Caveat: |
1803 | * Hopefully the low level driver will have finished any | 1804 | * Hopefully the low level driver will have finished any |
1804 | * outstanding requests first... | 1805 | * outstanding requests first... |
1805 | **/ | 1806 | **/ |
1806 | static void blk_release_queue(struct kobject *kobj) | 1807 | static void blk_release_queue(struct kobject *kobj) |
1807 | { | 1808 | { |
1808 | request_queue_t *q = container_of(kobj, struct request_queue, kobj); | 1809 | request_queue_t *q = container_of(kobj, struct request_queue, kobj); |
1809 | struct request_list *rl = &q->rq; | 1810 | struct request_list *rl = &q->rq; |
1810 | 1811 | ||
1811 | blk_sync_queue(q); | 1812 | blk_sync_queue(q); |
1812 | 1813 | ||
1813 | if (rl->rq_pool) | 1814 | if (rl->rq_pool) |
1814 | mempool_destroy(rl->rq_pool); | 1815 | mempool_destroy(rl->rq_pool); |
1815 | 1816 | ||
1816 | if (q->queue_tags) | 1817 | if (q->queue_tags) |
1817 | __blk_queue_free_tags(q); | 1818 | __blk_queue_free_tags(q); |
1818 | 1819 | ||
1819 | blk_trace_shutdown(q); | 1820 | blk_trace_shutdown(q); |
1820 | 1821 | ||
1821 | kmem_cache_free(requestq_cachep, q); | 1822 | kmem_cache_free(requestq_cachep, q); |
1822 | } | 1823 | } |
1823 | 1824 | ||
1824 | void blk_put_queue(request_queue_t *q) | 1825 | void blk_put_queue(request_queue_t *q) |
1825 | { | 1826 | { |
1826 | kobject_put(&q->kobj); | 1827 | kobject_put(&q->kobj); |
1827 | } | 1828 | } |
1828 | EXPORT_SYMBOL(blk_put_queue); | 1829 | EXPORT_SYMBOL(blk_put_queue); |
1829 | 1830 | ||
1830 | void blk_cleanup_queue(request_queue_t * q) | 1831 | void blk_cleanup_queue(request_queue_t * q) |
1831 | { | 1832 | { |
1832 | mutex_lock(&q->sysfs_lock); | 1833 | mutex_lock(&q->sysfs_lock); |
1833 | set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); | 1834 | set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); |
1834 | mutex_unlock(&q->sysfs_lock); | 1835 | mutex_unlock(&q->sysfs_lock); |
1835 | 1836 | ||
1836 | if (q->elevator) | 1837 | if (q->elevator) |
1837 | elevator_exit(q->elevator); | 1838 | elevator_exit(q->elevator); |
1838 | 1839 | ||
1839 | blk_put_queue(q); | 1840 | blk_put_queue(q); |
1840 | } | 1841 | } |
1841 | 1842 | ||
1842 | EXPORT_SYMBOL(blk_cleanup_queue); | 1843 | EXPORT_SYMBOL(blk_cleanup_queue); |
1843 | 1844 | ||
1844 | static int blk_init_free_list(request_queue_t *q) | 1845 | static int blk_init_free_list(request_queue_t *q) |
1845 | { | 1846 | { |
1846 | struct request_list *rl = &q->rq; | 1847 | struct request_list *rl = &q->rq; |
1847 | 1848 | ||
1848 | rl->count[READ] = rl->count[WRITE] = 0; | 1849 | rl->count[READ] = rl->count[WRITE] = 0; |
1849 | rl->starved[READ] = rl->starved[WRITE] = 0; | 1850 | rl->starved[READ] = rl->starved[WRITE] = 0; |
1850 | rl->elvpriv = 0; | 1851 | rl->elvpriv = 0; |
1851 | init_waitqueue_head(&rl->wait[READ]); | 1852 | init_waitqueue_head(&rl->wait[READ]); |
1852 | init_waitqueue_head(&rl->wait[WRITE]); | 1853 | init_waitqueue_head(&rl->wait[WRITE]); |
1853 | 1854 | ||
1854 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, | 1855 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, |
1855 | mempool_free_slab, request_cachep, q->node); | 1856 | mempool_free_slab, request_cachep, q->node); |
1856 | 1857 | ||
1857 | if (!rl->rq_pool) | 1858 | if (!rl->rq_pool) |
1858 | return -ENOMEM; | 1859 | return -ENOMEM; |
1859 | 1860 | ||
1860 | return 0; | 1861 | return 0; |
1861 | } | 1862 | } |
1862 | 1863 | ||
1863 | request_queue_t *blk_alloc_queue(gfp_t gfp_mask) | 1864 | request_queue_t *blk_alloc_queue(gfp_t gfp_mask) |
1864 | { | 1865 | { |
1865 | return blk_alloc_queue_node(gfp_mask, -1); | 1866 | return blk_alloc_queue_node(gfp_mask, -1); |
1866 | } | 1867 | } |
1867 | EXPORT_SYMBOL(blk_alloc_queue); | 1868 | EXPORT_SYMBOL(blk_alloc_queue); |
1868 | 1869 | ||
1869 | static struct kobj_type queue_ktype; | 1870 | static struct kobj_type queue_ktype; |
1870 | 1871 | ||
1871 | request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | 1872 | request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) |
1872 | { | 1873 | { |
1873 | request_queue_t *q; | 1874 | request_queue_t *q; |
1874 | 1875 | ||
1875 | q = kmem_cache_alloc_node(requestq_cachep, gfp_mask, node_id); | 1876 | q = kmem_cache_alloc_node(requestq_cachep, gfp_mask, node_id); |
1876 | if (!q) | 1877 | if (!q) |
1877 | return NULL; | 1878 | return NULL; |
1878 | 1879 | ||
1879 | memset(q, 0, sizeof(*q)); | 1880 | memset(q, 0, sizeof(*q)); |
1880 | init_timer(&q->unplug_timer); | 1881 | init_timer(&q->unplug_timer); |
1881 | 1882 | ||
1882 | snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); | 1883 | snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); |
1883 | q->kobj.ktype = &queue_ktype; | 1884 | q->kobj.ktype = &queue_ktype; |
1884 | kobject_init(&q->kobj); | 1885 | kobject_init(&q->kobj); |
1885 | 1886 | ||
1886 | q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; | 1887 | q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; |
1887 | q->backing_dev_info.unplug_io_data = q; | 1888 | q->backing_dev_info.unplug_io_data = q; |
1888 | 1889 | ||
1889 | mutex_init(&q->sysfs_lock); | 1890 | mutex_init(&q->sysfs_lock); |
1890 | 1891 | ||
1891 | return q; | 1892 | return q; |
1892 | } | 1893 | } |
1893 | EXPORT_SYMBOL(blk_alloc_queue_node); | 1894 | EXPORT_SYMBOL(blk_alloc_queue_node); |
1894 | 1895 | ||
1895 | /** | 1896 | /** |
1896 | * blk_init_queue - prepare a request queue for use with a block device | 1897 | * blk_init_queue - prepare a request queue for use with a block device |
1897 | * @rfn: The function to be called to process requests that have been | 1898 | * @rfn: The function to be called to process requests that have been |
1898 | * placed on the queue. | 1899 | * placed on the queue. |
1899 | * @lock: Request queue spin lock | 1900 | * @lock: Request queue spin lock |
1900 | * | 1901 | * |
1901 | * Description: | 1902 | * Description: |
1902 | * If a block device wishes to use the standard request handling procedures, | 1903 | * If a block device wishes to use the standard request handling procedures, |
1903 | * which sorts requests and coalesces adjacent requests, then it must | 1904 | * which sorts requests and coalesces adjacent requests, then it must |
1904 | * call blk_init_queue(). The function @rfn will be called when there | 1905 | * call blk_init_queue(). The function @rfn will be called when there |
1905 | * are requests on the queue that need to be processed. If the device | 1906 | * are requests on the queue that need to be processed. If the device |
1906 | * supports plugging, then @rfn may not be called immediately when requests | 1907 | * supports plugging, then @rfn may not be called immediately when requests |
1907 | * are available on the queue, but may be called at some time later instead. | 1908 | * are available on the queue, but may be called at some time later instead. |
1908 | * Plugged queues are generally unplugged when a buffer belonging to one | 1909 | * Plugged queues are generally unplugged when a buffer belonging to one |
1909 | * of the requests on the queue is needed, or due to memory pressure. | 1910 | * of the requests on the queue is needed, or due to memory pressure. |
1910 | * | 1911 | * |
1911 | * @rfn is not required, or even expected, to remove all requests off the | 1912 | * @rfn is not required, or even expected, to remove all requests off the |
1912 | * queue, but only as many as it can handle at a time. If it does leave | 1913 | * queue, but only as many as it can handle at a time. If it does leave |
1913 | * requests on the queue, it is responsible for arranging that the requests | 1914 | * requests on the queue, it is responsible for arranging that the requests |
1914 | * get dealt with eventually. | 1915 | * get dealt with eventually. |
1915 | * | 1916 | * |
1916 | * The queue spin lock must be held while manipulating the requests on the | 1917 | * The queue spin lock must be held while manipulating the requests on the |
1917 | * request queue; this lock will also be taken from interrupt context, so irq | 1918 | * request queue; this lock will also be taken from interrupt context, so irq |
1918 | * disabling is needed for it. | 1919 | * disabling is needed for it. |
1919 | * | 1920 | * |
1920 | * Function returns a pointer to the initialized request queue, or NULL if | 1921 | * Function returns a pointer to the initialized request queue, or NULL if |
1921 | * it didn't succeed. | 1922 | * it didn't succeed. |
1922 | * | 1923 | * |
1923 | * Note: | 1924 | * Note: |
1924 | * blk_init_queue() must be paired with a blk_cleanup_queue() call | 1925 | * blk_init_queue() must be paired with a blk_cleanup_queue() call |
1925 | * when the block device is deactivated (such as at module unload). | 1926 | * when the block device is deactivated (such as at module unload). |
1926 | **/ | 1927 | **/ |
1927 | 1928 | ||
1928 | request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) | 1929 | request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) |
1929 | { | 1930 | { |
1930 | return blk_init_queue_node(rfn, lock, -1); | 1931 | return blk_init_queue_node(rfn, lock, -1); |
1931 | } | 1932 | } |
1932 | EXPORT_SYMBOL(blk_init_queue); | 1933 | EXPORT_SYMBOL(blk_init_queue); |
1933 | 1934 | ||
1934 | request_queue_t * | 1935 | request_queue_t * |
1935 | blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) | 1936 | blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) |
1936 | { | 1937 | { |
1937 | request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id); | 1938 | request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id); |
1938 | 1939 | ||
1939 | if (!q) | 1940 | if (!q) |
1940 | return NULL; | 1941 | return NULL; |
1941 | 1942 | ||
1942 | q->node = node_id; | 1943 | q->node = node_id; |
1943 | if (blk_init_free_list(q)) { | 1944 | if (blk_init_free_list(q)) { |
1944 | kmem_cache_free(requestq_cachep, q); | 1945 | kmem_cache_free(requestq_cachep, q); |
1945 | return NULL; | 1946 | return NULL; |
1946 | } | 1947 | } |
1947 | 1948 | ||
1948 | /* | 1949 | /* |
1949 | * if caller didn't supply a lock, they get per-queue locking with | 1950 | * if caller didn't supply a lock, they get per-queue locking with |
1950 | * our embedded lock | 1951 | * our embedded lock |
1951 | */ | 1952 | */ |
1952 | if (!lock) { | 1953 | if (!lock) { |
1953 | spin_lock_init(&q->__queue_lock); | 1954 | spin_lock_init(&q->__queue_lock); |
1954 | lock = &q->__queue_lock; | 1955 | lock = &q->__queue_lock; |
1955 | } | 1956 | } |
1956 | 1957 | ||
1957 | q->request_fn = rfn; | 1958 | q->request_fn = rfn; |
1958 | q->back_merge_fn = ll_back_merge_fn; | 1959 | q->back_merge_fn = ll_back_merge_fn; |
1959 | q->front_merge_fn = ll_front_merge_fn; | 1960 | q->front_merge_fn = ll_front_merge_fn; |
1960 | q->merge_requests_fn = ll_merge_requests_fn; | 1961 | q->merge_requests_fn = ll_merge_requests_fn; |
1961 | q->prep_rq_fn = NULL; | 1962 | q->prep_rq_fn = NULL; |
1962 | q->unplug_fn = generic_unplug_device; | 1963 | q->unplug_fn = generic_unplug_device; |
1963 | q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); | 1964 | q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); |
1964 | q->queue_lock = lock; | 1965 | q->queue_lock = lock; |
1965 | 1966 | ||
1966 | blk_queue_segment_boundary(q, 0xffffffff); | 1967 | blk_queue_segment_boundary(q, 0xffffffff); |
1967 | 1968 | ||
1968 | blk_queue_make_request(q, __make_request); | 1969 | blk_queue_make_request(q, __make_request); |
1969 | blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); | 1970 | blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); |
1970 | 1971 | ||
1971 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); | 1972 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); |
1972 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); | 1973 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); |
1973 | 1974 | ||
1974 | /* | 1975 | /* |
1975 | * all done | 1976 | * all done |
1976 | */ | 1977 | */ |
1977 | if (!elevator_init(q, NULL)) { | 1978 | if (!elevator_init(q, NULL)) { |
1978 | blk_queue_congestion_threshold(q); | 1979 | blk_queue_congestion_threshold(q); |
1979 | return q; | 1980 | return q; |
1980 | } | 1981 | } |
1981 | 1982 | ||
1982 | blk_put_queue(q); | 1983 | blk_put_queue(q); |
1983 | return NULL; | 1984 | return NULL; |
1984 | } | 1985 | } |
1985 | EXPORT_SYMBOL(blk_init_queue_node); | 1986 | EXPORT_SYMBOL(blk_init_queue_node); |
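A minimal sketch of the blk_init_queue()/blk_cleanup_queue() pairing called out in the note above, not part of this diff; my_request_fn and the module hooks are assumptions, and the usual module boilerplate is omitted.

	static DEFINE_SPINLOCK(my_lock);
	static request_queue_t *my_queue;

	static int __init my_driver_init(void)
	{
		my_queue = blk_init_queue(my_request_fn, &my_lock);
		if (!my_queue)
			return -ENOMEM;
		return 0;
	}

	static void __exit my_driver_exit(void)
	{
		/* pairs with blk_init_queue() when the device is deactivated */
		blk_cleanup_queue(my_queue);
	}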
1986 | 1987 | ||
1987 | int blk_get_queue(request_queue_t *q) | 1988 | int blk_get_queue(request_queue_t *q) |
1988 | { | 1989 | { |
1989 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | 1990 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { |
1990 | kobject_get(&q->kobj); | 1991 | kobject_get(&q->kobj); |
1991 | return 0; | 1992 | return 0; |
1992 | } | 1993 | } |
1993 | 1994 | ||
1994 | return 1; | 1995 | return 1; |
1995 | } | 1996 | } |
1996 | 1997 | ||
1997 | EXPORT_SYMBOL(blk_get_queue); | 1998 | EXPORT_SYMBOL(blk_get_queue); |
1998 | 1999 | ||
1999 | static inline void blk_free_request(request_queue_t *q, struct request *rq) | 2000 | static inline void blk_free_request(request_queue_t *q, struct request *rq) |
2000 | { | 2001 | { |
2001 | if (rq->cmd_flags & REQ_ELVPRIV) | 2002 | if (rq->cmd_flags & REQ_ELVPRIV) |
2002 | elv_put_request(q, rq); | 2003 | elv_put_request(q, rq); |
2003 | mempool_free(rq, q->rq.rq_pool); | 2004 | mempool_free(rq, q->rq.rq_pool); |
2004 | } | 2005 | } |
2005 | 2006 | ||
2006 | static inline struct request * | 2007 | static inline struct request * |
2007 | blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, | 2008 | blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, |
2008 | int priv, gfp_t gfp_mask) | 2009 | int priv, gfp_t gfp_mask) |
2009 | { | 2010 | { |
2010 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); | 2011 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); |
2011 | 2012 | ||
2012 | if (!rq) | 2013 | if (!rq) |
2013 | return NULL; | 2014 | return NULL; |
2014 | 2015 | ||
2015 | /* | 2016 | /* |
2016 | * first three bits are identical in rq->cmd_flags and bio->bi_rw, | 2017 | * first three bits are identical in rq->cmd_flags and bio->bi_rw, |
2017 | * see bio.h and blkdev.h | 2018 | * see bio.h and blkdev.h |
2018 | */ | 2019 | */ |
2019 | rq->cmd_flags = rw; | 2020 | rq->cmd_flags = rw; |
2020 | 2021 | ||
2021 | if (priv) { | 2022 | if (priv) { |
2022 | if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { | 2023 | if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { |
2023 | mempool_free(rq, q->rq.rq_pool); | 2024 | mempool_free(rq, q->rq.rq_pool); |
2024 | return NULL; | 2025 | return NULL; |
2025 | } | 2026 | } |
2026 | rq->cmd_flags |= REQ_ELVPRIV; | 2027 | rq->cmd_flags |= REQ_ELVPRIV; |
2027 | } | 2028 | } |
2028 | 2029 | ||
2029 | return rq; | 2030 | return rq; |
2030 | } | 2031 | } |
2031 | 2032 | ||
2032 | /* | 2033 | /* |
2033 | * ioc_batching returns true if the ioc is a valid batching request and | 2034 | * ioc_batching returns true if the ioc is a valid batching request and |
2034 | * should be given priority access to a request. | 2035 | * should be given priority access to a request. |
2035 | */ | 2036 | */ |
2036 | static inline int ioc_batching(request_queue_t *q, struct io_context *ioc) | 2037 | static inline int ioc_batching(request_queue_t *q, struct io_context *ioc) |
2037 | { | 2038 | { |
2038 | if (!ioc) | 2039 | if (!ioc) |
2039 | return 0; | 2040 | return 0; |
2040 | 2041 | ||
2041 | /* | 2042 | /* |
2042 | * Make sure the process is able to allocate at least 1 request | 2043 | * Make sure the process is able to allocate at least 1 request |
2043 | * even if the batch times out, otherwise we could theoretically | 2044 | * even if the batch times out, otherwise we could theoretically |
2044 | * lose wakeups. | 2045 | * lose wakeups. |
2045 | */ | 2046 | */ |
2046 | return ioc->nr_batch_requests == q->nr_batching || | 2047 | return ioc->nr_batch_requests == q->nr_batching || |
2047 | (ioc->nr_batch_requests > 0 | 2048 | (ioc->nr_batch_requests > 0 |
2048 | && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); | 2049 | && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); |
2049 | } | 2050 | } |
2050 | 2051 | ||
2051 | /* | 2052 | /* |
2052 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This | 2053 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This |
2053 | * will cause the process to be a "batcher" on all queues in the system. This | 2054 | * will cause the process to be a "batcher" on all queues in the system. This |
2054 | * is the behaviour we want though - once it gets a wakeup it should be given | 2055 | * is the behaviour we want though - once it gets a wakeup it should be given |
2055 | * a nice run. | 2056 | * a nice run. |
2056 | */ | 2057 | */ |
2057 | static void ioc_set_batching(request_queue_t *q, struct io_context *ioc) | 2058 | static void ioc_set_batching(request_queue_t *q, struct io_context *ioc) |
2058 | { | 2059 | { |
2059 | if (!ioc || ioc_batching(q, ioc)) | 2060 | if (!ioc || ioc_batching(q, ioc)) |
2060 | return; | 2061 | return; |
2061 | 2062 | ||
2062 | ioc->nr_batch_requests = q->nr_batching; | 2063 | ioc->nr_batch_requests = q->nr_batching; |
2063 | ioc->last_waited = jiffies; | 2064 | ioc->last_waited = jiffies; |
2064 | } | 2065 | } |
2065 | 2066 | ||
2066 | static void __freed_request(request_queue_t *q, int rw) | 2067 | static void __freed_request(request_queue_t *q, int rw) |
2067 | { | 2068 | { |
2068 | struct request_list *rl = &q->rq; | 2069 | struct request_list *rl = &q->rq; |
2069 | 2070 | ||
2070 | if (rl->count[rw] < queue_congestion_off_threshold(q)) | 2071 | if (rl->count[rw] < queue_congestion_off_threshold(q)) |
2071 | clear_queue_congested(q, rw); | 2072 | clear_queue_congested(q, rw); |
2072 | 2073 | ||
2073 | if (rl->count[rw] + 1 <= q->nr_requests) { | 2074 | if (rl->count[rw] + 1 <= q->nr_requests) { |
2074 | if (waitqueue_active(&rl->wait[rw])) | 2075 | if (waitqueue_active(&rl->wait[rw])) |
2075 | wake_up(&rl->wait[rw]); | 2076 | wake_up(&rl->wait[rw]); |
2076 | 2077 | ||
2077 | blk_clear_queue_full(q, rw); | 2078 | blk_clear_queue_full(q, rw); |
2078 | } | 2079 | } |
2079 | } | 2080 | } |
2080 | 2081 | ||
2081 | /* | 2082 | /* |
2082 | * A request has just been released. Account for it, update the full and | 2083 | * A request has just been released. Account for it, update the full and |
2083 | * congestion status, wake up any waiters. Called under q->queue_lock. | 2084 | * congestion status, wake up any waiters. Called under q->queue_lock. |
2084 | */ | 2085 | */ |
2085 | static void freed_request(request_queue_t *q, int rw, int priv) | 2086 | static void freed_request(request_queue_t *q, int rw, int priv) |
2086 | { | 2087 | { |
2087 | struct request_list *rl = &q->rq; | 2088 | struct request_list *rl = &q->rq; |
2088 | 2089 | ||
2089 | rl->count[rw]--; | 2090 | rl->count[rw]--; |
2090 | if (priv) | 2091 | if (priv) |
2091 | rl->elvpriv--; | 2092 | rl->elvpriv--; |
2092 | 2093 | ||
2093 | __freed_request(q, rw); | 2094 | __freed_request(q, rw); |
2094 | 2095 | ||
2095 | if (unlikely(rl->starved[rw ^ 1])) | 2096 | if (unlikely(rl->starved[rw ^ 1])) |
2096 | __freed_request(q, rw ^ 1); | 2097 | __freed_request(q, rw ^ 1); |
2097 | } | 2098 | } |
2098 | 2099 | ||
2099 | #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) | 2100 | #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) |
2100 | /* | 2101 | /* |
2101 | * Get a free request, queue_lock must be held. | 2102 | * Get a free request, queue_lock must be held. |
2102 | * Returns NULL on failure, with queue_lock held. | 2103 | * Returns NULL on failure, with queue_lock held. |
2103 | * Returns !NULL on success, with queue_lock *not held*. | 2104 | * Returns !NULL on success, with queue_lock *not held*. |
2104 | */ | 2105 | */ |
2105 | static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, | 2106 | static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, |
2106 | gfp_t gfp_mask) | 2107 | gfp_t gfp_mask) |
2107 | { | 2108 | { |
2108 | struct request *rq = NULL; | 2109 | struct request *rq = NULL; |
2109 | struct request_list *rl = &q->rq; | 2110 | struct request_list *rl = &q->rq; |
2110 | struct io_context *ioc = NULL; | 2111 | struct io_context *ioc = NULL; |
2111 | int may_queue, priv; | 2112 | int may_queue, priv; |
2112 | 2113 | ||
2113 | may_queue = elv_may_queue(q, rw, bio); | 2114 | may_queue = elv_may_queue(q, rw, bio); |
2114 | if (may_queue == ELV_MQUEUE_NO) | 2115 | if (may_queue == ELV_MQUEUE_NO) |
2115 | goto rq_starved; | 2116 | goto rq_starved; |
2116 | 2117 | ||
2117 | if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { | 2118 | if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { |
2118 | if (rl->count[rw]+1 >= q->nr_requests) { | 2119 | if (rl->count[rw]+1 >= q->nr_requests) { |
2119 | ioc = current_io_context(GFP_ATOMIC); | 2120 | ioc = current_io_context(GFP_ATOMIC); |
2120 | /* | 2121 | /* |
2121 | * The queue will fill after this allocation, so set | 2122 | * The queue will fill after this allocation, so set |
2122 | * it as full, and mark this process as "batching". | 2123 | * it as full, and mark this process as "batching". |
2123 | * This process will be allowed to complete a batch of | 2124 | * This process will be allowed to complete a batch of |
2124 | * requests, others will be blocked. | 2125 | * requests, others will be blocked. |
2125 | */ | 2126 | */ |
2126 | if (!blk_queue_full(q, rw)) { | 2127 | if (!blk_queue_full(q, rw)) { |
2127 | ioc_set_batching(q, ioc); | 2128 | ioc_set_batching(q, ioc); |
2128 | blk_set_queue_full(q, rw); | 2129 | blk_set_queue_full(q, rw); |
2129 | } else { | 2130 | } else { |
2130 | if (may_queue != ELV_MQUEUE_MUST | 2131 | if (may_queue != ELV_MQUEUE_MUST |
2131 | && !ioc_batching(q, ioc)) { | 2132 | && !ioc_batching(q, ioc)) { |
2132 | /* | 2133 | /* |
2133 | * The queue is full and the allocating | 2134 | * The queue is full and the allocating |
2134 | * process is not a "batcher", and not | 2135 | * process is not a "batcher", and not |
2135 | * exempted by the IO scheduler | 2136 | * exempted by the IO scheduler |
2136 | */ | 2137 | */ |
2137 | goto out; | 2138 | goto out; |
2138 | } | 2139 | } |
2139 | } | 2140 | } |
2140 | } | 2141 | } |
2141 | set_queue_congested(q, rw); | 2142 | set_queue_congested(q, rw); |
2142 | } | 2143 | } |
2143 | 2144 | ||
2144 | /* | 2145 | /* |
2145 | * Only allow batching queuers to allocate up to 50% over the defined | 2146 | * Only allow batching queuers to allocate up to 50% over the defined |
2146 | * limit of requests, otherwise we could have thousands of requests | 2147 | * limit of requests, otherwise we could have thousands of requests |
2147 | * allocated with any setting of ->nr_requests | 2148 | * allocated with any setting of ->nr_requests |
2148 | */ | 2149 | */ |
2149 | if (rl->count[rw] >= (3 * q->nr_requests / 2)) | 2150 | if (rl->count[rw] >= (3 * q->nr_requests / 2)) |
2150 | goto out; | 2151 | goto out; |
2151 | 2152 | ||
2152 | rl->count[rw]++; | 2153 | rl->count[rw]++; |
2153 | rl->starved[rw] = 0; | 2154 | rl->starved[rw] = 0; |
2154 | 2155 | ||
2155 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 2156 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
2156 | if (priv) | 2157 | if (priv) |
2157 | rl->elvpriv++; | 2158 | rl->elvpriv++; |
2158 | 2159 | ||
2159 | spin_unlock_irq(q->queue_lock); | 2160 | spin_unlock_irq(q->queue_lock); |
2160 | 2161 | ||
2161 | rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); | 2162 | rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); |
2162 | if (unlikely(!rq)) { | 2163 | if (unlikely(!rq)) { |
2163 | /* | 2164 | /* |
2164 | * Allocation failed presumably due to memory. Undo anything | 2165 | * Allocation failed presumably due to memory. Undo anything |
2165 | * we might have messed up. | 2166 | * we might have messed up. |
2166 | * | 2167 | * |
2167 | * Allocating task should really be put onto the front of the | 2168 | * Allocating task should really be put onto the front of the |
2168 | * wait queue, but this is pretty rare. | 2169 | * wait queue, but this is pretty rare. |
2169 | */ | 2170 | */ |
2170 | spin_lock_irq(q->queue_lock); | 2171 | spin_lock_irq(q->queue_lock); |
2171 | freed_request(q, rw, priv); | 2172 | freed_request(q, rw, priv); |
2172 | 2173 | ||
2173 | /* | 2174 | /* |
2174 | * in the very unlikely event that allocation failed and no | 2175 | * in the very unlikely event that allocation failed and no |
2175 | * requests for this direction were pending, mark us starved | 2176 | * requests for this direction were pending, mark us starved |
2176 | * so that freeing of a request in the other direction will | 2177 | * so that freeing of a request in the other direction will |
2177 | * notice us. another possible fix would be to split the | 2178 | * notice us. another possible fix would be to split the |
2178 | * rq mempool into READ and WRITE | 2179 | * rq mempool into READ and WRITE |
2179 | */ | 2180 | */ |
2180 | rq_starved: | 2181 | rq_starved: |
2181 | if (unlikely(rl->count[rw] == 0)) | 2182 | if (unlikely(rl->count[rw] == 0)) |
2182 | rl->starved[rw] = 1; | 2183 | rl->starved[rw] = 1; |
2183 | 2184 | ||
2184 | goto out; | 2185 | goto out; |
2185 | } | 2186 | } |
2186 | 2187 | ||
2187 | /* | 2188 | /* |
2188 | * ioc may be NULL here, and ioc_batching will be false. That's | 2189 | * ioc may be NULL here, and ioc_batching will be false. That's |
2189 | * OK, if the queue is under the request limit then requests need | 2190 | * OK, if the queue is under the request limit then requests need |
2190 | * not count toward the nr_batch_requests limit. There will always | 2191 | * not count toward the nr_batch_requests limit. There will always |
2191 | * be some limit enforced by BLK_BATCH_TIME. | 2192 | * be some limit enforced by BLK_BATCH_TIME. |
2192 | */ | 2193 | */ |
2193 | if (ioc_batching(q, ioc)) | 2194 | if (ioc_batching(q, ioc)) |
2194 | ioc->nr_batch_requests--; | 2195 | ioc->nr_batch_requests--; |
2195 | 2196 | ||
2196 | rq_init(q, rq); | 2197 | rq_init(q, rq); |
2197 | rq->rl = rl; | 2198 | rq->rl = rl; |
2198 | 2199 | ||
2199 | blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); | 2200 | blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); |
2200 | out: | 2201 | out: |
2201 | return rq; | 2202 | return rq; |
2202 | } | 2203 | } |
2203 | 2204 | ||
2204 | /* | 2205 | /* |
2205 | * No available requests for this queue, unplug the device and wait for some | 2206 | * No available requests for this queue, unplug the device and wait for some |
2206 | * requests to become available. | 2207 | * requests to become available. |
2207 | * | 2208 | * |
2208 | * Called with q->queue_lock held, and returns with it unlocked. | 2209 | * Called with q->queue_lock held, and returns with it unlocked. |
2209 | */ | 2210 | */ |
2210 | static struct request *get_request_wait(request_queue_t *q, int rw, | 2211 | static struct request *get_request_wait(request_queue_t *q, int rw, |
2211 | struct bio *bio) | 2212 | struct bio *bio) |
2212 | { | 2213 | { |
2213 | struct request *rq; | 2214 | struct request *rq; |
2214 | 2215 | ||
2215 | rq = get_request(q, rw, bio, GFP_NOIO); | 2216 | rq = get_request(q, rw, bio, GFP_NOIO); |
2216 | while (!rq) { | 2217 | while (!rq) { |
2217 | DEFINE_WAIT(wait); | 2218 | DEFINE_WAIT(wait); |
2218 | struct request_list *rl = &q->rq; | 2219 | struct request_list *rl = &q->rq; |
2219 | 2220 | ||
2220 | prepare_to_wait_exclusive(&rl->wait[rw], &wait, | 2221 | prepare_to_wait_exclusive(&rl->wait[rw], &wait, |
2221 | TASK_UNINTERRUPTIBLE); | 2222 | TASK_UNINTERRUPTIBLE); |
2222 | 2223 | ||
2223 | rq = get_request(q, rw, bio, GFP_NOIO); | 2224 | rq = get_request(q, rw, bio, GFP_NOIO); |
2224 | 2225 | ||
2225 | if (!rq) { | 2226 | if (!rq) { |
2226 | struct io_context *ioc; | 2227 | struct io_context *ioc; |
2227 | 2228 | ||
2228 | blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); | 2229 | blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); |
2229 | 2230 | ||
2230 | __generic_unplug_device(q); | 2231 | __generic_unplug_device(q); |
2231 | spin_unlock_irq(q->queue_lock); | 2232 | spin_unlock_irq(q->queue_lock); |
2232 | io_schedule(); | 2233 | io_schedule(); |
2233 | 2234 | ||
2234 | /* | 2235 | /* |
2235 | * After sleeping, we become a "batching" process and | 2236 | * After sleeping, we become a "batching" process and |
2236 | * will be able to allocate at least one request, and | 2237 | * will be able to allocate at least one request, and |
2237 | * up to a big batch of them for a small period of time. | 2238 | * up to a big batch of them for a small period of time. |
2238 | * See ioc_batching, ioc_set_batching | 2239 | * See ioc_batching, ioc_set_batching |
2239 | */ | 2240 | */ |
2240 | ioc = current_io_context(GFP_NOIO); | 2241 | ioc = current_io_context(GFP_NOIO); |
2241 | ioc_set_batching(q, ioc); | 2242 | ioc_set_batching(q, ioc); |
2242 | 2243 | ||
2243 | spin_lock_irq(q->queue_lock); | 2244 | spin_lock_irq(q->queue_lock); |
2244 | } | 2245 | } |
2245 | finish_wait(&rl->wait[rw], &wait); | 2246 | finish_wait(&rl->wait[rw], &wait); |
2246 | } | 2247 | } |
2247 | 2248 | ||
2248 | return rq; | 2249 | return rq; |
2249 | } | 2250 | } |
2250 | 2251 | ||
2251 | struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask) | 2252 | struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask) |
2252 | { | 2253 | { |
2253 | struct request *rq; | 2254 | struct request *rq; |
2254 | 2255 | ||
2255 | BUG_ON(rw != READ && rw != WRITE); | 2256 | BUG_ON(rw != READ && rw != WRITE); |
2256 | 2257 | ||
2257 | spin_lock_irq(q->queue_lock); | 2258 | spin_lock_irq(q->queue_lock); |
2258 | if (gfp_mask & __GFP_WAIT) { | 2259 | if (gfp_mask & __GFP_WAIT) { |
2259 | rq = get_request_wait(q, rw, NULL); | 2260 | rq = get_request_wait(q, rw, NULL); |
2260 | } else { | 2261 | } else { |
2261 | rq = get_request(q, rw, NULL, gfp_mask); | 2262 | rq = get_request(q, rw, NULL, gfp_mask); |
2262 | if (!rq) | 2263 | if (!rq) |
2263 | spin_unlock_irq(q->queue_lock); | 2264 | spin_unlock_irq(q->queue_lock); |
2264 | } | 2265 | } |
2265 | /* q->queue_lock is unlocked at this point */ | 2266 | /* q->queue_lock is unlocked at this point */ |
2266 | 2267 | ||
2267 | return rq; | 2268 | return rq; |
2268 | } | 2269 | } |
2269 | EXPORT_SYMBOL(blk_get_request); | 2270 | EXPORT_SYMBOL(blk_get_request); |
2270 | 2271 | ||
2271 | /** | 2272 | /** |
2272 | * blk_requeue_request - put a request back on queue | 2273 | * blk_requeue_request - put a request back on queue |
2273 | * @q: request queue where request should be inserted | 2274 | * @q: request queue where request should be inserted |
2274 | * @rq: request to be inserted | 2275 | * @rq: request to be inserted |
2275 | * | 2276 | * |
2276 | * Description: | 2277 | * Description: |
2277 | * Drivers often keep queueing requests until the hardware cannot accept | 2278 | * Drivers often keep queueing requests until the hardware cannot accept |
2278 | * more, when that condition happens we need to put the request back | 2279 | * more, when that condition happens we need to put the request back |
2279 | * on the queue. Must be called with queue lock held. | 2280 | * on the queue. Must be called with queue lock held. |
2280 | */ | 2281 | */ |
2281 | void blk_requeue_request(request_queue_t *q, struct request *rq) | 2282 | void blk_requeue_request(request_queue_t *q, struct request *rq) |
2282 | { | 2283 | { |
2283 | blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); | 2284 | blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); |
2284 | 2285 | ||
2285 | if (blk_rq_tagged(rq)) | 2286 | if (blk_rq_tagged(rq)) |
2286 | blk_queue_end_tag(q, rq); | 2287 | blk_queue_end_tag(q, rq); |
2287 | 2288 | ||
2288 | elv_requeue_request(q, rq); | 2289 | elv_requeue_request(q, rq); |
2289 | } | 2290 | } |
2290 | 2291 | ||
2291 | EXPORT_SYMBOL(blk_requeue_request); | 2292 | EXPORT_SYMBOL(blk_requeue_request); |
2292 | 2293 | ||
2293 | /** | 2294 | /** |
2294 | * blk_insert_request - insert a special request into a request queue | 2295 | * blk_insert_request - insert a special request into a request queue |
2295 | * @q: request queue where request should be inserted | 2296 | * @q: request queue where request should be inserted |
2296 | * @rq: request to be inserted | 2297 | * @rq: request to be inserted |
2297 | * @at_head: insert request at head or tail of queue | 2298 | * @at_head: insert request at head or tail of queue |
2298 | * @data: private data | 2299 | * @data: private data |
2299 | * | 2300 | * |
2300 | * Description: | 2301 | * Description: |
2301 | * Many block devices need to execute commands asynchronously, so they don't | 2302 | * Many block devices need to execute commands asynchronously, so they don't |
2302 | * block the whole kernel from preemption during request execution. This is | 2303 | * block the whole kernel from preemption during request execution. This is |
2303 | * accomplished normally by inserting artificial requests tagged as | 2304 | * accomplished normally by inserting artificial requests tagged as |
2304 | * REQ_SPECIAL into the corresponding request queue, and letting them be | 2305 | * REQ_SPECIAL into the corresponding request queue, and letting them be |
2305 | * scheduled for actual execution by the request queue. | 2306 | * scheduled for actual execution by the request queue. |
2306 | * | 2307 | * |
2307 | * We have the option of inserting at the head or the tail of the queue. | 2308 | * We have the option of inserting at the head or the tail of the queue. |
2308 | * Typically we use the tail for new ioctls and so forth. We use the head | 2309 | * Typically we use the tail for new ioctls and so forth. We use the head |
2309 | * of the queue for things like a QUEUE_FULL message from a device, or a | 2310 | * of the queue for things like a QUEUE_FULL message from a device, or a |
2310 | * host that is unable to accept a particular command. | 2311 | * host that is unable to accept a particular command. |
2311 | */ | 2312 | */ |
2312 | void blk_insert_request(request_queue_t *q, struct request *rq, | 2313 | void blk_insert_request(request_queue_t *q, struct request *rq, |
2313 | int at_head, void *data) | 2314 | int at_head, void *data) |
2314 | { | 2315 | { |
2315 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | 2316 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
2316 | unsigned long flags; | 2317 | unsigned long flags; |
2317 | 2318 | ||
2318 | /* | 2319 | /* |
2319 | * tell I/O scheduler that this isn't a regular read/write (ie it | 2320 | * tell I/O scheduler that this isn't a regular read/write (ie it |
2320 | * must not attempt merges on this) and that it acts as a soft | 2321 | * must not attempt merges on this) and that it acts as a soft |
2321 | * barrier | 2322 | * barrier |
2322 | */ | 2323 | */ |
2323 | rq->cmd_type = REQ_TYPE_SPECIAL; | 2324 | rq->cmd_type = REQ_TYPE_SPECIAL; |
2324 | rq->cmd_flags |= REQ_SOFTBARRIER; | 2325 | rq->cmd_flags |= REQ_SOFTBARRIER; |
2325 | 2326 | ||
2326 | rq->special = data; | 2327 | rq->special = data; |
2327 | 2328 | ||
2328 | spin_lock_irqsave(q->queue_lock, flags); | 2329 | spin_lock_irqsave(q->queue_lock, flags); |
2329 | 2330 | ||
2330 | /* | 2331 | /* |
2331 | * If command is tagged, release the tag | 2332 | * If command is tagged, release the tag |
2332 | */ | 2333 | */ |
2333 | if (blk_rq_tagged(rq)) | 2334 | if (blk_rq_tagged(rq)) |
2334 | blk_queue_end_tag(q, rq); | 2335 | blk_queue_end_tag(q, rq); |
2335 | 2336 | ||
2336 | drive_stat_acct(rq, rq->nr_sectors, 1); | 2337 | drive_stat_acct(rq, rq->nr_sectors, 1); |
2337 | __elv_add_request(q, rq, where, 0); | 2338 | __elv_add_request(q, rq, where, 0); |
2338 | 2339 | ||
2339 | if (blk_queue_plugged(q)) | 2340 | if (blk_queue_plugged(q)) |
2340 | __generic_unplug_device(q); | 2341 | __generic_unplug_device(q); |
2341 | else | 2342 | else |
2342 | q->request_fn(q); | 2343 | q->request_fn(q); |
2343 | spin_unlock_irqrestore(q->queue_lock, flags); | 2344 | spin_unlock_irqrestore(q->queue_lock, flags); |
2344 | } | 2345 | } |
2345 | 2346 | ||
2346 | EXPORT_SYMBOL(blk_insert_request); | 2347 | EXPORT_SYMBOL(blk_insert_request); |
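A minimal sketch of the head-insertion case described above, not part of this diff; my_send_special() and the command pointer are assumptions for illustration.

	static int my_send_special(request_queue_t *q, void *cmd)
	{
		struct request *rq;

		rq = blk_get_request(q, WRITE, GFP_KERNEL);
		if (!rq)
			return -ENOMEM;

		/* at_head=1: jump the queue, e.g. to recover from QUEUE_FULL */
		blk_insert_request(q, rq, 1, cmd);
		return 0;
	}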
2347 | 2348 | ||
2348 | /** | 2349 | /** |
2349 | * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage | 2350 | * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage |
2350 | * @q: request queue where request should be inserted | 2351 | * @q: request queue where request should be inserted |
2351 | * @rq: request structure to fill | 2352 | * @rq: request structure to fill |
2352 | * @ubuf: the user buffer | 2353 | * @ubuf: the user buffer |
2353 | * @len: length of user data | 2354 | * @len: length of user data |
2354 | * | 2355 | * |
2355 | * Description: | 2356 | * Description: |
2356 | * Data will be mapped directly for zero copy io, if possible. Otherwise | 2357 | * Data will be mapped directly for zero copy io, if possible. Otherwise |
2357 | * a kernel bounce buffer is used. | 2358 | * a kernel bounce buffer is used. |
2358 | * | 2359 | * |
2359 | * A matching blk_rq_unmap_user() must be issued at the end of io, while | 2360 | * A matching blk_rq_unmap_user() must be issued at the end of io, while |
2360 | * still in process context. | 2361 | * still in process context. |
2361 | * | 2362 | * |
2362 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() | 2363 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() |
2363 | * before being submitted to the device, as pages mapped may be out of | 2364 | * before being submitted to the device, as pages mapped may be out of |
2364 | * reach. It's the caller's responsibility to make sure this happens. The | 2365 | * reach. It's the caller's responsibility to make sure this happens. The |
2365 | * original bio must be passed back in to blk_rq_unmap_user() for proper | 2366 | * original bio must be passed back in to blk_rq_unmap_user() for proper |
2366 | * unmapping. | 2367 | * unmapping. |
2367 | */ | 2368 | */ |
2368 | int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, | 2369 | int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, |
2369 | unsigned int len) | 2370 | unsigned int len) |
2370 | { | 2371 | { |
2371 | unsigned long uaddr; | 2372 | unsigned long uaddr; |
2372 | struct bio *bio; | 2373 | struct bio *bio; |
2373 | int reading; | 2374 | int reading; |
2374 | 2375 | ||
2375 | if (len > (q->max_hw_sectors << 9)) | 2376 | if (len > (q->max_hw_sectors << 9)) |
2376 | return -EINVAL; | 2377 | return -EINVAL; |
2377 | if (!len || !ubuf) | 2378 | if (!len || !ubuf) |
2378 | return -EINVAL; | 2379 | return -EINVAL; |
2379 | 2380 | ||
2380 | reading = rq_data_dir(rq) == READ; | 2381 | reading = rq_data_dir(rq) == READ; |
2381 | 2382 | ||
2382 | /* | 2383 | /* |
2383 | * if alignment requirement is satisfied, map in user pages for | 2384 | * if alignment requirement is satisfied, map in user pages for |
2384 | * direct dma. else, set up kernel bounce buffers | 2385 | * direct dma. else, set up kernel bounce buffers |
2385 | */ | 2386 | */ |
2386 | uaddr = (unsigned long) ubuf; | 2387 | uaddr = (unsigned long) ubuf; |
2387 | if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q))) | 2388 | if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q))) |
2388 | bio = bio_map_user(q, NULL, uaddr, len, reading); | 2389 | bio = bio_map_user(q, NULL, uaddr, len, reading); |
2389 | else | 2390 | else |
2390 | bio = bio_copy_user(q, uaddr, len, reading); | 2391 | bio = bio_copy_user(q, uaddr, len, reading); |
2391 | 2392 | ||
2392 | if (!IS_ERR(bio)) { | 2393 | if (!IS_ERR(bio)) { |
2393 | rq->bio = rq->biotail = bio; | 2394 | rq->bio = rq->biotail = bio; |
2394 | blk_rq_bio_prep(q, rq, bio); | 2395 | blk_rq_bio_prep(q, rq, bio); |
2395 | 2396 | ||
2396 | rq->buffer = rq->data = NULL; | 2397 | rq->buffer = rq->data = NULL; |
2397 | rq->data_len = len; | 2398 | rq->data_len = len; |
2398 | return 0; | 2399 | return 0; |
2399 | } | 2400 | } |
2400 | 2401 | ||
2401 | /* | 2402 | /* |
2402 | * bio is the err-ptr | 2403 | * bio is the err-ptr |
2403 | */ | 2404 | */ |
2404 | return PTR_ERR(bio); | 2405 | return PTR_ERR(bio); |
2405 | } | 2406 | } |
2406 | 2407 | ||
2407 | EXPORT_SYMBOL(blk_rq_map_user); | 2408 | EXPORT_SYMBOL(blk_rq_map_user); |
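A minimal sketch of a passthrough caller using the mapping helpers above, not part of this diff; my_passthrough() is an assumption and the REQ_BLOCK_PC command setup is omitted for brevity.

	static int my_passthrough(request_queue_t *q, struct gendisk *disk,
				  void __user *ubuf, unsigned int len)
	{
		struct request *rq;
		struct bio *bio;
		int err;

		rq = blk_get_request(q, READ, GFP_KERNEL);
		if (!rq)
			return -ENOMEM;

		err = blk_rq_map_user(q, rq, ubuf, len);
		if (!err) {
			bio = rq->bio;		/* keep the original bio for unmapping */
			err = blk_execute_rq(q, disk, rq, 0);
			blk_rq_unmap_user(bio, len);
		}

		blk_put_request(rq);
		return err;
	}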
2408 | 2409 | ||
2409 | /** | 2410 | /** |
2410 | * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage | 2411 | * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage |
2411 | * @q: request queue where request should be inserted | 2412 | * @q: request queue where request should be inserted |
2412 | * @rq: request to map data to | 2413 | * @rq: request to map data to |
2413 | * @iov: pointer to the iovec | 2414 | * @iov: pointer to the iovec |
2414 | * @iov_count: number of elements in the iovec | 2415 | * @iov_count: number of elements in the iovec |
2415 | * | 2416 | * |
2416 | * Description: | 2417 | * Description: |
2417 | * Data will be mapped directly for zero copy io, if possible. Otherwise | 2418 | * Data will be mapped directly for zero copy io, if possible. Otherwise |
2418 | * a kernel bounce buffer is used. | 2419 | * a kernel bounce buffer is used. |
2419 | * | 2420 | * |
2420 | * A matching blk_rq_unmap_user() must be issued at the end of io, while | 2421 | * A matching blk_rq_unmap_user() must be issued at the end of io, while |
2421 | * still in process context. | 2422 | * still in process context. |
2422 | * | 2423 | * |
2423 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() | 2424 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() |
2424 | * before being submitted to the device, as pages mapped may be out of | 2425 | * before being submitted to the device, as pages mapped may be out of |
2425 | * reach. It's the caller's responsibility to make sure this happens. The | 2426 | * reach. It's the caller's responsibility to make sure this happens. The |
2426 | * original bio must be passed back in to blk_rq_unmap_user() for proper | 2427 | * original bio must be passed back in to blk_rq_unmap_user() for proper |
2427 | * unmapping. | 2428 | * unmapping. |
2428 | */ | 2429 | */ |
2429 | int blk_rq_map_user_iov(request_queue_t *q, struct request *rq, | 2430 | int blk_rq_map_user_iov(request_queue_t *q, struct request *rq, |
2430 | struct sg_iovec *iov, int iov_count) | 2431 | struct sg_iovec *iov, int iov_count) |
2431 | { | 2432 | { |
2432 | struct bio *bio; | 2433 | struct bio *bio; |
2433 | 2434 | ||
2434 | if (!iov || iov_count <= 0) | 2435 | if (!iov || iov_count <= 0) |
2435 | return -EINVAL; | 2436 | return -EINVAL; |
2436 | 2437 | ||
2437 | /* we don't allow misaligned data like bio_map_user() does. If the | 2438 | /* we don't allow misaligned data like bio_map_user() does. If the |
2438 | * user is using sg, they're expected to know the alignment constraints | 2439 | * user is using sg, they're expected to know the alignment constraints |
2439 | * and respect them accordingly */ | 2440 | * and respect them accordingly */ |
2440 | bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); | 2441 | bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); |
2441 | if (IS_ERR(bio)) | 2442 | if (IS_ERR(bio)) |
2442 | return PTR_ERR(bio); | 2443 | return PTR_ERR(bio); |
2443 | 2444 | ||
2444 | rq->bio = rq->biotail = bio; | 2445 | rq->bio = rq->biotail = bio; |
2445 | blk_rq_bio_prep(q, rq, bio); | 2446 | blk_rq_bio_prep(q, rq, bio); |
2446 | rq->buffer = rq->data = NULL; | 2447 | rq->buffer = rq->data = NULL; |
2447 | rq->data_len = bio->bi_size; | 2448 | rq->data_len = bio->bi_size; |
2448 | return 0; | 2449 | return 0; |
2449 | } | 2450 | } |
2450 | 2451 | ||
2451 | EXPORT_SYMBOL(blk_rq_map_user_iov); | 2452 | EXPORT_SYMBOL(blk_rq_map_user_iov); |
2452 | 2453 | ||
2453 | /** | 2454 | /** |
2454 | * blk_rq_unmap_user - unmap a request with user data | 2455 | * blk_rq_unmap_user - unmap a request with user data |
2455 | * @bio: bio to be unmapped | 2456 | * @bio: bio to be unmapped |
2456 | * @ulen: length of user buffer | 2457 | * @ulen: length of user buffer |
2457 | * | 2458 | * |
2458 | * Description: | 2459 | * Description: |
2459 | * Unmap a bio previously mapped by blk_rq_map_user(). | 2460 | * Unmap a bio previously mapped by blk_rq_map_user(). |
2460 | */ | 2461 | */ |
2461 | int blk_rq_unmap_user(struct bio *bio, unsigned int ulen) | 2462 | int blk_rq_unmap_user(struct bio *bio, unsigned int ulen) |
2462 | { | 2463 | { |
2463 | int ret = 0; | 2464 | int ret = 0; |
2464 | 2465 | ||
2465 | if (bio) { | 2466 | if (bio) { |
2466 | if (bio_flagged(bio, BIO_USER_MAPPED)) | 2467 | if (bio_flagged(bio, BIO_USER_MAPPED)) |
2467 | bio_unmap_user(bio); | 2468 | bio_unmap_user(bio); |
2468 | else | 2469 | else |
2469 | ret = bio_uncopy_user(bio); | 2470 | ret = bio_uncopy_user(bio); |
2470 | } | 2471 | } |
2471 | 2472 | ||
2472 | return ret; | 2473 | return ret; |
2473 | } | 2474 | } |
2474 | 2475 | ||
2475 | EXPORT_SYMBOL(blk_rq_unmap_user); | 2476 | EXPORT_SYMBOL(blk_rq_unmap_user); |
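To make the map/unmap pairing above concrete, here is a minimal, illustrative sketch (not part of this patch) of a passthrough path that maps a user iovec, issues the request, and then unmaps the original bio. The helper name sgio_rw(), the REQ_TYPE_BLOCK_PC marking and the elided command/sense/timeout setup are assumptions; the block-layer calls are the ones shown in this file.

static int sgio_rw(request_queue_t *q, struct gendisk *disk,
		   struct sg_iovec *iov, int iov_count,
		   unsigned int total_len, int do_write)
{
	struct request *rq;
	struct bio *bio;
	int err;

	rq = blk_get_request(q, do_write ? WRITE : READ, __GFP_WAIT);
	rq->cmd_type = REQ_TYPE_BLOCK_PC;	/* assumed passthrough user */
	/* ... command bytes, sense buffer and timeout would be set up here ... */

	err = blk_rq_map_user_iov(q, rq, iov, iov_count);
	if (err)
		goto out;

	bio = rq->bio;			/* keep the original bio for unmapping */
	err = blk_execute_rq(q, disk, rq, 0);

	/* still in process context, as the comment above requires */
	blk_rq_unmap_user(bio, total_len);
out:
	blk_put_request(rq);
	return err;
}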
2476 | 2477 | ||
2477 | /** | 2478 | /** |
2478 | * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage | 2479 | * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage |
2479 | * @q: request queue where request should be inserted | 2480 | * @q: request queue where request should be inserted |
2480 | * @rq: request to fill | 2481 | * @rq: request to fill |
2481 | * @kbuf: the kernel buffer | 2482 | * @kbuf: the kernel buffer |
2482 | * @len: length of user data | 2483 | * @len: length of user data |
2483 | * @gfp_mask: memory allocation flags | 2484 | * @gfp_mask: memory allocation flags |
2484 | */ | 2485 | */ |
2485 | int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, | 2486 | int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, |
2486 | unsigned int len, gfp_t gfp_mask) | 2487 | unsigned int len, gfp_t gfp_mask) |
2487 | { | 2488 | { |
2488 | struct bio *bio; | 2489 | struct bio *bio; |
2489 | 2490 | ||
2490 | if (len > (q->max_hw_sectors << 9)) | 2491 | if (len > (q->max_hw_sectors << 9)) |
2491 | return -EINVAL; | 2492 | return -EINVAL; |
2492 | if (!len || !kbuf) | 2493 | if (!len || !kbuf) |
2493 | return -EINVAL; | 2494 | return -EINVAL; |
2494 | 2495 | ||
2495 | bio = bio_map_kern(q, kbuf, len, gfp_mask); | 2496 | bio = bio_map_kern(q, kbuf, len, gfp_mask); |
2496 | if (IS_ERR(bio)) | 2497 | if (IS_ERR(bio)) |
2497 | return PTR_ERR(bio); | 2498 | return PTR_ERR(bio); |
2498 | 2499 | ||
2499 | if (rq_data_dir(rq) == WRITE) | 2500 | if (rq_data_dir(rq) == WRITE) |
2500 | bio->bi_rw |= (1 << BIO_RW); | 2501 | bio->bi_rw |= (1 << BIO_RW); |
2501 | 2502 | ||
2502 | rq->bio = rq->biotail = bio; | 2503 | rq->bio = rq->biotail = bio; |
2503 | blk_rq_bio_prep(q, rq, bio); | 2504 | blk_rq_bio_prep(q, rq, bio); |
2504 | 2505 | ||
2505 | rq->buffer = rq->data = NULL; | 2506 | rq->buffer = rq->data = NULL; |
2506 | rq->data_len = len; | 2507 | rq->data_len = len; |
2507 | return 0; | 2508 | return 0; |
2508 | } | 2509 | } |
2509 | 2510 | ||
2510 | EXPORT_SYMBOL(blk_rq_map_kern); | 2511 | EXPORT_SYMBOL(blk_rq_map_kern); |
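A shorter sketch for the kernel-buffer case, again illustrative rather than taken from this commit; buf is assumed to be an ordinary kmalloc'ed buffer no larger than q->max_hw_sectors << 9, per the check above.

	/* q, disk, buf and len are assumed to be set up by the caller */
	rq = blk_get_request(q, WRITE, __GFP_WAIT);
	rq->cmd_type = REQ_TYPE_BLOCK_PC;		/* assumed passthrough use */

	err = blk_rq_map_kern(q, rq, buf, len, GFP_KERNEL);
	if (!err)
		err = blk_execute_rq(q, disk, rq, 0);	/* defined further down */
	blk_put_request(rq);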
2511 | 2512 | ||
2512 | /** | 2513 | /** |
2513 | * blk_execute_rq_nowait - insert a request into queue for execution | 2514 | * blk_execute_rq_nowait - insert a request into queue for execution |
2514 | * @q: queue to insert the request in | 2515 | * @q: queue to insert the request in |
2515 | * @bd_disk: matching gendisk | 2516 | * @bd_disk: matching gendisk |
2516 | * @rq: request to insert | 2517 | * @rq: request to insert |
2517 | * @at_head: insert request at head or tail of queue | 2518 | * @at_head: insert request at head or tail of queue |
2518 | * @done: I/O completion handler | 2519 | * @done: I/O completion handler |
2519 | * | 2520 | * |
2520 | * Description: | 2521 | * Description: |
2521 | * Insert a fully prepared request at the back of the io scheduler queue | 2522 | * Insert a fully prepared request at the back of the io scheduler queue |
2522 | * for execution. Don't wait for completion. | 2523 | * for execution. Don't wait for completion. |
2523 | */ | 2524 | */ |
2524 | void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, | 2525 | void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, |
2525 | struct request *rq, int at_head, | 2526 | struct request *rq, int at_head, |
2526 | rq_end_io_fn *done) | 2527 | rq_end_io_fn *done) |
2527 | { | 2528 | { |
2528 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | 2529 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
2529 | 2530 | ||
2530 | rq->rq_disk = bd_disk; | 2531 | rq->rq_disk = bd_disk; |
2531 | rq->cmd_flags |= REQ_NOMERGE; | 2532 | rq->cmd_flags |= REQ_NOMERGE; |
2532 | rq->end_io = done; | 2533 | rq->end_io = done; |
2533 | WARN_ON(irqs_disabled()); | 2534 | WARN_ON(irqs_disabled()); |
2534 | spin_lock_irq(q->queue_lock); | 2535 | spin_lock_irq(q->queue_lock); |
2535 | __elv_add_request(q, rq, where, 1); | 2536 | __elv_add_request(q, rq, where, 1); |
2536 | __generic_unplug_device(q); | 2537 | __generic_unplug_device(q); |
2537 | spin_unlock_irq(q->queue_lock); | 2538 | spin_unlock_irq(q->queue_lock); |
2538 | } | 2539 | } |
2539 | EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); | 2540 | EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); |
2540 | 2541 | ||
2541 | /** | 2542 | /** |
2542 | * blk_execute_rq - insert a request into queue for execution | 2543 | * blk_execute_rq - insert a request into queue for execution |
2543 | * @q: queue to insert the request in | 2544 | * @q: queue to insert the request in |
2544 | * @bd_disk: matching gendisk | 2545 | * @bd_disk: matching gendisk |
2545 | * @rq: request to insert | 2546 | * @rq: request to insert |
2546 | * @at_head: insert request at head or tail of queue | 2547 | * @at_head: insert request at head or tail of queue |
2547 | * | 2548 | * |
2548 | * Description: | 2549 | * Description: |
2549 | * Insert a fully prepared request at the back of the io scheduler queue | 2550 | * Insert a fully prepared request at the back of the io scheduler queue |
2550 | * for execution and wait for completion. | 2551 | * for execution and wait for completion. |
2551 | */ | 2552 | */ |
2552 | int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, | 2553 | int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, |
2553 | struct request *rq, int at_head) | 2554 | struct request *rq, int at_head) |
2554 | { | 2555 | { |
2555 | DECLARE_COMPLETION_ONSTACK(wait); | 2556 | DECLARE_COMPLETION_ONSTACK(wait); |
2556 | char sense[SCSI_SENSE_BUFFERSIZE]; | 2557 | char sense[SCSI_SENSE_BUFFERSIZE]; |
2557 | int err = 0; | 2558 | int err = 0; |
2558 | 2559 | ||
2559 | /* | 2560 | /* |
2560 | * we need an extra reference to the request, so we can look at | 2561 | * we need an extra reference to the request, so we can look at |
2561 | * it after io completion | 2562 | * it after io completion |
2562 | */ | 2563 | */ |
2563 | rq->ref_count++; | 2564 | rq->ref_count++; |
2564 | 2565 | ||
2565 | if (!rq->sense) { | 2566 | if (!rq->sense) { |
2566 | memset(sense, 0, sizeof(sense)); | 2567 | memset(sense, 0, sizeof(sense)); |
2567 | rq->sense = sense; | 2568 | rq->sense = sense; |
2568 | rq->sense_len = 0; | 2569 | rq->sense_len = 0; |
2569 | } | 2570 | } |
2570 | 2571 | ||
2571 | rq->waiting = &wait; | 2572 | rq->waiting = &wait; |
2572 | blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); | 2573 | blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); |
2573 | wait_for_completion(&wait); | 2574 | wait_for_completion(&wait); |
2574 | rq->waiting = NULL; | 2575 | rq->waiting = NULL; |
2575 | 2576 | ||
2576 | if (rq->errors) | 2577 | if (rq->errors) |
2577 | err = -EIO; | 2578 | err = -EIO; |
2578 | 2579 | ||
2579 | return err; | 2580 | return err; |
2580 | } | 2581 | } |
2581 | 2582 | ||
2582 | EXPORT_SYMBOL(blk_execute_rq); | 2583 | EXPORT_SYMBOL(blk_execute_rq); |
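Because of the extra reference taken above, the request is still valid when blk_execute_rq() returns, so the submitter can look at the outcome before dropping it. A hedged fragment (the error reporting is an assumption, not something this file mandates):

	err = blk_execute_rq(q, disk, rq, 0);
	if (err)
		/* rq->errors still holds the low-level error count here */
		printk(KERN_WARNING "passthrough failed, rq->errors=%d\n",
		       rq->errors);
	blk_put_request(rq);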
2583 | 2584 | ||
2584 | /** | 2585 | /** |
2585 | * blkdev_issue_flush - queue a flush | 2586 | * blkdev_issue_flush - queue a flush |
2586 | * @bdev: blockdev to issue flush for | 2587 | * @bdev: blockdev to issue flush for |
2587 | * @error_sector: error sector | 2588 | * @error_sector: error sector |
2588 | * | 2589 | * |
2589 | * Description: | 2590 | * Description: |
2590 | * Issue a flush for the block device in question. Caller can supply | 2591 | * Issue a flush for the block device in question. Caller can supply |
2591 | * room for storing the error offset in case of a flush error, if they | 2592 | * room for storing the error offset in case of a flush error, if they |
2592 | * wish to. Caller must run wait_for_completion() on its own. | 2593 | * wish to. Caller must run wait_for_completion() on its own. |
2593 | */ | 2594 | */ |
2594 | int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) | 2595 | int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) |
2595 | { | 2596 | { |
2596 | request_queue_t *q; | 2597 | request_queue_t *q; |
2597 | 2598 | ||
2598 | if (bdev->bd_disk == NULL) | 2599 | if (bdev->bd_disk == NULL) |
2599 | return -ENXIO; | 2600 | return -ENXIO; |
2600 | 2601 | ||
2601 | q = bdev_get_queue(bdev); | 2602 | q = bdev_get_queue(bdev); |
2602 | if (!q) | 2603 | if (!q) |
2603 | return -ENXIO; | 2604 | return -ENXIO; |
2604 | if (!q->issue_flush_fn) | 2605 | if (!q->issue_flush_fn) |
2605 | return -EOPNOTSUPP; | 2606 | return -EOPNOTSUPP; |
2606 | 2607 | ||
2607 | return q->issue_flush_fn(q, bdev->bd_disk, error_sector); | 2608 | return q->issue_flush_fn(q, bdev->bd_disk, error_sector); |
2608 | } | 2609 | } |
2609 | 2610 | ||
2610 | EXPORT_SYMBOL(blkdev_issue_flush); | 2611 | EXPORT_SYMBOL(blkdev_issue_flush); |
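A minimal caller, illustrative only; sb is an assumed superblock, and treating -EOPNOTSUPP as success is a caller policy choice, not something this helper dictates.

	sector_t error_sector;
	int err;

	err = blkdev_issue_flush(sb->s_bdev, &error_sector);
	if (err == -EOPNOTSUPP)
		err = 0;	/* queue has no issue_flush_fn: nothing to flush */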
2611 | 2612 | ||
2612 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) | 2613 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) |
2613 | { | 2614 | { |
2614 | int rw = rq_data_dir(rq); | 2615 | int rw = rq_data_dir(rq); |
2615 | 2616 | ||
2616 | if (!blk_fs_request(rq) || !rq->rq_disk) | 2617 | if (!blk_fs_request(rq) || !rq->rq_disk) |
2617 | return; | 2618 | return; |
2618 | 2619 | ||
2619 | if (!new_io) { | 2620 | if (!new_io) { |
2620 | __disk_stat_inc(rq->rq_disk, merges[rw]); | 2621 | __disk_stat_inc(rq->rq_disk, merges[rw]); |
2621 | } else { | 2622 | } else { |
2622 | disk_round_stats(rq->rq_disk); | 2623 | disk_round_stats(rq->rq_disk); |
2623 | rq->rq_disk->in_flight++; | 2624 | rq->rq_disk->in_flight++; |
2624 | } | 2625 | } |
2625 | } | 2626 | } |
2626 | 2627 | ||
2627 | /* | 2628 | /* |
2628 | * add-request adds a request to the linked list. | 2629 | * add-request adds a request to the linked list. |
2629 | * queue lock is held and interrupts disabled, as we muck with the | 2630 | * queue lock is held and interrupts disabled, as we muck with the |
2630 | * request queue list. | 2631 | * request queue list. |
2631 | */ | 2632 | */ |
2632 | static inline void add_request(request_queue_t * q, struct request * req) | 2633 | static inline void add_request(request_queue_t * q, struct request * req) |
2633 | { | 2634 | { |
2634 | drive_stat_acct(req, req->nr_sectors, 1); | 2635 | drive_stat_acct(req, req->nr_sectors, 1); |
2635 | 2636 | ||
2636 | if (q->activity_fn) | 2637 | if (q->activity_fn) |
2637 | q->activity_fn(q->activity_data, rq_data_dir(req)); | 2638 | q->activity_fn(q->activity_data, rq_data_dir(req)); |
2638 | 2639 | ||
2639 | /* | 2640 | /* |
2640 | * elevator indicated where it wants this request to be | 2641 | * elevator indicated where it wants this request to be |
2641 | * inserted at elevator_merge time | 2642 | * inserted at elevator_merge time |
2642 | */ | 2643 | */ |
2643 | __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); | 2644 | __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); |
2644 | } | 2645 | } |
2645 | 2646 | ||
2646 | /* | 2647 | /* |
2647 | * disk_round_stats() - Round off the performance stats on a struct | 2648 | * disk_round_stats() - Round off the performance stats on a struct |
2648 | * disk_stats. | 2649 | * disk_stats. |
2649 | * | 2650 | * |
2650 | * The average IO queue length and utilisation statistics are maintained | 2651 | * The average IO queue length and utilisation statistics are maintained |
2651 | * by observing the current state of the queue length and the amount of | 2652 | * by observing the current state of the queue length and the amount of |
2652 | * time it has been in this state for. | 2653 | * time it has been in this state for. |
2653 | * | 2654 | * |
2654 | * Normally, that accounting is done on IO completion, but that can result | 2655 | * Normally, that accounting is done on IO completion, but that can result |
2655 | * in more than a second's worth of IO being accounted for within any one | 2656 | * in more than a second's worth of IO being accounted for within any one |
2656 | * second, leading to >100% utilisation. To deal with that, we call this | 2657 | * second, leading to >100% utilisation. To deal with that, we call this |
2657 | * function to do a round-off before returning the results when reading | 2658 | * function to do a round-off before returning the results when reading |
2658 | * /proc/diskstats. This accounts immediately for all queue usage up to | 2659 | * /proc/diskstats. This accounts immediately for all queue usage up to |
2659 | * the current jiffies and restarts the counters again. | 2660 | * the current jiffies and restarts the counters again. |
2660 | */ | 2661 | */ |
2661 | void disk_round_stats(struct gendisk *disk) | 2662 | void disk_round_stats(struct gendisk *disk) |
2662 | { | 2663 | { |
2663 | unsigned long now = jiffies; | 2664 | unsigned long now = jiffies; |
2664 | 2665 | ||
2665 | if (now == disk->stamp) | 2666 | if (now == disk->stamp) |
2666 | return; | 2667 | return; |
2667 | 2668 | ||
2668 | if (disk->in_flight) { | 2669 | if (disk->in_flight) { |
2669 | __disk_stat_add(disk, time_in_queue, | 2670 | __disk_stat_add(disk, time_in_queue, |
2670 | disk->in_flight * (now - disk->stamp)); | 2671 | disk->in_flight * (now - disk->stamp)); |
2671 | __disk_stat_add(disk, io_ticks, (now - disk->stamp)); | 2672 | __disk_stat_add(disk, io_ticks, (now - disk->stamp)); |
2672 | } | 2673 | } |
2673 | disk->stamp = now; | 2674 | disk->stamp = now; |
2674 | } | 2675 | } |
2675 | 2676 | ||
2676 | EXPORT_SYMBOL_GPL(disk_round_stats); | 2677 | EXPORT_SYMBOL_GPL(disk_round_stats); |
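Reading the code above with concrete (made-up) numbers: if three requests are in flight and 8 jiffies have elapsed since disk->stamp, the call adds 3 * 8 = 24 jiffies to time_in_queue and 8 jiffies to io_ticks, then advances stamp to the current jiffy, so the same interval cannot be accounted a second time when those requests eventually complete.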
2677 | 2678 | ||
2678 | /* | 2679 | /* |
2679 | * queue lock must be held | 2680 | * queue lock must be held |
2680 | */ | 2681 | */ |
2681 | void __blk_put_request(request_queue_t *q, struct request *req) | 2682 | void __blk_put_request(request_queue_t *q, struct request *req) |
2682 | { | 2683 | { |
2683 | struct request_list *rl = req->rl; | 2684 | struct request_list *rl = req->rl; |
2684 | 2685 | ||
2685 | if (unlikely(!q)) | 2686 | if (unlikely(!q)) |
2686 | return; | 2687 | return; |
2687 | if (unlikely(--req->ref_count)) | 2688 | if (unlikely(--req->ref_count)) |
2688 | return; | 2689 | return; |
2689 | 2690 | ||
2690 | elv_completed_request(q, req); | 2691 | elv_completed_request(q, req); |
2691 | 2692 | ||
2692 | req->rq_status = RQ_INACTIVE; | 2693 | req->rq_status = RQ_INACTIVE; |
2693 | req->rl = NULL; | 2694 | req->rl = NULL; |
2694 | 2695 | ||
2695 | /* | 2696 | /* |
2696 | * Request may not have originated from ll_rw_blk. If not, | 2697 | * Request may not have originated from ll_rw_blk. If not, |
2697 | * it didn't come out of our reserved rq pools | 2698 | * it didn't come out of our reserved rq pools |
2698 | */ | 2699 | */ |
2699 | if (rl) { | 2700 | if (rl) { |
2700 | int rw = rq_data_dir(req); | 2701 | int rw = rq_data_dir(req); |
2701 | int priv = req->cmd_flags & REQ_ELVPRIV; | 2702 | int priv = req->cmd_flags & REQ_ELVPRIV; |
2702 | 2703 | ||
2703 | BUG_ON(!list_empty(&req->queuelist)); | 2704 | BUG_ON(!list_empty(&req->queuelist)); |
2704 | BUG_ON(!hlist_unhashed(&req->hash)); | 2705 | BUG_ON(!hlist_unhashed(&req->hash)); |
2705 | 2706 | ||
2706 | blk_free_request(q, req); | 2707 | blk_free_request(q, req); |
2707 | freed_request(q, rw, priv); | 2708 | freed_request(q, rw, priv); |
2708 | } | 2709 | } |
2709 | } | 2710 | } |
2710 | 2711 | ||
2711 | EXPORT_SYMBOL_GPL(__blk_put_request); | 2712 | EXPORT_SYMBOL_GPL(__blk_put_request); |
2712 | 2713 | ||
2713 | void blk_put_request(struct request *req) | 2714 | void blk_put_request(struct request *req) |
2714 | { | 2715 | { |
2715 | unsigned long flags; | 2716 | unsigned long flags; |
2716 | request_queue_t *q = req->q; | 2717 | request_queue_t *q = req->q; |
2717 | 2718 | ||
2718 | /* | 2719 | /* |
2719 | * Gee, IDE calls in w/ NULL q. Fix IDE and remove the | 2720 | * Gee, IDE calls in w/ NULL q. Fix IDE and remove the |
2720 | * following if (q) test. | 2721 | * following if (q) test. |
2721 | */ | 2722 | */ |
2722 | if (q) { | 2723 | if (q) { |
2723 | spin_lock_irqsave(q->queue_lock, flags); | 2724 | spin_lock_irqsave(q->queue_lock, flags); |
2724 | __blk_put_request(q, req); | 2725 | __blk_put_request(q, req); |
2725 | spin_unlock_irqrestore(q->queue_lock, flags); | 2726 | spin_unlock_irqrestore(q->queue_lock, flags); |
2726 | } | 2727 | } |
2727 | } | 2728 | } |
2728 | 2729 | ||
2729 | EXPORT_SYMBOL(blk_put_request); | 2730 | EXPORT_SYMBOL(blk_put_request); |
2730 | 2731 | ||
2731 | /** | 2732 | /** |
2732 | * blk_end_sync_rq - executes a completion event on a request | 2733 | * blk_end_sync_rq - executes a completion event on a request |
2733 | * @rq: request to complete | 2734 | * @rq: request to complete |
2734 | * @error: end io status of the request | 2735 | * @error: end io status of the request |
2735 | */ | 2736 | */ |
2736 | void blk_end_sync_rq(struct request *rq, int error) | 2737 | void blk_end_sync_rq(struct request *rq, int error) |
2737 | { | 2738 | { |
2738 | struct completion *waiting = rq->waiting; | 2739 | struct completion *waiting = rq->waiting; |
2739 | 2740 | ||
2740 | rq->waiting = NULL; | 2741 | rq->waiting = NULL; |
2741 | __blk_put_request(rq->q, rq); | 2742 | __blk_put_request(rq->q, rq); |
2742 | 2743 | ||
2743 | /* | 2744 | /* |
2744 | * complete last, if this is a stack request the process (and thus | 2745 | * complete last, if this is a stack request the process (and thus |
2745 | * the rq pointer) could be invalid right after this complete() | 2746 | * the rq pointer) could be invalid right after this complete() |
2746 | */ | 2747 | */ |
2747 | complete(waiting); | 2748 | complete(waiting); |
2748 | } | 2749 | } |
2749 | EXPORT_SYMBOL(blk_end_sync_rq); | 2750 | EXPORT_SYMBOL(blk_end_sync_rq); |
2750 | 2751 | ||
2751 | /** | 2752 | /** |
2752 | * blk_congestion_wait - wait for a queue to become uncongested | 2753 | * blk_congestion_wait - wait for a queue to become uncongested |
2753 | * @rw: READ or WRITE | 2754 | * @rw: READ or WRITE |
2754 | * @timeout: timeout in jiffies | 2755 | * @timeout: timeout in jiffies |
2755 | * | 2756 | * |
2756 | * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion. | 2757 | * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion. |
2757 | * If no queues are congested then just wait for the next request to be | 2758 | * If no queues are congested then just wait for the next request to be |
2758 | * returned. | 2759 | * returned. |
2759 | */ | 2760 | */ |
2760 | long blk_congestion_wait(int rw, long timeout) | 2761 | long blk_congestion_wait(int rw, long timeout) |
2761 | { | 2762 | { |
2762 | long ret; | 2763 | long ret; |
2763 | DEFINE_WAIT(wait); | 2764 | DEFINE_WAIT(wait); |
2764 | wait_queue_head_t *wqh = &congestion_wqh[rw]; | 2765 | wait_queue_head_t *wqh = &congestion_wqh[rw]; |
2765 | 2766 | ||
2766 | prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); | 2767 | prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); |
2767 | ret = io_schedule_timeout(timeout); | 2768 | ret = io_schedule_timeout(timeout); |
2768 | finish_wait(wqh, &wait); | 2769 | finish_wait(wqh, &wait); |
2769 | return ret; | 2770 | return ret; |
2770 | } | 2771 | } |
2771 | 2772 | ||
2772 | EXPORT_SYMBOL(blk_congestion_wait); | 2773 | EXPORT_SYMBOL(blk_congestion_wait); |
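Typical use is a back-off in a writer that cannot make progress; the predicate below is hypothetical, only the blk_congestion_wait() call reflects this file.

	/* hypothetical writeback-style caller */
	if (!can_make_progress())		/* assumed predicate */
		blk_congestion_wait(WRITE, HZ / 10);	/* sleep up to ~100ms */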
2773 | 2774 | ||
2774 | /** | 2775 | /** |
2775 | * blk_congestion_end - wake up sleepers on a congestion queue | 2776 | * blk_congestion_end - wake up sleepers on a congestion queue |
2776 | * @rw: READ or WRITE | 2777 | * @rw: READ or WRITE |
2777 | */ | 2778 | */ |
2778 | void blk_congestion_end(int rw) | 2779 | void blk_congestion_end(int rw) |
2779 | { | 2780 | { |
2780 | wait_queue_head_t *wqh = &congestion_wqh[rw]; | 2781 | wait_queue_head_t *wqh = &congestion_wqh[rw]; |
2781 | 2782 | ||
2782 | if (waitqueue_active(wqh)) | 2783 | if (waitqueue_active(wqh)) |
2783 | wake_up(wqh); | 2784 | wake_up(wqh); |
2784 | } | 2785 | } |
2785 | 2786 | ||
2786 | /* | 2787 | /* |
2787 | * Has to be called with the request spinlock acquired | 2788 | * Has to be called with the request spinlock acquired |
2788 | */ | 2789 | */ |
2789 | static int attempt_merge(request_queue_t *q, struct request *req, | 2790 | static int attempt_merge(request_queue_t *q, struct request *req, |
2790 | struct request *next) | 2791 | struct request *next) |
2791 | { | 2792 | { |
2792 | if (!rq_mergeable(req) || !rq_mergeable(next)) | 2793 | if (!rq_mergeable(req) || !rq_mergeable(next)) |
2793 | return 0; | 2794 | return 0; |
2794 | 2795 | ||
2795 | /* | 2796 | /* |
2796 | * not contiguous | 2797 | * not contiguous |
2797 | */ | 2798 | */ |
2798 | if (req->sector + req->nr_sectors != next->sector) | 2799 | if (req->sector + req->nr_sectors != next->sector) |
2799 | return 0; | 2800 | return 0; |
2800 | 2801 | ||
2801 | if (rq_data_dir(req) != rq_data_dir(next) | 2802 | if (rq_data_dir(req) != rq_data_dir(next) |
2802 | || req->rq_disk != next->rq_disk | 2803 | || req->rq_disk != next->rq_disk |
2803 | || next->waiting || next->special) | 2804 | || next->waiting || next->special) |
2804 | return 0; | 2805 | return 0; |
2805 | 2806 | ||
2806 | /* | 2807 | /* |
2807 | * If we are allowed to merge, then append bio list | 2808 | * If we are allowed to merge, then append bio list |
2808 | * from next to rq and release next. merge_requests_fn | 2809 | * from next to rq and release next. merge_requests_fn |
2809 | * will have updated segment counts, update sector | 2810 | * will have updated segment counts, update sector |
2810 | * counts here. | 2811 | * counts here. |
2811 | */ | 2812 | */ |
2812 | if (!q->merge_requests_fn(q, req, next)) | 2813 | if (!q->merge_requests_fn(q, req, next)) |
2813 | return 0; | 2814 | return 0; |
2814 | 2815 | ||
2815 | /* | 2816 | /* |
2816 | * At this point we have either done a back merge | 2817 | * At this point we have either done a back merge |
2817 | * or front merge. We need the smaller start_time of | 2818 | * or front merge. We need the smaller start_time of |
2818 | * the merged requests to be the current request | 2819 | * the merged requests to be the current request |
2819 | * for accounting purposes. | 2820 | * for accounting purposes. |
2820 | */ | 2821 | */ |
2821 | if (time_after(req->start_time, next->start_time)) | 2822 | if (time_after(req->start_time, next->start_time)) |
2822 | req->start_time = next->start_time; | 2823 | req->start_time = next->start_time; |
2823 | 2824 | ||
2824 | req->biotail->bi_next = next->bio; | 2825 | req->biotail->bi_next = next->bio; |
2825 | req->biotail = next->biotail; | 2826 | req->biotail = next->biotail; |
2826 | 2827 | ||
2827 | req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; | 2828 | req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; |
2828 | 2829 | ||
2829 | elv_merge_requests(q, req, next); | 2830 | elv_merge_requests(q, req, next); |
2830 | 2831 | ||
2831 | if (req->rq_disk) { | 2832 | if (req->rq_disk) { |
2832 | disk_round_stats(req->rq_disk); | 2833 | disk_round_stats(req->rq_disk); |
2833 | req->rq_disk->in_flight--; | 2834 | req->rq_disk->in_flight--; |
2834 | } | 2835 | } |
2835 | 2836 | ||
2836 | req->ioprio = ioprio_best(req->ioprio, next->ioprio); | 2837 | req->ioprio = ioprio_best(req->ioprio, next->ioprio); |
2837 | 2838 | ||
2838 | __blk_put_request(q, next); | 2839 | __blk_put_request(q, next); |
2839 | return 1; | 2840 | return 1; |
2840 | } | 2841 | } |
2841 | 2842 | ||
2842 | static inline int attempt_back_merge(request_queue_t *q, struct request *rq) | 2843 | static inline int attempt_back_merge(request_queue_t *q, struct request *rq) |
2843 | { | 2844 | { |
2844 | struct request *next = elv_latter_request(q, rq); | 2845 | struct request *next = elv_latter_request(q, rq); |
2845 | 2846 | ||
2846 | if (next) | 2847 | if (next) |
2847 | return attempt_merge(q, rq, next); | 2848 | return attempt_merge(q, rq, next); |
2848 | 2849 | ||
2849 | return 0; | 2850 | return 0; |
2850 | } | 2851 | } |
2851 | 2852 | ||
2852 | static inline int attempt_front_merge(request_queue_t *q, struct request *rq) | 2853 | static inline int attempt_front_merge(request_queue_t *q, struct request *rq) |
2853 | { | 2854 | { |
2854 | struct request *prev = elv_former_request(q, rq); | 2855 | struct request *prev = elv_former_request(q, rq); |
2855 | 2856 | ||
2856 | if (prev) | 2857 | if (prev) |
2857 | return attempt_merge(q, prev, rq); | 2858 | return attempt_merge(q, prev, rq); |
2858 | 2859 | ||
2859 | return 0; | 2860 | return 0; |
2860 | } | 2861 | } |
2861 | 2862 | ||
2862 | static void init_request_from_bio(struct request *req, struct bio *bio) | 2863 | static void init_request_from_bio(struct request *req, struct bio *bio) |
2863 | { | 2864 | { |
2864 | req->cmd_type = REQ_TYPE_FS; | 2865 | req->cmd_type = REQ_TYPE_FS; |
2865 | 2866 | ||
2866 | /* | 2867 | /* |
2867 | * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) | 2868 | * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) |
2868 | */ | 2869 | */ |
2869 | if (bio_rw_ahead(bio) || bio_failfast(bio)) | 2870 | if (bio_rw_ahead(bio) || bio_failfast(bio)) |
2870 | req->cmd_flags |= REQ_FAILFAST; | 2871 | req->cmd_flags |= REQ_FAILFAST; |
2871 | 2872 | ||
2872 | /* | 2873 | /* |
2873 | * REQ_BARRIER implies no merging, but let's make it explicit | 2874 | * REQ_BARRIER implies no merging, but let's make it explicit |
2874 | */ | 2875 | */ |
2875 | if (unlikely(bio_barrier(bio))) | 2876 | if (unlikely(bio_barrier(bio))) |
2876 | req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); | 2877 | req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); |
2877 | 2878 | ||
2878 | if (bio_sync(bio)) | 2879 | if (bio_sync(bio)) |
2879 | req->cmd_flags |= REQ_RW_SYNC; | 2880 | req->cmd_flags |= REQ_RW_SYNC; |
2880 | 2881 | ||
2881 | req->errors = 0; | 2882 | req->errors = 0; |
2882 | req->hard_sector = req->sector = bio->bi_sector; | 2883 | req->hard_sector = req->sector = bio->bi_sector; |
2883 | req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio); | 2884 | req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio); |
2884 | req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio); | 2885 | req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio); |
2885 | req->nr_phys_segments = bio_phys_segments(req->q, bio); | 2886 | req->nr_phys_segments = bio_phys_segments(req->q, bio); |
2886 | req->nr_hw_segments = bio_hw_segments(req->q, bio); | 2887 | req->nr_hw_segments = bio_hw_segments(req->q, bio); |
2887 | req->buffer = bio_data(bio); /* see ->buffer comment above */ | 2888 | req->buffer = bio_data(bio); /* see ->buffer comment above */ |
2888 | req->waiting = NULL; | 2889 | req->waiting = NULL; |
2889 | req->bio = req->biotail = bio; | 2890 | req->bio = req->biotail = bio; |
2890 | req->ioprio = bio_prio(bio); | 2891 | req->ioprio = bio_prio(bio); |
2891 | req->rq_disk = bio->bi_bdev->bd_disk; | 2892 | req->rq_disk = bio->bi_bdev->bd_disk; |
2892 | req->start_time = jiffies; | 2893 | req->start_time = jiffies; |
2893 | } | 2894 | } |
2894 | 2895 | ||
2895 | static int __make_request(request_queue_t *q, struct bio *bio) | 2896 | static int __make_request(request_queue_t *q, struct bio *bio) |
2896 | { | 2897 | { |
2897 | struct request *req; | 2898 | struct request *req; |
2898 | int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync; | 2899 | int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync; |
2899 | unsigned short prio; | 2900 | unsigned short prio; |
2900 | sector_t sector; | 2901 | sector_t sector; |
2901 | 2902 | ||
2902 | sector = bio->bi_sector; | 2903 | sector = bio->bi_sector; |
2903 | nr_sectors = bio_sectors(bio); | 2904 | nr_sectors = bio_sectors(bio); |
2904 | cur_nr_sectors = bio_cur_sectors(bio); | 2905 | cur_nr_sectors = bio_cur_sectors(bio); |
2905 | prio = bio_prio(bio); | 2906 | prio = bio_prio(bio); |
2906 | 2907 | ||
2907 | rw = bio_data_dir(bio); | 2908 | rw = bio_data_dir(bio); |
2908 | sync = bio_sync(bio); | 2909 | sync = bio_sync(bio); |
2909 | 2910 | ||
2910 | /* | 2911 | /* |
2911 | * low level driver can indicate that it wants pages above a | 2912 | * low level driver can indicate that it wants pages above a |
2912 | * certain limit bounced to low memory (ie for highmem, or even | 2913 | * certain limit bounced to low memory (ie for highmem, or even |
2913 | * ISA dma in theory) | 2914 | * ISA dma in theory) |
2914 | */ | 2915 | */ |
2915 | blk_queue_bounce(q, &bio); | 2916 | blk_queue_bounce(q, &bio); |
2916 | 2917 | ||
2917 | spin_lock_prefetch(q->queue_lock); | 2918 | spin_lock_prefetch(q->queue_lock); |
2918 | 2919 | ||
2919 | barrier = bio_barrier(bio); | 2920 | barrier = bio_barrier(bio); |
2920 | if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { | 2921 | if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { |
2921 | err = -EOPNOTSUPP; | 2922 | err = -EOPNOTSUPP; |
2922 | goto end_io; | 2923 | goto end_io; |
2923 | } | 2924 | } |
2924 | 2925 | ||
2925 | spin_lock_irq(q->queue_lock); | 2926 | spin_lock_irq(q->queue_lock); |
2926 | 2927 | ||
2927 | if (unlikely(barrier) || elv_queue_empty(q)) | 2928 | if (unlikely(barrier) || elv_queue_empty(q)) |
2928 | goto get_rq; | 2929 | goto get_rq; |
2929 | 2930 | ||
2930 | el_ret = elv_merge(q, &req, bio); | 2931 | el_ret = elv_merge(q, &req, bio); |
2931 | switch (el_ret) { | 2932 | switch (el_ret) { |
2932 | case ELEVATOR_BACK_MERGE: | 2933 | case ELEVATOR_BACK_MERGE: |
2933 | BUG_ON(!rq_mergeable(req)); | 2934 | BUG_ON(!rq_mergeable(req)); |
2934 | 2935 | ||
2935 | if (!q->back_merge_fn(q, req, bio)) | 2936 | if (!q->back_merge_fn(q, req, bio)) |
2936 | break; | 2937 | break; |
2937 | 2938 | ||
2938 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); | 2939 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); |
2939 | 2940 | ||
2940 | req->biotail->bi_next = bio; | 2941 | req->biotail->bi_next = bio; |
2941 | req->biotail = bio; | 2942 | req->biotail = bio; |
2942 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; | 2943 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; |
2943 | req->ioprio = ioprio_best(req->ioprio, prio); | 2944 | req->ioprio = ioprio_best(req->ioprio, prio); |
2944 | drive_stat_acct(req, nr_sectors, 0); | 2945 | drive_stat_acct(req, nr_sectors, 0); |
2945 | if (!attempt_back_merge(q, req)) | 2946 | if (!attempt_back_merge(q, req)) |
2946 | elv_merged_request(q, req); | 2947 | elv_merged_request(q, req, el_ret); |
2947 | goto out; | 2948 | goto out; |
2948 | 2949 | ||
2949 | case ELEVATOR_FRONT_MERGE: | 2950 | case ELEVATOR_FRONT_MERGE: |
2950 | BUG_ON(!rq_mergeable(req)); | 2951 | BUG_ON(!rq_mergeable(req)); |
2951 | 2952 | ||
2952 | if (!q->front_merge_fn(q, req, bio)) | 2953 | if (!q->front_merge_fn(q, req, bio)) |
2953 | break; | 2954 | break; |
2954 | 2955 | ||
2955 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); | 2956 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); |
2956 | 2957 | ||
2957 | bio->bi_next = req->bio; | 2958 | bio->bi_next = req->bio; |
2958 | req->bio = bio; | 2959 | req->bio = bio; |
2959 | 2960 | ||
2960 | /* | 2961 | /* |
2961 | * may not be valid. if the low level driver said | 2962 | * may not be valid. if the low level driver said |
2962 | * it didn't need a bounce buffer then it better | 2963 | * it didn't need a bounce buffer then it better |
2963 | * not touch req->buffer either... | 2964 | * not touch req->buffer either... |
2964 | */ | 2965 | */ |
2965 | req->buffer = bio_data(bio); | 2966 | req->buffer = bio_data(bio); |
2966 | req->current_nr_sectors = cur_nr_sectors; | 2967 | req->current_nr_sectors = cur_nr_sectors; |
2967 | req->hard_cur_sectors = cur_nr_sectors; | 2968 | req->hard_cur_sectors = cur_nr_sectors; |
2968 | req->sector = req->hard_sector = sector; | 2969 | req->sector = req->hard_sector = sector; |
2969 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; | 2970 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; |
2970 | req->ioprio = ioprio_best(req->ioprio, prio); | 2971 | req->ioprio = ioprio_best(req->ioprio, prio); |
2971 | drive_stat_acct(req, nr_sectors, 0); | 2972 | drive_stat_acct(req, nr_sectors, 0); |
2972 | if (!attempt_front_merge(q, req)) | 2973 | if (!attempt_front_merge(q, req)) |
2973 | elv_merged_request(q, req); | 2974 | elv_merged_request(q, req, el_ret); |
2974 | goto out; | 2975 | goto out; |
2975 | 2976 | ||
2976 | /* ELV_NO_MERGE: elevator says don't/can't merge. */ | 2977 | /* ELV_NO_MERGE: elevator says don't/can't merge. */ |
2977 | default: | 2978 | default: |
2978 | ; | 2979 | ; |
2979 | } | 2980 | } |
2980 | 2981 | ||
2981 | get_rq: | 2982 | get_rq: |
2982 | /* | 2983 | /* |
2983 | * Grab a free request. This might sleep but cannot fail. | 2984 | * Grab a free request. This might sleep but cannot fail. |
2984 | * Returns with the queue unlocked. | 2985 | * Returns with the queue unlocked. |
2985 | */ | 2986 | */ |
2986 | req = get_request_wait(q, rw, bio); | 2987 | req = get_request_wait(q, rw, bio); |
2987 | 2988 | ||
2988 | /* | 2989 | /* |
2989 | * After dropping the lock and possibly sleeping here, our request | 2990 | * After dropping the lock and possibly sleeping here, our request |
2990 | * may now be mergeable after it had proven unmergeable (above). | 2991 | * may now be mergeable after it had proven unmergeable (above). |
2991 | * We don't worry about that case for efficiency. It won't happen | 2992 | * We don't worry about that case for efficiency. It won't happen |
2992 | * often, and the elevators are able to handle it. | 2993 | * often, and the elevators are able to handle it. |
2993 | */ | 2994 | */ |
2994 | init_request_from_bio(req, bio); | 2995 | init_request_from_bio(req, bio); |
2995 | 2996 | ||
2996 | spin_lock_irq(q->queue_lock); | 2997 | spin_lock_irq(q->queue_lock); |
2997 | if (elv_queue_empty(q)) | 2998 | if (elv_queue_empty(q)) |
2998 | blk_plug_device(q); | 2999 | blk_plug_device(q); |
2999 | add_request(q, req); | 3000 | add_request(q, req); |
3000 | out: | 3001 | out: |
3001 | if (sync) | 3002 | if (sync) |
3002 | __generic_unplug_device(q); | 3003 | __generic_unplug_device(q); |
3003 | 3004 | ||
3004 | spin_unlock_irq(q->queue_lock); | 3005 | spin_unlock_irq(q->queue_lock); |
3005 | return 0; | 3006 | return 0; |
3006 | 3007 | ||
3007 | end_io: | 3008 | end_io: |
3008 | bio_endio(bio, nr_sectors << 9, err); | 3009 | bio_endio(bio, nr_sectors << 9, err); |
3009 | return 0; | 3010 | return 0; |
3010 | } | 3011 | } |
3011 | 3012 | ||
3012 | /* | 3013 | /* |
3013 | * If bio->bi_dev is a partition, remap the location | 3014 | * If bio->bi_dev is a partition, remap the location |
3014 | */ | 3015 | */ |
3015 | static inline void blk_partition_remap(struct bio *bio) | 3016 | static inline void blk_partition_remap(struct bio *bio) |
3016 | { | 3017 | { |
3017 | struct block_device *bdev = bio->bi_bdev; | 3018 | struct block_device *bdev = bio->bi_bdev; |
3018 | 3019 | ||
3019 | if (bdev != bdev->bd_contains) { | 3020 | if (bdev != bdev->bd_contains) { |
3020 | struct hd_struct *p = bdev->bd_part; | 3021 | struct hd_struct *p = bdev->bd_part; |
3021 | const int rw = bio_data_dir(bio); | 3022 | const int rw = bio_data_dir(bio); |
3022 | 3023 | ||
3023 | p->sectors[rw] += bio_sectors(bio); | 3024 | p->sectors[rw] += bio_sectors(bio); |
3024 | p->ios[rw]++; | 3025 | p->ios[rw]++; |
3025 | 3026 | ||
3026 | bio->bi_sector += p->start_sect; | 3027 | bio->bi_sector += p->start_sect; |
3027 | bio->bi_bdev = bdev->bd_contains; | 3028 | bio->bi_bdev = bdev->bd_contains; |
3028 | } | 3029 | } |
3029 | } | 3030 | } |
3030 | 3031 | ||
3031 | static void handle_bad_sector(struct bio *bio) | 3032 | static void handle_bad_sector(struct bio *bio) |
3032 | { | 3033 | { |
3033 | char b[BDEVNAME_SIZE]; | 3034 | char b[BDEVNAME_SIZE]; |
3034 | 3035 | ||
3035 | printk(KERN_INFO "attempt to access beyond end of device\n"); | 3036 | printk(KERN_INFO "attempt to access beyond end of device\n"); |
3036 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", | 3037 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", |
3037 | bdevname(bio->bi_bdev, b), | 3038 | bdevname(bio->bi_bdev, b), |
3038 | bio->bi_rw, | 3039 | bio->bi_rw, |
3039 | (unsigned long long)bio->bi_sector + bio_sectors(bio), | 3040 | (unsigned long long)bio->bi_sector + bio_sectors(bio), |
3040 | (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); | 3041 | (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); |
3041 | 3042 | ||
3042 | set_bit(BIO_EOF, &bio->bi_flags); | 3043 | set_bit(BIO_EOF, &bio->bi_flags); |
3043 | } | 3044 | } |
3044 | 3045 | ||
3045 | /** | 3046 | /** |
3046 | * generic_make_request: hand a buffer to its device driver for I/O | 3047 | * generic_make_request: hand a buffer to its device driver for I/O |
3047 | * @bio: The bio describing the location in memory and on the device. | 3048 | * @bio: The bio describing the location in memory and on the device. |
3048 | * | 3049 | * |
3049 | * generic_make_request() is used to make I/O requests of block | 3050 | * generic_make_request() is used to make I/O requests of block |
3050 | * devices. It is passed a &struct bio, which describes the I/O that needs | 3051 | * devices. It is passed a &struct bio, which describes the I/O that needs |
3051 | * to be done. | 3052 | * to be done. |
3052 | * | 3053 | * |
3053 | * generic_make_request() does not return any status. The | 3054 | * generic_make_request() does not return any status. The |
3054 | * success/failure status of the request, along with notification of | 3055 | * success/failure status of the request, along with notification of |
3055 | * completion, is delivered asynchronously through the bio->bi_end_io | 3056 | * completion, is delivered asynchronously through the bio->bi_end_io |
3056 | * function described (one day) elsewhere. | 3057 | * function described (one day) elsewhere. |
3057 | * | 3058 | * |
3058 | * The caller of generic_make_request must make sure that bi_io_vec | 3059 | * The caller of generic_make_request must make sure that bi_io_vec |
3059 | * are set to describe the memory buffer, and that bi_dev and bi_sector are | 3060 | * are set to describe the memory buffer, and that bi_dev and bi_sector are |
3060 | * set to describe the device address, and the | 3061 | * set to describe the device address, and the |
3061 | * bi_end_io and optionally bi_private are set to describe how | 3062 | * bi_end_io and optionally bi_private are set to describe how |
3062 | * completion notification should be signaled. | 3063 | * completion notification should be signaled. |
3063 | * | 3064 | * |
3064 | * generic_make_request and the drivers it calls may use bi_next if this | 3065 | * generic_make_request and the drivers it calls may use bi_next if this |
3065 | * bio happens to be merged with someone else, and may change bi_dev and | 3066 | * bio happens to be merged with someone else, and may change bi_dev and |
3066 | * bi_sector for remaps as it sees fit. So the values of these fields | 3067 | * bi_sector for remaps as it sees fit. So the values of these fields |
3067 | * should NOT be depended on after the call to generic_make_request. | 3068 | * should NOT be depended on after the call to generic_make_request. |
3068 | */ | 3069 | */ |
3069 | void generic_make_request(struct bio *bio) | 3070 | void generic_make_request(struct bio *bio) |
3070 | { | 3071 | { |
3071 | request_queue_t *q; | 3072 | request_queue_t *q; |
3072 | sector_t maxsector; | 3073 | sector_t maxsector; |
3073 | int ret, nr_sectors = bio_sectors(bio); | 3074 | int ret, nr_sectors = bio_sectors(bio); |
3074 | dev_t old_dev; | 3075 | dev_t old_dev; |
3075 | 3076 | ||
3076 | might_sleep(); | 3077 | might_sleep(); |
3077 | /* Test device or partition size, when known. */ | 3078 | /* Test device or partition size, when known. */ |
3078 | maxsector = bio->bi_bdev->bd_inode->i_size >> 9; | 3079 | maxsector = bio->bi_bdev->bd_inode->i_size >> 9; |
3079 | if (maxsector) { | 3080 | if (maxsector) { |
3080 | sector_t sector = bio->bi_sector; | 3081 | sector_t sector = bio->bi_sector; |
3081 | 3082 | ||
3082 | if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { | 3083 | if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { |
3083 | /* | 3084 | /* |
3084 | * This may well happen - the kernel calls bread() | 3085 | * This may well happen - the kernel calls bread() |
3085 | * without checking the size of the device, e.g., when | 3086 | * without checking the size of the device, e.g., when |
3086 | * mounting a device. | 3087 | * mounting a device. |
3087 | */ | 3088 | */ |
3088 | handle_bad_sector(bio); | 3089 | handle_bad_sector(bio); |
3089 | goto end_io; | 3090 | goto end_io; |
3090 | } | 3091 | } |
3091 | } | 3092 | } |
3092 | 3093 | ||
3093 | /* | 3094 | /* |
3094 | * Resolve the mapping until finished. (drivers are | 3095 | * Resolve the mapping until finished. (drivers are |
3095 | * still free to implement/resolve their own stacking | 3096 | * still free to implement/resolve their own stacking |
3096 | * by explicitly returning 0) | 3097 | * by explicitly returning 0) |
3097 | * | 3098 | * |
3098 | * NOTE: we don't repeat the blk_size check for each new device. | 3099 | * NOTE: we don't repeat the blk_size check for each new device. |
3099 | * Stacking drivers are expected to know what they are doing. | 3100 | * Stacking drivers are expected to know what they are doing. |
3100 | */ | 3101 | */ |
3101 | maxsector = -1; | 3102 | maxsector = -1; |
3102 | old_dev = 0; | 3103 | old_dev = 0; |
3103 | do { | 3104 | do { |
3104 | char b[BDEVNAME_SIZE]; | 3105 | char b[BDEVNAME_SIZE]; |
3105 | 3106 | ||
3106 | q = bdev_get_queue(bio->bi_bdev); | 3107 | q = bdev_get_queue(bio->bi_bdev); |
3107 | if (!q) { | 3108 | if (!q) { |
3108 | printk(KERN_ERR | 3109 | printk(KERN_ERR |
3109 | "generic_make_request: Trying to access " | 3110 | "generic_make_request: Trying to access " |
3110 | "nonexistent block-device %s (%Lu)\n", | 3111 | "nonexistent block-device %s (%Lu)\n", |
3111 | bdevname(bio->bi_bdev, b), | 3112 | bdevname(bio->bi_bdev, b), |
3112 | (long long) bio->bi_sector); | 3113 | (long long) bio->bi_sector); |
3113 | end_io: | 3114 | end_io: |
3114 | bio_endio(bio, bio->bi_size, -EIO); | 3115 | bio_endio(bio, bio->bi_size, -EIO); |
3115 | break; | 3116 | break; |
3116 | } | 3117 | } |
3117 | 3118 | ||
3118 | if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) { | 3119 | if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) { |
3119 | printk("bio too big device %s (%u > %u)\n", | 3120 | printk("bio too big device %s (%u > %u)\n", |
3120 | bdevname(bio->bi_bdev, b), | 3121 | bdevname(bio->bi_bdev, b), |
3121 | bio_sectors(bio), | 3122 | bio_sectors(bio), |
3122 | q->max_hw_sectors); | 3123 | q->max_hw_sectors); |
3123 | goto end_io; | 3124 | goto end_io; |
3124 | } | 3125 | } |
3125 | 3126 | ||
3126 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | 3127 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) |
3127 | goto end_io; | 3128 | goto end_io; |
3128 | 3129 | ||
3129 | /* | 3130 | /* |
3130 | * If this device has partitions, remap block n | 3131 | * If this device has partitions, remap block n |
3131 | * of partition p to block n+start(p) of the disk. | 3132 | * of partition p to block n+start(p) of the disk. |
3132 | */ | 3133 | */ |
3133 | blk_partition_remap(bio); | 3134 | blk_partition_remap(bio); |
3134 | 3135 | ||
3135 | if (maxsector != -1) | 3136 | if (maxsector != -1) |
3136 | blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, | 3137 | blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, |
3137 | maxsector); | 3138 | maxsector); |
3138 | 3139 | ||
3139 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE); | 3140 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE); |
3140 | 3141 | ||
3141 | maxsector = bio->bi_sector; | 3142 | maxsector = bio->bi_sector; |
3142 | old_dev = bio->bi_bdev->bd_dev; | 3143 | old_dev = bio->bi_bdev->bd_dev; |
3143 | 3144 | ||
3144 | ret = q->make_request_fn(q, bio); | 3145 | ret = q->make_request_fn(q, bio); |
3145 | } while (ret); | 3146 | } while (ret); |
3146 | } | 3147 | } |
3147 | 3148 | ||
3148 | EXPORT_SYMBOL(generic_make_request); | 3149 | EXPORT_SYMBOL(generic_make_request); |
3149 | 3150 | ||
3150 | /** | 3151 | /** |
3151 | * submit_bio: submit a bio to the block device layer for I/O | 3152 | * submit_bio: submit a bio to the block device layer for I/O |
3152 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) | 3153 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) |
3153 | * @bio: The &struct bio which describes the I/O | 3154 | * @bio: The &struct bio which describes the I/O |
3154 | * | 3155 | * |
3155 | * submit_bio() is very similar in purpose to generic_make_request(), and | 3156 | * submit_bio() is very similar in purpose to generic_make_request(), and |
3156 | * uses that function to do most of the work. Both are fairly rough | 3157 | * uses that function to do most of the work. Both are fairly rough |
3157 | * interfaces, @bio must be presetup and ready for I/O. | 3158 | * interfaces, @bio must be presetup and ready for I/O. |
3158 | * | 3159 | * |
3159 | */ | 3160 | */ |
3160 | void submit_bio(int rw, struct bio *bio) | 3161 | void submit_bio(int rw, struct bio *bio) |
3161 | { | 3162 | { |
3162 | int count = bio_sectors(bio); | 3163 | int count = bio_sectors(bio); |
3163 | 3164 | ||
3164 | BIO_BUG_ON(!bio->bi_size); | 3165 | BIO_BUG_ON(!bio->bi_size); |
3165 | BIO_BUG_ON(!bio->bi_io_vec); | 3166 | BIO_BUG_ON(!bio->bi_io_vec); |
3166 | bio->bi_rw |= rw; | 3167 | bio->bi_rw |= rw; |
3167 | if (rw & WRITE) | 3168 | if (rw & WRITE) |
3168 | count_vm_events(PGPGOUT, count); | 3169 | count_vm_events(PGPGOUT, count); |
3169 | else | 3170 | else |
3170 | count_vm_events(PGPGIN, count); | 3171 | count_vm_events(PGPGIN, count); |
3171 | 3172 | ||
3172 | if (unlikely(block_dump)) { | 3173 | if (unlikely(block_dump)) { |
3173 | char b[BDEVNAME_SIZE]; | 3174 | char b[BDEVNAME_SIZE]; |
3174 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", | 3175 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", |
3175 | current->comm, current->pid, | 3176 | current->comm, current->pid, |
3176 | (rw & WRITE) ? "WRITE" : "READ", | 3177 | (rw & WRITE) ? "WRITE" : "READ", |
3177 | (unsigned long long)bio->bi_sector, | 3178 | (unsigned long long)bio->bi_sector, |
3178 | bdevname(bio->bi_bdev,b)); | 3179 | bdevname(bio->bi_bdev,b)); |
3179 | } | 3180 | } |
3180 | 3181 | ||
3181 | generic_make_request(bio); | 3182 | generic_make_request(bio); |
3182 | } | 3183 | } |
3183 | 3184 | ||
3184 | EXPORT_SYMBOL(submit_bio); | 3185 | EXPORT_SYMBOL(submit_bio); |
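An illustrative sketch (not part of this diff) of the setup the generic_make_request() comment asks for: bi_bdev and bi_sector describe the device address, the io_vec is filled via bio_add_page(), and bi_end_io/bi_private describe completion. The function name and the endio/priv parameters are assumptions.

static int read_one_page(struct block_device *bdev, sector_t sector,
			 struct page *page, bio_end_io_t *endio, void *priv)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	if (!bio)
		return -ENOMEM;

	bio->bi_bdev = bdev;			/* may be remapped for partitions */
	bio->bi_sector = sector;		/* device address */
	bio->bi_end_io = endio;			/* completion notification */
	bio->bi_private = priv;

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EIO;
	}

	submit_bio(READ, bio);			/* ends up in generic_make_request() */
	return 0;
}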
3185 | 3186 | ||
3186 | static void blk_recalc_rq_segments(struct request *rq) | 3187 | static void blk_recalc_rq_segments(struct request *rq) |
3187 | { | 3188 | { |
3188 | struct bio *bio, *prevbio = NULL; | 3189 | struct bio *bio, *prevbio = NULL; |
3189 | int nr_phys_segs, nr_hw_segs; | 3190 | int nr_phys_segs, nr_hw_segs; |
3190 | unsigned int phys_size, hw_size; | 3191 | unsigned int phys_size, hw_size; |
3191 | request_queue_t *q = rq->q; | 3192 | request_queue_t *q = rq->q; |
3192 | 3193 | ||
3193 | if (!rq->bio) | 3194 | if (!rq->bio) |
3194 | return; | 3195 | return; |
3195 | 3196 | ||
3196 | phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; | 3197 | phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; |
3197 | rq_for_each_bio(bio, rq) { | 3198 | rq_for_each_bio(bio, rq) { |
3198 | /* Force bio hw/phys segs to be recalculated. */ | 3199 | /* Force bio hw/phys segs to be recalculated. */ |
3199 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); | 3200 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); |
3200 | 3201 | ||
3201 | nr_phys_segs += bio_phys_segments(q, bio); | 3202 | nr_phys_segs += bio_phys_segments(q, bio); |
3202 | nr_hw_segs += bio_hw_segments(q, bio); | 3203 | nr_hw_segs += bio_hw_segments(q, bio); |
3203 | if (prevbio) { | 3204 | if (prevbio) { |
3204 | int pseg = phys_size + prevbio->bi_size + bio->bi_size; | 3205 | int pseg = phys_size + prevbio->bi_size + bio->bi_size; |
3205 | int hseg = hw_size + prevbio->bi_size + bio->bi_size; | 3206 | int hseg = hw_size + prevbio->bi_size + bio->bi_size; |
3206 | 3207 | ||
3207 | if (blk_phys_contig_segment(q, prevbio, bio) && | 3208 | if (blk_phys_contig_segment(q, prevbio, bio) && |
3208 | pseg <= q->max_segment_size) { | 3209 | pseg <= q->max_segment_size) { |
3209 | nr_phys_segs--; | 3210 | nr_phys_segs--; |
3210 | phys_size += prevbio->bi_size + bio->bi_size; | 3211 | phys_size += prevbio->bi_size + bio->bi_size; |
3211 | } else | 3212 | } else |
3212 | phys_size = 0; | 3213 | phys_size = 0; |
3213 | 3214 | ||
3214 | if (blk_hw_contig_segment(q, prevbio, bio) && | 3215 | if (blk_hw_contig_segment(q, prevbio, bio) && |
3215 | hseg <= q->max_segment_size) { | 3216 | hseg <= q->max_segment_size) { |
3216 | nr_hw_segs--; | 3217 | nr_hw_segs--; |
3217 | hw_size += prevbio->bi_size + bio->bi_size; | 3218 | hw_size += prevbio->bi_size + bio->bi_size; |
3218 | } else | 3219 | } else |
3219 | hw_size = 0; | 3220 | hw_size = 0; |
3220 | } | 3221 | } |
3221 | prevbio = bio; | 3222 | prevbio = bio; |
3222 | } | 3223 | } |
3223 | 3224 | ||
3224 | rq->nr_phys_segments = nr_phys_segs; | 3225 | rq->nr_phys_segments = nr_phys_segs; |
3225 | rq->nr_hw_segments = nr_hw_segs; | 3226 | rq->nr_hw_segments = nr_hw_segs; |
3226 | } | 3227 | } |
3227 | 3228 | ||
3228 | static void blk_recalc_rq_sectors(struct request *rq, int nsect) | 3229 | static void blk_recalc_rq_sectors(struct request *rq, int nsect) |
3229 | { | 3230 | { |
3230 | if (blk_fs_request(rq)) { | 3231 | if (blk_fs_request(rq)) { |
3231 | rq->hard_sector += nsect; | 3232 | rq->hard_sector += nsect; |
3232 | rq->hard_nr_sectors -= nsect; | 3233 | rq->hard_nr_sectors -= nsect; |
3233 | 3234 | ||
3234 | /* | 3235 | /* |
3235 | * Move the I/O submission pointers ahead if required. | 3236 | * Move the I/O submission pointers ahead if required. |
3236 | */ | 3237 | */ |
3237 | if ((rq->nr_sectors >= rq->hard_nr_sectors) && | 3238 | if ((rq->nr_sectors >= rq->hard_nr_sectors) && |
3238 | (rq->sector <= rq->hard_sector)) { | 3239 | (rq->sector <= rq->hard_sector)) { |
3239 | rq->sector = rq->hard_sector; | 3240 | rq->sector = rq->hard_sector; |
3240 | rq->nr_sectors = rq->hard_nr_sectors; | 3241 | rq->nr_sectors = rq->hard_nr_sectors; |
3241 | rq->hard_cur_sectors = bio_cur_sectors(rq->bio); | 3242 | rq->hard_cur_sectors = bio_cur_sectors(rq->bio); |
3242 | rq->current_nr_sectors = rq->hard_cur_sectors; | 3243 | rq->current_nr_sectors = rq->hard_cur_sectors; |
3243 | rq->buffer = bio_data(rq->bio); | 3244 | rq->buffer = bio_data(rq->bio); |
3244 | } | 3245 | } |
3245 | 3246 | ||
3246 | /* | 3247 | /* |
3247 | * if total number of sectors is less than the first segment | 3248 | * if total number of sectors is less than the first segment |
3248 | * size, something has gone terribly wrong | 3249 | * size, something has gone terribly wrong |
3249 | */ | 3250 | */ |
3250 | if (rq->nr_sectors < rq->current_nr_sectors) { | 3251 | if (rq->nr_sectors < rq->current_nr_sectors) { |
3251 | printk("blk: request botched\n"); | 3252 | printk("blk: request botched\n"); |
3252 | rq->nr_sectors = rq->current_nr_sectors; | 3253 | rq->nr_sectors = rq->current_nr_sectors; |
3253 | } | 3254 | } |
3254 | } | 3255 | } |
3255 | } | 3256 | } |
3256 | 3257 | ||
3257 | static int __end_that_request_first(struct request *req, int uptodate, | 3258 | static int __end_that_request_first(struct request *req, int uptodate, |
3258 | int nr_bytes) | 3259 | int nr_bytes) |
3259 | { | 3260 | { |
3260 | int total_bytes, bio_nbytes, error, next_idx = 0; | 3261 | int total_bytes, bio_nbytes, error, next_idx = 0; |
3261 | struct bio *bio; | 3262 | struct bio *bio; |
3262 | 3263 | ||
3263 | blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); | 3264 | blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); |
3264 | 3265 | ||
3265 | /* | 3266 | /* |
3266 | * extend uptodate bool to allow < 0 value to be direct io error | 3267 | * extend uptodate bool to allow < 0 value to be direct io error |
3267 | */ | 3268 | */ |
3268 | error = 0; | 3269 | error = 0; |
3269 | if (end_io_error(uptodate)) | 3270 | if (end_io_error(uptodate)) |
3270 | error = !uptodate ? -EIO : uptodate; | 3271 | error = !uptodate ? -EIO : uptodate; |
3271 | 3272 | ||
3272 | /* | 3273 | /* |
3273 | * for a REQ_BLOCK_PC request, we want to carry any eventual | 3274 | * for a REQ_BLOCK_PC request, we want to carry any eventual |
3274 | * sense key with us all the way through | 3275 | * sense key with us all the way through |
3275 | */ | 3276 | */ |
3276 | if (!blk_pc_request(req)) | 3277 | if (!blk_pc_request(req)) |
3277 | req->errors = 0; | 3278 | req->errors = 0; |
3278 | 3279 | ||
3279 | if (!uptodate) { | 3280 | if (!uptodate) { |
3280 | if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET)) | 3281 | if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET)) |
3281 | printk("end_request: I/O error, dev %s, sector %llu\n", | 3282 | printk("end_request: I/O error, dev %s, sector %llu\n", |
3282 | req->rq_disk ? req->rq_disk->disk_name : "?", | 3283 | req->rq_disk ? req->rq_disk->disk_name : "?", |
3283 | (unsigned long long)req->sector); | 3284 | (unsigned long long)req->sector); |
3284 | } | 3285 | } |
3285 | 3286 | ||
3286 | if (blk_fs_request(req) && req->rq_disk) { | 3287 | if (blk_fs_request(req) && req->rq_disk) { |
3287 | const int rw = rq_data_dir(req); | 3288 | const int rw = rq_data_dir(req); |
3288 | 3289 | ||
3289 | disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9); | 3290 | disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9); |
3290 | } | 3291 | } |
3291 | 3292 | ||
3292 | total_bytes = bio_nbytes = 0; | 3293 | total_bytes = bio_nbytes = 0; |
3293 | while ((bio = req->bio) != NULL) { | 3294 | while ((bio = req->bio) != NULL) { |
3294 | int nbytes; | 3295 | int nbytes; |
3295 | 3296 | ||
3296 | if (nr_bytes >= bio->bi_size) { | 3297 | if (nr_bytes >= bio->bi_size) { |
3297 | req->bio = bio->bi_next; | 3298 | req->bio = bio->bi_next; |
3298 | nbytes = bio->bi_size; | 3299 | nbytes = bio->bi_size; |
3299 | if (!ordered_bio_endio(req, bio, nbytes, error)) | 3300 | if (!ordered_bio_endio(req, bio, nbytes, error)) |
3300 | bio_endio(bio, nbytes, error); | 3301 | bio_endio(bio, nbytes, error); |
3301 | next_idx = 0; | 3302 | next_idx = 0; |
3302 | bio_nbytes = 0; | 3303 | bio_nbytes = 0; |
3303 | } else { | 3304 | } else { |
3304 | int idx = bio->bi_idx + next_idx; | 3305 | int idx = bio->bi_idx + next_idx; |
3305 | 3306 | ||
3306 | if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { | 3307 | if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { |
3307 | blk_dump_rq_flags(req, "__end_that"); | 3308 | blk_dump_rq_flags(req, "__end_that"); |
3308 | printk("%s: bio idx %d >= vcnt %d\n", | 3309 | printk("%s: bio idx %d >= vcnt %d\n", |
3309 | __FUNCTION__, | 3310 | __FUNCTION__, |
3310 | bio->bi_idx, bio->bi_vcnt); | 3311 | bio->bi_idx, bio->bi_vcnt); |
3311 | break; | 3312 | break; |
3312 | } | 3313 | } |
3313 | 3314 | ||
3314 | nbytes = bio_iovec_idx(bio, idx)->bv_len; | 3315 | nbytes = bio_iovec_idx(bio, idx)->bv_len; |
3315 | BIO_BUG_ON(nbytes > bio->bi_size); | 3316 | BIO_BUG_ON(nbytes > bio->bi_size); |
3316 | 3317 | ||
3317 | /* | 3318 | /* |
3318 | * not a complete bvec done | 3319 | * not a complete bvec done |
3319 | */ | 3320 | */ |
3320 | if (unlikely(nbytes > nr_bytes)) { | 3321 | if (unlikely(nbytes > nr_bytes)) { |
3321 | bio_nbytes += nr_bytes; | 3322 | bio_nbytes += nr_bytes; |
3322 | total_bytes += nr_bytes; | 3323 | total_bytes += nr_bytes; |
3323 | break; | 3324 | break; |
3324 | } | 3325 | } |
3325 | 3326 | ||
3326 | /* | 3327 | /* |
3327 | * advance to the next vector | 3328 | * advance to the next vector |
3328 | */ | 3329 | */ |
3329 | next_idx++; | 3330 | next_idx++; |
3330 | bio_nbytes += nbytes; | 3331 | bio_nbytes += nbytes; |
3331 | } | 3332 | } |
3332 | 3333 | ||
3333 | total_bytes += nbytes; | 3334 | total_bytes += nbytes; |
3334 | nr_bytes -= nbytes; | 3335 | nr_bytes -= nbytes; |
3335 | 3336 | ||
3336 | if ((bio = req->bio)) { | 3337 | if ((bio = req->bio)) { |
3337 | /* | 3338 | /* |
3338 | * end more in this run, or just return 'not-done' | 3339 | * end more in this run, or just return 'not-done' |
3339 | */ | 3340 | */ |
3340 | if (unlikely(nr_bytes <= 0)) | 3341 | if (unlikely(nr_bytes <= 0)) |
3341 | break; | 3342 | break; |
3342 | } | 3343 | } |
3343 | } | 3344 | } |
3344 | 3345 | ||
3345 | /* | 3346 | /* |
3346 | * completely done | 3347 | * completely done |
3347 | */ | 3348 | */ |
3348 | if (!req->bio) | 3349 | if (!req->bio) |
3349 | return 0; | 3350 | return 0; |
3350 | 3351 | ||
3351 | /* | 3352 | /* |
3352 | * if the request wasn't completed, update state | 3353 | * if the request wasn't completed, update state |
3353 | */ | 3354 | */ |
3354 | if (bio_nbytes) { | 3355 | if (bio_nbytes) { |
3355 | if (!ordered_bio_endio(req, bio, bio_nbytes, error)) | 3356 | if (!ordered_bio_endio(req, bio, bio_nbytes, error)) |
3356 | bio_endio(bio, bio_nbytes, error); | 3357 | bio_endio(bio, bio_nbytes, error); |
3357 | bio->bi_idx += next_idx; | 3358 | bio->bi_idx += next_idx; |
3358 | bio_iovec(bio)->bv_offset += nr_bytes; | 3359 | bio_iovec(bio)->bv_offset += nr_bytes; |
3359 | bio_iovec(bio)->bv_len -= nr_bytes; | 3360 | bio_iovec(bio)->bv_len -= nr_bytes; |
3360 | } | 3361 | } |
3361 | 3362 | ||
3362 | blk_recalc_rq_sectors(req, total_bytes >> 9); | 3363 | blk_recalc_rq_sectors(req, total_bytes >> 9); |
3363 | blk_recalc_rq_segments(req); | 3364 | blk_recalc_rq_segments(req); |
3364 | return 1; | 3365 | return 1; |
3365 | } | 3366 | } |
3366 | 3367 | ||
3367 | /** | 3368 | /** |
3368 | * end_that_request_first - end I/O on a request | 3369 | * end_that_request_first - end I/O on a request |
3369 | * @req: the request being processed | 3370 | * @req: the request being processed |
3370 | * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error | 3371 | * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error |
3371 | * @nr_sectors: number of sectors to end I/O on | 3372 | * @nr_sectors: number of sectors to end I/O on |
3372 | * | 3373 | * |
3373 | * Description: | 3374 | * Description: |
3374 | * Ends I/O on a number of sectors attached to @req, and sets it up | 3375 | * Ends I/O on a number of sectors attached to @req, and sets it up |
3375 | * for the next range of segments (if any) in the cluster. | 3376 | * for the next range of segments (if any) in the cluster. |
3376 | * | 3377 | * |
3377 | * Return: | 3378 | * Return: |
3378 | * 0 - we are done with this request, call end_that_request_last() | 3379 | * 0 - we are done with this request, call end_that_request_last() |
3379 | * 1 - still buffers pending for this request | 3380 | * 1 - still buffers pending for this request |
3380 | **/ | 3381 | **/ |
3381 | int end_that_request_first(struct request *req, int uptodate, int nr_sectors) | 3382 | int end_that_request_first(struct request *req, int uptodate, int nr_sectors) |
3382 | { | 3383 | { |
3383 | return __end_that_request_first(req, uptodate, nr_sectors << 9); | 3384 | return __end_that_request_first(req, uptodate, nr_sectors << 9); |
3384 | } | 3385 | } |
3385 | 3386 | ||
3386 | EXPORT_SYMBOL(end_that_request_first); | 3387 | EXPORT_SYMBOL(end_that_request_first); |
3387 | 3388 | ||
3388 | /** | 3389 | /** |
3389 | * end_that_request_chunk - end I/O on a request | 3390 | * end_that_request_chunk - end I/O on a request |
3390 | * @req: the request being processed | 3391 | * @req: the request being processed |
3391 | * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error | 3392 | * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error |
3392 | * @nr_bytes: number of bytes to complete | 3393 | * @nr_bytes: number of bytes to complete |
3393 | * | 3394 | * |
3394 | * Description: | 3395 | * Description: |
3395 | * Ends I/O on a number of bytes attached to @req, and sets it up | 3396 | * Ends I/O on a number of bytes attached to @req, and sets it up |
3396 | * for the next range of segments (if any). Like end_that_request_first(), | 3397 | * for the next range of segments (if any). Like end_that_request_first(), |
3397 | * but deals with bytes instead of sectors. | 3398 | * but deals with bytes instead of sectors. |
3398 | * | 3399 | * |
3399 | * Return: | 3400 | * Return: |
3400 | * 0 - we are done with this request, call end_that_request_last() | 3401 | * 0 - we are done with this request, call end_that_request_last() |
3401 | * 1 - still buffers pending for this request | 3402 | * 1 - still buffers pending for this request |
3402 | **/ | 3403 | **/ |
3403 | int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) | 3404 | int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) |
3404 | { | 3405 | { |
3405 | return __end_that_request_first(req, uptodate, nr_bytes); | 3406 | return __end_that_request_first(req, uptodate, nr_bytes); |
3406 | } | 3407 | } |
3407 | 3408 | ||
3408 | EXPORT_SYMBOL(end_that_request_chunk); | 3409 | EXPORT_SYMBOL(end_that_request_chunk); |
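For orientation, a minimal sketch of how a driver completion path typically pairs these helpers, modelled on the end_request() helper further below; example_end_io() and the nr_sectors it is passed are illustrative only, and the queue lock must be held as noted at end_that_request_last() below.

static void example_end_io(struct request *rq, int uptodate, int nr_sectors)
{
        /*
         * Illustrative only: end_that_request_first() returns 0 once no
         * bios remain on the request, at which point it is dequeued and
         * finished off.
         */
        if (!end_that_request_first(rq, uptodate, nr_sectors)) {
                blkdev_dequeue_request(rq);
                end_that_request_last(rq, uptodate);
        }
}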
3409 | 3410 | ||
3410 | /* | 3411 | /* |
3411 | * splice the completion data to a local structure and hand off to | 3412 | * splice the completion data to a local structure and hand off to |
3412 | * process_completion_queue() to complete the requests | 3413 | * process_completion_queue() to complete the requests |
3413 | */ | 3414 | */ |
3414 | static void blk_done_softirq(struct softirq_action *h) | 3415 | static void blk_done_softirq(struct softirq_action *h) |
3415 | { | 3416 | { |
3416 | struct list_head *cpu_list, local_list; | 3417 | struct list_head *cpu_list, local_list; |
3417 | 3418 | ||
3418 | local_irq_disable(); | 3419 | local_irq_disable(); |
3419 | cpu_list = &__get_cpu_var(blk_cpu_done); | 3420 | cpu_list = &__get_cpu_var(blk_cpu_done); |
3420 | list_replace_init(cpu_list, &local_list); | 3421 | list_replace_init(cpu_list, &local_list); |
3421 | local_irq_enable(); | 3422 | local_irq_enable(); |
3422 | 3423 | ||
3423 | while (!list_empty(&local_list)) { | 3424 | while (!list_empty(&local_list)) { |
3424 | struct request *rq = list_entry(local_list.next, struct request, donelist); | 3425 | struct request *rq = list_entry(local_list.next, struct request, donelist); |
3425 | 3426 | ||
3426 | list_del_init(&rq->donelist); | 3427 | list_del_init(&rq->donelist); |
3427 | rq->q->softirq_done_fn(rq); | 3428 | rq->q->softirq_done_fn(rq); |
3428 | } | 3429 | } |
3429 | } | 3430 | } |
3430 | 3431 | ||
3431 | #ifdef CONFIG_HOTPLUG_CPU | 3432 | #ifdef CONFIG_HOTPLUG_CPU |
3432 | 3433 | ||
3433 | static int blk_cpu_notify(struct notifier_block *self, unsigned long action, | 3434 | static int blk_cpu_notify(struct notifier_block *self, unsigned long action, |
3434 | void *hcpu) | 3435 | void *hcpu) |
3435 | { | 3436 | { |
3436 | /* | 3437 | /* |
3437 | * If a CPU goes away, splice its entries to the current CPU | 3438 | * If a CPU goes away, splice its entries to the current CPU |
3438 | * and trigger a run of the softirq | 3439 | * and trigger a run of the softirq |
3439 | */ | 3440 | */ |
3440 | if (action == CPU_DEAD) { | 3441 | if (action == CPU_DEAD) { |
3441 | int cpu = (unsigned long) hcpu; | 3442 | int cpu = (unsigned long) hcpu; |
3442 | 3443 | ||
3443 | local_irq_disable(); | 3444 | local_irq_disable(); |
3444 | list_splice_init(&per_cpu(blk_cpu_done, cpu), | 3445 | list_splice_init(&per_cpu(blk_cpu_done, cpu), |
3445 | &__get_cpu_var(blk_cpu_done)); | 3446 | &__get_cpu_var(blk_cpu_done)); |
3446 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | 3447 | raise_softirq_irqoff(BLOCK_SOFTIRQ); |
3447 | local_irq_enable(); | 3448 | local_irq_enable(); |
3448 | } | 3449 | } |
3449 | 3450 | ||
3450 | return NOTIFY_OK; | 3451 | return NOTIFY_OK; |
3451 | } | 3452 | } |
3452 | 3453 | ||
3453 | 3454 | ||
3454 | static struct notifier_block __devinitdata blk_cpu_notifier = { | 3455 | static struct notifier_block __devinitdata blk_cpu_notifier = { |
3455 | .notifier_call = blk_cpu_notify, | 3456 | .notifier_call = blk_cpu_notify, |
3456 | }; | 3457 | }; |
3457 | 3458 | ||
3458 | #endif /* CONFIG_HOTPLUG_CPU */ | 3459 | #endif /* CONFIG_HOTPLUG_CPU */ |
3459 | 3460 | ||
3460 | /** | 3461 | /** |
3461 | * blk_complete_request - end I/O on a request | 3462 | * blk_complete_request - end I/O on a request |
3462 | * @req: the request being processed | 3463 | * @req: the request being processed |
3463 | * | 3464 | * |
3464 | * Description: | 3465 | * Description: |
3465 | * Ends all I/O on a request. It does not handle partial completions, | 3466 | * Ends all I/O on a request. It does not handle partial completions, |
3466 | * unless the driver actually implements this in its completion callback | 3467 | * unless the driver actually implements this in its completion callback |
3467 | * through requeueing. The actual completion happens out-of-order, | 3468 | * through requeueing. The actual completion happens out-of-order, |
3468 | * through a softirq handler. The user must have registered a completion | 3469 | * through a softirq handler. The user must have registered a completion |
3469 | * callback through blk_queue_softirq_done(). | 3470 | * callback through blk_queue_softirq_done(). |
3470 | **/ | 3471 | **/ |
3471 | 3472 | ||
3472 | void blk_complete_request(struct request *req) | 3473 | void blk_complete_request(struct request *req) |
3473 | { | 3474 | { |
3474 | struct list_head *cpu_list; | 3475 | struct list_head *cpu_list; |
3475 | unsigned long flags; | 3476 | unsigned long flags; |
3476 | 3477 | ||
3477 | BUG_ON(!req->q->softirq_done_fn); | 3478 | BUG_ON(!req->q->softirq_done_fn); |
3478 | 3479 | ||
3479 | local_irq_save(flags); | 3480 | local_irq_save(flags); |
3480 | 3481 | ||
3481 | cpu_list = &__get_cpu_var(blk_cpu_done); | 3482 | cpu_list = &__get_cpu_var(blk_cpu_done); |
3482 | list_add_tail(&req->donelist, cpu_list); | 3483 | list_add_tail(&req->donelist, cpu_list); |
3483 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | 3484 | raise_softirq_irqoff(BLOCK_SOFTIRQ); |
3484 | 3485 | ||
3485 | local_irq_restore(flags); | 3486 | local_irq_restore(flags); |
3486 | } | 3487 | } |
3487 | 3488 | ||
3488 | EXPORT_SYMBOL(blk_complete_request); | 3489 | EXPORT_SYMBOL(blk_complete_request); |
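A hedged sketch of the deferred-completion pattern the comment above describes: the driver registers its callback once with blk_queue_softirq_done() and then hands finished requests to blk_complete_request() from its interrupt handler; the example_* names are placeholders.

/* runs later from BLOCK_SOFTIRQ, on the CPU that called blk_complete_request() */
static void example_softirq_done(struct request *rq)
{
        /* do the end_that_request_* processing here, out of hardirq context */
}

static void example_setup_queue(request_queue_t *q)
{
        blk_queue_softirq_done(q, example_softirq_done);
}

/* called from the driver's interrupt handler once 'rq' has completed */
static void example_irq_done(struct request *rq)
{
        blk_complete_request(rq);
}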
3489 | 3490 | ||
3490 | /* | 3491 | /* |
3491 | * queue lock must be held | 3492 | * queue lock must be held |
3492 | */ | 3493 | */ |
3493 | void end_that_request_last(struct request *req, int uptodate) | 3494 | void end_that_request_last(struct request *req, int uptodate) |
3494 | { | 3495 | { |
3495 | struct gendisk *disk = req->rq_disk; | 3496 | struct gendisk *disk = req->rq_disk; |
3496 | int error; | 3497 | int error; |
3497 | 3498 | ||
3498 | /* | 3499 | /* |
3499 | * extend uptodate bool to allow < 0 value to be direct io error | 3500 | * extend uptodate bool to allow < 0 value to be direct io error |
3500 | */ | 3501 | */ |
3501 | error = 0; | 3502 | error = 0; |
3502 | if (end_io_error(uptodate)) | 3503 | if (end_io_error(uptodate)) |
3503 | error = !uptodate ? -EIO : uptodate; | 3504 | error = !uptodate ? -EIO : uptodate; |
3504 | 3505 | ||
3505 | if (unlikely(laptop_mode) && blk_fs_request(req)) | 3506 | if (unlikely(laptop_mode) && blk_fs_request(req)) |
3506 | laptop_io_completion(); | 3507 | laptop_io_completion(); |
3507 | 3508 | ||
3508 | /* | 3509 | /* |
3509 | * Account IO completion. bar_rq isn't accounted as a normal | 3510 | * Account IO completion. bar_rq isn't accounted as a normal |
3510 | * IO on queueing nor completion. Accounting the containing | 3511 | * IO on queueing nor completion. Accounting the containing |
3511 | * request is enough. | 3512 | * request is enough. |
3512 | */ | 3513 | */ |
3513 | if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { | 3514 | if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { |
3514 | unsigned long duration = jiffies - req->start_time; | 3515 | unsigned long duration = jiffies - req->start_time; |
3515 | const int rw = rq_data_dir(req); | 3516 | const int rw = rq_data_dir(req); |
3516 | 3517 | ||
3517 | __disk_stat_inc(disk, ios[rw]); | 3518 | __disk_stat_inc(disk, ios[rw]); |
3518 | __disk_stat_add(disk, ticks[rw], duration); | 3519 | __disk_stat_add(disk, ticks[rw], duration); |
3519 | disk_round_stats(disk); | 3520 | disk_round_stats(disk); |
3520 | disk->in_flight--; | 3521 | disk->in_flight--; |
3521 | } | 3522 | } |
3522 | if (req->end_io) | 3523 | if (req->end_io) |
3523 | req->end_io(req, error); | 3524 | req->end_io(req, error); |
3524 | else | 3525 | else |
3525 | __blk_put_request(req->q, req); | 3526 | __blk_put_request(req->q, req); |
3526 | } | 3527 | } |
3527 | 3528 | ||
3528 | EXPORT_SYMBOL(end_that_request_last); | 3529 | EXPORT_SYMBOL(end_that_request_last); |
3529 | 3530 | ||
3530 | void end_request(struct request *req, int uptodate) | 3531 | void end_request(struct request *req, int uptodate) |
3531 | { | 3532 | { |
3532 | if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { | 3533 | if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { |
3533 | add_disk_randomness(req->rq_disk); | 3534 | add_disk_randomness(req->rq_disk); |
3534 | blkdev_dequeue_request(req); | 3535 | blkdev_dequeue_request(req); |
3535 | end_that_request_last(req, uptodate); | 3536 | end_that_request_last(req, uptodate); |
3536 | } | 3537 | } |
3537 | } | 3538 | } |
3538 | 3539 | ||
3539 | EXPORT_SYMBOL(end_request); | 3540 | EXPORT_SYMBOL(end_request); |
3540 | 3541 | ||
3541 | void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio) | 3542 | void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio) |
3542 | { | 3543 | { |
3543 | /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ | 3544 | /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ |
3544 | rq->cmd_flags |= (bio->bi_rw & 3); | 3545 | rq->cmd_flags |= (bio->bi_rw & 3); |
3545 | 3546 | ||
3546 | rq->nr_phys_segments = bio_phys_segments(q, bio); | 3547 | rq->nr_phys_segments = bio_phys_segments(q, bio); |
3547 | rq->nr_hw_segments = bio_hw_segments(q, bio); | 3548 | rq->nr_hw_segments = bio_hw_segments(q, bio); |
3548 | rq->current_nr_sectors = bio_cur_sectors(bio); | 3549 | rq->current_nr_sectors = bio_cur_sectors(bio); |
3549 | rq->hard_cur_sectors = rq->current_nr_sectors; | 3550 | rq->hard_cur_sectors = rq->current_nr_sectors; |
3550 | rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); | 3551 | rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); |
3551 | rq->buffer = bio_data(bio); | 3552 | rq->buffer = bio_data(bio); |
3552 | 3553 | ||
3553 | rq->bio = rq->biotail = bio; | 3554 | rq->bio = rq->biotail = bio; |
3554 | } | 3555 | } |
3555 | 3556 | ||
3556 | EXPORT_SYMBOL(blk_rq_bio_prep); | 3557 | EXPORT_SYMBOL(blk_rq_bio_prep); |
3557 | 3558 | ||
3558 | int kblockd_schedule_work(struct work_struct *work) | 3559 | int kblockd_schedule_work(struct work_struct *work) |
3559 | { | 3560 | { |
3560 | return queue_work(kblockd_workqueue, work); | 3561 | return queue_work(kblockd_workqueue, work); |
3561 | } | 3562 | } |
3562 | 3563 | ||
3563 | EXPORT_SYMBOL(kblockd_schedule_work); | 3564 | EXPORT_SYMBOL(kblockd_schedule_work); |
3564 | 3565 | ||
3565 | void kblockd_flush(void) | 3566 | void kblockd_flush(void) |
3566 | { | 3567 | { |
3567 | flush_workqueue(kblockd_workqueue); | 3568 | flush_workqueue(kblockd_workqueue); |
3568 | } | 3569 | } |
3569 | EXPORT_SYMBOL(kblockd_flush); | 3570 | EXPORT_SYMBOL(kblockd_flush); |
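For context, a sketch of how kblockd is commonly used: work is deferred with kblockd_schedule_work() and kblockd_flush() waits for the workqueue to drain. The three-argument DECLARE_WORK() and the void (*)(void *) work function below assume the workqueue API of this kernel generation; the example_* names are made up.

static void example_work_fn(void *data)
{
        /* deferred block-layer work runs here, in process context */
}

static DECLARE_WORK(example_work, example_work_fn, NULL);

static void example_defer_and_wait(void)
{
        kblockd_schedule_work(&example_work);   /* hand off to kblockd */
        kblockd_flush();                        /* wait for queued work to run */
}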
3570 | 3571 | ||
3571 | int __init blk_dev_init(void) | 3572 | int __init blk_dev_init(void) |
3572 | { | 3573 | { |
3573 | int i; | 3574 | int i; |
3574 | 3575 | ||
3575 | kblockd_workqueue = create_workqueue("kblockd"); | 3576 | kblockd_workqueue = create_workqueue("kblockd"); |
3576 | if (!kblockd_workqueue) | 3577 | if (!kblockd_workqueue) |
3577 | panic("Failed to create kblockd\n"); | 3578 | panic("Failed to create kblockd\n"); |
3578 | 3579 | ||
3579 | request_cachep = kmem_cache_create("blkdev_requests", | 3580 | request_cachep = kmem_cache_create("blkdev_requests", |
3580 | sizeof(struct request), 0, SLAB_PANIC, NULL, NULL); | 3581 | sizeof(struct request), 0, SLAB_PANIC, NULL, NULL); |
3581 | 3582 | ||
3582 | requestq_cachep = kmem_cache_create("blkdev_queue", | 3583 | requestq_cachep = kmem_cache_create("blkdev_queue", |
3583 | sizeof(request_queue_t), 0, SLAB_PANIC, NULL, NULL); | 3584 | sizeof(request_queue_t), 0, SLAB_PANIC, NULL, NULL); |
3584 | 3585 | ||
3585 | iocontext_cachep = kmem_cache_create("blkdev_ioc", | 3586 | iocontext_cachep = kmem_cache_create("blkdev_ioc", |
3586 | sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); | 3587 | sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); |
3587 | 3588 | ||
3588 | for_each_possible_cpu(i) | 3589 | for_each_possible_cpu(i) |
3589 | INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); | 3590 | INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); |
3590 | 3591 | ||
3591 | open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); | 3592 | open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); |
3592 | register_hotcpu_notifier(&blk_cpu_notifier); | 3593 | register_hotcpu_notifier(&blk_cpu_notifier); |
3593 | 3594 | ||
3594 | blk_max_low_pfn = max_low_pfn; | 3595 | blk_max_low_pfn = max_low_pfn; |
3595 | blk_max_pfn = max_pfn; | 3596 | blk_max_pfn = max_pfn; |
3596 | 3597 | ||
3597 | return 0; | 3598 | return 0; |
3598 | } | 3599 | } |
3599 | 3600 | ||
3600 | /* | 3601 | /* |
3601 | * IO Context helper functions | 3602 | * IO Context helper functions |
3602 | */ | 3603 | */ |
3603 | void put_io_context(struct io_context *ioc) | 3604 | void put_io_context(struct io_context *ioc) |
3604 | { | 3605 | { |
3605 | if (ioc == NULL) | 3606 | if (ioc == NULL) |
3606 | return; | 3607 | return; |
3607 | 3608 | ||
3608 | BUG_ON(atomic_read(&ioc->refcount) == 0); | 3609 | BUG_ON(atomic_read(&ioc->refcount) == 0); |
3609 | 3610 | ||
3610 | if (atomic_dec_and_test(&ioc->refcount)) { | 3611 | if (atomic_dec_and_test(&ioc->refcount)) { |
3611 | struct cfq_io_context *cic; | 3612 | struct cfq_io_context *cic; |
3612 | 3613 | ||
3613 | rcu_read_lock(); | 3614 | rcu_read_lock(); |
3614 | if (ioc->aic && ioc->aic->dtor) | 3615 | if (ioc->aic && ioc->aic->dtor) |
3615 | ioc->aic->dtor(ioc->aic); | 3616 | ioc->aic->dtor(ioc->aic); |
3616 | if (ioc->cic_root.rb_node != NULL) { | 3617 | if (ioc->cic_root.rb_node != NULL) { |
3617 | struct rb_node *n = rb_first(&ioc->cic_root); | 3618 | struct rb_node *n = rb_first(&ioc->cic_root); |
3618 | 3619 | ||
3619 | cic = rb_entry(n, struct cfq_io_context, rb_node); | 3620 | cic = rb_entry(n, struct cfq_io_context, rb_node); |
3620 | cic->dtor(ioc); | 3621 | cic->dtor(ioc); |
3621 | } | 3622 | } |
3622 | rcu_read_unlock(); | 3623 | rcu_read_unlock(); |
3623 | 3624 | ||
3624 | kmem_cache_free(iocontext_cachep, ioc); | 3625 | kmem_cache_free(iocontext_cachep, ioc); |
3625 | } | 3626 | } |
3626 | } | 3627 | } |
3627 | EXPORT_SYMBOL(put_io_context); | 3628 | EXPORT_SYMBOL(put_io_context); |
3628 | 3629 | ||
3629 | /* Called by the exiting task */ | 3630 | /* Called by the exiting task */ |
3630 | void exit_io_context(void) | 3631 | void exit_io_context(void) |
3631 | { | 3632 | { |
3632 | unsigned long flags; | 3633 | unsigned long flags; |
3633 | struct io_context *ioc; | 3634 | struct io_context *ioc; |
3634 | struct cfq_io_context *cic; | 3635 | struct cfq_io_context *cic; |
3635 | 3636 | ||
3636 | local_irq_save(flags); | 3637 | local_irq_save(flags); |
3637 | task_lock(current); | 3638 | task_lock(current); |
3638 | ioc = current->io_context; | 3639 | ioc = current->io_context; |
3639 | current->io_context = NULL; | 3640 | current->io_context = NULL; |
3640 | ioc->task = NULL; | 3641 | ioc->task = NULL; |
3641 | task_unlock(current); | 3642 | task_unlock(current); |
3642 | local_irq_restore(flags); | 3643 | local_irq_restore(flags); |
3643 | 3644 | ||
3644 | if (ioc->aic && ioc->aic->exit) | 3645 | if (ioc->aic && ioc->aic->exit) |
3645 | ioc->aic->exit(ioc->aic); | 3646 | ioc->aic->exit(ioc->aic); |
3646 | if (ioc->cic_root.rb_node != NULL) { | 3647 | if (ioc->cic_root.rb_node != NULL) { |
3647 | cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); | 3648 | cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); |
3648 | cic->exit(ioc); | 3649 | cic->exit(ioc); |
3649 | } | 3650 | } |
3650 | 3651 | ||
3651 | put_io_context(ioc); | 3652 | put_io_context(ioc); |
3652 | } | 3653 | } |
3653 | 3654 | ||
3654 | /* | 3655 | /* |
3655 | * If the current task has no IO context then create one and initialise it. | 3656 | * If the current task has no IO context then create one and initialise it. |
3656 | * Otherwise, return its existing IO context. | 3657 | * Otherwise, return its existing IO context. |
3657 | * | 3658 | * |
3658 | * This returned IO context doesn't have a specifically elevated refcount, | 3659 | * This returned IO context doesn't have a specifically elevated refcount, |
3659 | * but since the current task itself holds a reference, the context can be | 3660 | * but since the current task itself holds a reference, the context can be |
3660 | * used in general code, so long as it stays within `current` context. | 3661 | * used in general code, so long as it stays within `current` context. |
3661 | */ | 3662 | */ |
3662 | struct io_context *current_io_context(gfp_t gfp_flags) | 3663 | struct io_context *current_io_context(gfp_t gfp_flags) |
3663 | { | 3664 | { |
3664 | struct task_struct *tsk = current; | 3665 | struct task_struct *tsk = current; |
3665 | struct io_context *ret; | 3666 | struct io_context *ret; |
3666 | 3667 | ||
3667 | ret = tsk->io_context; | 3668 | ret = tsk->io_context; |
3668 | if (likely(ret)) | 3669 | if (likely(ret)) |
3669 | return ret; | 3670 | return ret; |
3670 | 3671 | ||
3671 | ret = kmem_cache_alloc(iocontext_cachep, gfp_flags); | 3672 | ret = kmem_cache_alloc(iocontext_cachep, gfp_flags); |
3672 | if (ret) { | 3673 | if (ret) { |
3673 | atomic_set(&ret->refcount, 1); | 3674 | atomic_set(&ret->refcount, 1); |
3674 | ret->task = current; | 3675 | ret->task = current; |
3675 | ret->set_ioprio = NULL; | 3676 | ret->set_ioprio = NULL; |
3676 | ret->last_waited = jiffies; /* doesn't matter... */ | 3677 | ret->last_waited = jiffies; /* doesn't matter... */ |
3677 | ret->nr_batch_requests = 0; /* because this is 0 */ | 3678 | ret->nr_batch_requests = 0; /* because this is 0 */ |
3678 | ret->aic = NULL; | 3679 | ret->aic = NULL; |
3679 | ret->cic_root.rb_node = NULL; | 3680 | ret->cic_root.rb_node = NULL; |
3680 | /* make sure set_task_ioprio() sees the settings above */ | 3681 | /* make sure set_task_ioprio() sees the settings above */ |
3681 | smp_wmb(); | 3682 | smp_wmb(); |
3682 | tsk->io_context = ret; | 3683 | tsk->io_context = ret; |
3683 | } | 3684 | } |
3684 | 3685 | ||
3685 | return ret; | 3686 | return ret; |
3686 | } | 3687 | } |
3687 | EXPORT_SYMBOL(current_io_context); | 3688 | EXPORT_SYMBOL(current_io_context); |
3688 | 3689 | ||
3689 | /* | 3690 | /* |
3690 | * If the current task has no IO context then create one and initialise it. | 3691 | * If the current task has no IO context then create one and initialise it. |
3691 | * If it does have a context, take a ref on it. | 3692 | * If it does have a context, take a ref on it. |
3692 | * | 3693 | * |
3693 | * This is always called in the context of the task which submitted the I/O. | 3694 | * This is always called in the context of the task which submitted the I/O. |
3694 | */ | 3695 | */ |
3695 | struct io_context *get_io_context(gfp_t gfp_flags) | 3696 | struct io_context *get_io_context(gfp_t gfp_flags) |
3696 | { | 3697 | { |
3697 | struct io_context *ret; | 3698 | struct io_context *ret; |
3698 | ret = current_io_context(gfp_flags); | 3699 | ret = current_io_context(gfp_flags); |
3699 | if (likely(ret)) | 3700 | if (likely(ret)) |
3700 | atomic_inc(&ret->refcount); | 3701 | atomic_inc(&ret->refcount); |
3701 | return ret; | 3702 | return ret; |
3702 | } | 3703 | } |
3703 | EXPORT_SYMBOL(get_io_context); | 3704 | EXPORT_SYMBOL(get_io_context); |
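A minimal sketch of the reference discipline described in the two comments above: get_io_context() returns the submitting task's context with its refcount elevated, and every such reference must eventually be dropped with put_io_context(); the example_* helpers are illustrative.

static struct io_context *example_pin_ioc(void)
{
        /* takes a reference on current's io_context, NULL if allocation failed */
        return get_io_context(GFP_NOIO);
}

static void example_unpin_ioc(struct io_context *ioc)
{
        /* put_io_context() tolerates NULL and frees on the final reference */
        put_io_context(ioc);
}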
3704 | 3705 | ||
3705 | void copy_io_context(struct io_context **pdst, struct io_context **psrc) | 3706 | void copy_io_context(struct io_context **pdst, struct io_context **psrc) |
3706 | { | 3707 | { |
3707 | struct io_context *src = *psrc; | 3708 | struct io_context *src = *psrc; |
3708 | struct io_context *dst = *pdst; | 3709 | struct io_context *dst = *pdst; |
3709 | 3710 | ||
3710 | if (src) { | 3711 | if (src) { |
3711 | BUG_ON(atomic_read(&src->refcount) == 0); | 3712 | BUG_ON(atomic_read(&src->refcount) == 0); |
3712 | atomic_inc(&src->refcount); | 3713 | atomic_inc(&src->refcount); |
3713 | put_io_context(dst); | 3714 | put_io_context(dst); |
3714 | *pdst = src; | 3715 | *pdst = src; |
3715 | } | 3716 | } |
3716 | } | 3717 | } |
3717 | EXPORT_SYMBOL(copy_io_context); | 3718 | EXPORT_SYMBOL(copy_io_context); |
3718 | 3719 | ||
3719 | void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) | 3720 | void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) |
3720 | { | 3721 | { |
3721 | struct io_context *temp; | 3722 | struct io_context *temp; |
3722 | temp = *ioc1; | 3723 | temp = *ioc1; |
3723 | *ioc1 = *ioc2; | 3724 | *ioc1 = *ioc2; |
3724 | *ioc2 = temp; | 3725 | *ioc2 = temp; |
3725 | } | 3726 | } |
3726 | EXPORT_SYMBOL(swap_io_context); | 3727 | EXPORT_SYMBOL(swap_io_context); |
3727 | 3728 | ||
3728 | /* | 3729 | /* |
3729 | * sysfs parts below | 3730 | * sysfs parts below |
3730 | */ | 3731 | */ |
3731 | struct queue_sysfs_entry { | 3732 | struct queue_sysfs_entry { |
3732 | struct attribute attr; | 3733 | struct attribute attr; |
3733 | ssize_t (*show)(struct request_queue *, char *); | 3734 | ssize_t (*show)(struct request_queue *, char *); |
3734 | ssize_t (*store)(struct request_queue *, const char *, size_t); | 3735 | ssize_t (*store)(struct request_queue *, const char *, size_t); |
3735 | }; | 3736 | }; |
3736 | 3737 | ||
3737 | static ssize_t | 3738 | static ssize_t |
3738 | queue_var_show(unsigned int var, char *page) | 3739 | queue_var_show(unsigned int var, char *page) |
3739 | { | 3740 | { |
3740 | return sprintf(page, "%d\n", var); | 3741 | return sprintf(page, "%d\n", var); |
3741 | } | 3742 | } |
3742 | 3743 | ||
3743 | static ssize_t | 3744 | static ssize_t |
3744 | queue_var_store(unsigned long *var, const char *page, size_t count) | 3745 | queue_var_store(unsigned long *var, const char *page, size_t count) |
3745 | { | 3746 | { |
3746 | char *p = (char *) page; | 3747 | char *p = (char *) page; |
3747 | 3748 | ||
3748 | *var = simple_strtoul(p, &p, 10); | 3749 | *var = simple_strtoul(p, &p, 10); |
3749 | return count; | 3750 | return count; |
3750 | } | 3751 | } |
3751 | 3752 | ||
3752 | static ssize_t queue_requests_show(struct request_queue *q, char *page) | 3753 | static ssize_t queue_requests_show(struct request_queue *q, char *page) |
3753 | { | 3754 | { |
3754 | return queue_var_show(q->nr_requests, (page)); | 3755 | return queue_var_show(q->nr_requests, (page)); |
3755 | } | 3756 | } |
3756 | 3757 | ||
3757 | static ssize_t | 3758 | static ssize_t |
3758 | queue_requests_store(struct request_queue *q, const char *page, size_t count) | 3759 | queue_requests_store(struct request_queue *q, const char *page, size_t count) |
3759 | { | 3760 | { |
3760 | struct request_list *rl = &q->rq; | 3761 | struct request_list *rl = &q->rq; |
3761 | unsigned long nr; | 3762 | unsigned long nr; |
3762 | int ret = queue_var_store(&nr, page, count); | 3763 | int ret = queue_var_store(&nr, page, count); |
3763 | if (nr < BLKDEV_MIN_RQ) | 3764 | if (nr < BLKDEV_MIN_RQ) |
3764 | nr = BLKDEV_MIN_RQ; | 3765 | nr = BLKDEV_MIN_RQ; |
3765 | 3766 | ||
3766 | spin_lock_irq(q->queue_lock); | 3767 | spin_lock_irq(q->queue_lock); |
3767 | q->nr_requests = nr; | 3768 | q->nr_requests = nr; |
3768 | blk_queue_congestion_threshold(q); | 3769 | blk_queue_congestion_threshold(q); |
3769 | 3770 | ||
3770 | if (rl->count[READ] >= queue_congestion_on_threshold(q)) | 3771 | if (rl->count[READ] >= queue_congestion_on_threshold(q)) |
3771 | set_queue_congested(q, READ); | 3772 | set_queue_congested(q, READ); |
3772 | else if (rl->count[READ] < queue_congestion_off_threshold(q)) | 3773 | else if (rl->count[READ] < queue_congestion_off_threshold(q)) |
3773 | clear_queue_congested(q, READ); | 3774 | clear_queue_congested(q, READ); |
3774 | 3775 | ||
3775 | if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) | 3776 | if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) |
3776 | set_queue_congested(q, WRITE); | 3777 | set_queue_congested(q, WRITE); |
3777 | else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) | 3778 | else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) |
3778 | clear_queue_congested(q, WRITE); | 3779 | clear_queue_congested(q, WRITE); |
3779 | 3780 | ||
3780 | if (rl->count[READ] >= q->nr_requests) { | 3781 | if (rl->count[READ] >= q->nr_requests) { |
3781 | blk_set_queue_full(q, READ); | 3782 | blk_set_queue_full(q, READ); |
3782 | } else if (rl->count[READ]+1 <= q->nr_requests) { | 3783 | } else if (rl->count[READ]+1 <= q->nr_requests) { |
3783 | blk_clear_queue_full(q, READ); | 3784 | blk_clear_queue_full(q, READ); |
3784 | wake_up(&rl->wait[READ]); | 3785 | wake_up(&rl->wait[READ]); |
3785 | } | 3786 | } |
3786 | 3787 | ||
3787 | if (rl->count[WRITE] >= q->nr_requests) { | 3788 | if (rl->count[WRITE] >= q->nr_requests) { |
3788 | blk_set_queue_full(q, WRITE); | 3789 | blk_set_queue_full(q, WRITE); |
3789 | } else if (rl->count[WRITE]+1 <= q->nr_requests) { | 3790 | } else if (rl->count[WRITE]+1 <= q->nr_requests) { |
3790 | blk_clear_queue_full(q, WRITE); | 3791 | blk_clear_queue_full(q, WRITE); |
3791 | wake_up(&rl->wait[WRITE]); | 3792 | wake_up(&rl->wait[WRITE]); |
3792 | } | 3793 | } |
3793 | spin_unlock_irq(q->queue_lock); | 3794 | spin_unlock_irq(q->queue_lock); |
3794 | return ret; | 3795 | return ret; |
3795 | } | 3796 | } |
3796 | 3797 | ||
3797 | static ssize_t queue_ra_show(struct request_queue *q, char *page) | 3798 | static ssize_t queue_ra_show(struct request_queue *q, char *page) |
3798 | { | 3799 | { |
3799 | int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); | 3800 | int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); |
3800 | 3801 | ||
3801 | return queue_var_show(ra_kb, (page)); | 3802 | return queue_var_show(ra_kb, (page)); |
3802 | } | 3803 | } |
3803 | 3804 | ||
3804 | static ssize_t | 3805 | static ssize_t |
3805 | queue_ra_store(struct request_queue *q, const char *page, size_t count) | 3806 | queue_ra_store(struct request_queue *q, const char *page, size_t count) |
3806 | { | 3807 | { |
3807 | unsigned long ra_kb; | 3808 | unsigned long ra_kb; |
3808 | ssize_t ret = queue_var_store(&ra_kb, page, count); | 3809 | ssize_t ret = queue_var_store(&ra_kb, page, count); |
3809 | 3810 | ||
3810 | spin_lock_irq(q->queue_lock); | 3811 | spin_lock_irq(q->queue_lock); |
3811 | if (ra_kb > (q->max_sectors >> 1)) | 3812 | if (ra_kb > (q->max_sectors >> 1)) |
3812 | ra_kb = (q->max_sectors >> 1); | 3813 | ra_kb = (q->max_sectors >> 1); |
3813 | 3814 | ||
3814 | q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); | 3815 | q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); |
3815 | spin_unlock_irq(q->queue_lock); | 3816 | spin_unlock_irq(q->queue_lock); |
3816 | 3817 | ||
3817 | return ret; | 3818 | return ret; |
3818 | } | 3819 | } |
3819 | 3820 | ||
3820 | static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) | 3821 | static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) |
3821 | { | 3822 | { |
3822 | int max_sectors_kb = q->max_sectors >> 1; | 3823 | int max_sectors_kb = q->max_sectors >> 1; |
3823 | 3824 | ||
3824 | return queue_var_show(max_sectors_kb, (page)); | 3825 | return queue_var_show(max_sectors_kb, (page)); |
3825 | } | 3826 | } |
3826 | 3827 | ||
3827 | static ssize_t | 3828 | static ssize_t |
3828 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) | 3829 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) |
3829 | { | 3830 | { |
3830 | unsigned long max_sectors_kb, | 3831 | unsigned long max_sectors_kb, |
3831 | max_hw_sectors_kb = q->max_hw_sectors >> 1, | 3832 | max_hw_sectors_kb = q->max_hw_sectors >> 1, |
3832 | page_kb = 1 << (PAGE_CACHE_SHIFT - 10); | 3833 | page_kb = 1 << (PAGE_CACHE_SHIFT - 10); |
3833 | ssize_t ret = queue_var_store(&max_sectors_kb, page, count); | 3834 | ssize_t ret = queue_var_store(&max_sectors_kb, page, count); |
3834 | int ra_kb; | 3835 | int ra_kb; |
3835 | 3836 | ||
3836 | if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) | 3837 | if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) |
3837 | return -EINVAL; | 3838 | return -EINVAL; |
3838 | /* | 3839 | /* |
3839 | * Take the queue lock to update the readahead and max_sectors | 3840 | * Take the queue lock to update the readahead and max_sectors |
3840 | * values synchronously: | 3841 | * values synchronously: |
3841 | */ | 3842 | */ |
3842 | spin_lock_irq(q->queue_lock); | 3843 | spin_lock_irq(q->queue_lock); |
3843 | /* | 3844 | /* |
3844 | * Trim readahead window as well, if necessary: | 3845 | * Trim readahead window as well, if necessary: |
3845 | */ | 3846 | */ |
3846 | ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); | 3847 | ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); |
3847 | if (ra_kb > max_sectors_kb) | 3848 | if (ra_kb > max_sectors_kb) |
3848 | q->backing_dev_info.ra_pages = | 3849 | q->backing_dev_info.ra_pages = |
3849 | max_sectors_kb >> (PAGE_CACHE_SHIFT - 10); | 3850 | max_sectors_kb >> (PAGE_CACHE_SHIFT - 10); |
3850 | 3851 | ||
3851 | q->max_sectors = max_sectors_kb << 1; | 3852 | q->max_sectors = max_sectors_kb << 1; |
3852 | spin_unlock_irq(q->queue_lock); | 3853 | spin_unlock_irq(q->queue_lock); |
3853 | 3854 | ||
3854 | return ret; | 3855 | return ret; |
3855 | } | 3856 | } |
3856 | 3857 | ||
3857 | static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) | 3858 | static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) |
3858 | { | 3859 | { |
3859 | int max_hw_sectors_kb = q->max_hw_sectors >> 1; | 3860 | int max_hw_sectors_kb = q->max_hw_sectors >> 1; |
3860 | 3861 | ||
3861 | return queue_var_show(max_hw_sectors_kb, (page)); | 3862 | return queue_var_show(max_hw_sectors_kb, (page)); |
3862 | } | 3863 | } |
3863 | 3864 | ||
3864 | 3865 | ||
3865 | static struct queue_sysfs_entry queue_requests_entry = { | 3866 | static struct queue_sysfs_entry queue_requests_entry = { |
3866 | .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, | 3867 | .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, |
3867 | .show = queue_requests_show, | 3868 | .show = queue_requests_show, |
3868 | .store = queue_requests_store, | 3869 | .store = queue_requests_store, |
3869 | }; | 3870 | }; |
3870 | 3871 | ||
3871 | static struct queue_sysfs_entry queue_ra_entry = { | 3872 | static struct queue_sysfs_entry queue_ra_entry = { |
3872 | .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, | 3873 | .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, |
3873 | .show = queue_ra_show, | 3874 | .show = queue_ra_show, |
3874 | .store = queue_ra_store, | 3875 | .store = queue_ra_store, |
3875 | }; | 3876 | }; |
3876 | 3877 | ||
3877 | static struct queue_sysfs_entry queue_max_sectors_entry = { | 3878 | static struct queue_sysfs_entry queue_max_sectors_entry = { |
3878 | .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, | 3879 | .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, |
3879 | .show = queue_max_sectors_show, | 3880 | .show = queue_max_sectors_show, |
3880 | .store = queue_max_sectors_store, | 3881 | .store = queue_max_sectors_store, |
3881 | }; | 3882 | }; |
3882 | 3883 | ||
3883 | static struct queue_sysfs_entry queue_max_hw_sectors_entry = { | 3884 | static struct queue_sysfs_entry queue_max_hw_sectors_entry = { |
3884 | .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, | 3885 | .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, |
3885 | .show = queue_max_hw_sectors_show, | 3886 | .show = queue_max_hw_sectors_show, |
3886 | }; | 3887 | }; |
3887 | 3888 | ||
3888 | static struct queue_sysfs_entry queue_iosched_entry = { | 3889 | static struct queue_sysfs_entry queue_iosched_entry = { |
3889 | .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, | 3890 | .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, |
3890 | .show = elv_iosched_show, | 3891 | .show = elv_iosched_show, |
3891 | .store = elv_iosched_store, | 3892 | .store = elv_iosched_store, |
3892 | }; | 3893 | }; |
3893 | 3894 | ||
3894 | static struct attribute *default_attrs[] = { | 3895 | static struct attribute *default_attrs[] = { |
3895 | &queue_requests_entry.attr, | 3896 | &queue_requests_entry.attr, |
3896 | &queue_ra_entry.attr, | 3897 | &queue_ra_entry.attr, |
3897 | &queue_max_hw_sectors_entry.attr, | 3898 | &queue_max_hw_sectors_entry.attr, |
3898 | &queue_max_sectors_entry.attr, | 3899 | &queue_max_sectors_entry.attr, |
3899 | &queue_iosched_entry.attr, | 3900 | &queue_iosched_entry.attr, |
3900 | NULL, | 3901 | NULL, |
3901 | }; | 3902 | }; |
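Given the kobject parenting done in blk_register_queue() below, these attributes should surface under /sys/block/<disk>/queue/ as nr_requests, read_ahead_kb, max_sectors_kb, max_hw_sectors_kb and scheduler.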
3902 | 3903 | ||
3903 | #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) | 3904 | #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) |
3904 | 3905 | ||
3905 | static ssize_t | 3906 | static ssize_t |
3906 | queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | 3907 | queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) |
3907 | { | 3908 | { |
3908 | struct queue_sysfs_entry *entry = to_queue(attr); | 3909 | struct queue_sysfs_entry *entry = to_queue(attr); |
3909 | request_queue_t *q = container_of(kobj, struct request_queue, kobj); | 3910 | request_queue_t *q = container_of(kobj, struct request_queue, kobj); |
3910 | ssize_t res; | 3911 | ssize_t res; |
3911 | 3912 | ||
3912 | if (!entry->show) | 3913 | if (!entry->show) |
3913 | return -EIO; | 3914 | return -EIO; |
3914 | mutex_lock(&q->sysfs_lock); | 3915 | mutex_lock(&q->sysfs_lock); |
3915 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | 3916 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { |
3916 | mutex_unlock(&q->sysfs_lock); | 3917 | mutex_unlock(&q->sysfs_lock); |
3917 | return -ENOENT; | 3918 | return -ENOENT; |
3918 | } | 3919 | } |
3919 | res = entry->show(q, page); | 3920 | res = entry->show(q, page); |
3920 | mutex_unlock(&q->sysfs_lock); | 3921 | mutex_unlock(&q->sysfs_lock); |
3921 | return res; | 3922 | return res; |
3922 | } | 3923 | } |
3923 | 3924 | ||
3924 | static ssize_t | 3925 | static ssize_t |
3925 | queue_attr_store(struct kobject *kobj, struct attribute *attr, | 3926 | queue_attr_store(struct kobject *kobj, struct attribute *attr, |
3926 | const char *page, size_t length) | 3927 | const char *page, size_t length) |
3927 | { | 3928 | { |
3928 | struct queue_sysfs_entry *entry = to_queue(attr); | 3929 | struct queue_sysfs_entry *entry = to_queue(attr); |
3929 | request_queue_t *q = container_of(kobj, struct request_queue, kobj); | 3930 | request_queue_t *q = container_of(kobj, struct request_queue, kobj); |
3930 | 3931 | ||
3931 | ssize_t res; | 3932 | ssize_t res; |
3932 | 3933 | ||
3933 | if (!entry->store) | 3934 | if (!entry->store) |
3934 | return -EIO; | 3935 | return -EIO; |
3935 | mutex_lock(&q->sysfs_lock); | 3936 | mutex_lock(&q->sysfs_lock); |
3936 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | 3937 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { |
3937 | mutex_unlock(&q->sysfs_lock); | 3938 | mutex_unlock(&q->sysfs_lock); |
3938 | return -ENOENT; | 3939 | return -ENOENT; |
3939 | } | 3940 | } |
3940 | res = entry->store(q, page, length); | 3941 | res = entry->store(q, page, length); |
3941 | mutex_unlock(&q->sysfs_lock); | 3942 | mutex_unlock(&q->sysfs_lock); |
3942 | return res; | 3943 | return res; |
3943 | } | 3944 | } |
3944 | 3945 | ||
3945 | static struct sysfs_ops queue_sysfs_ops = { | 3946 | static struct sysfs_ops queue_sysfs_ops = { |
3946 | .show = queue_attr_show, | 3947 | .show = queue_attr_show, |
3947 | .store = queue_attr_store, | 3948 | .store = queue_attr_store, |
3948 | }; | 3949 | }; |
3949 | 3950 | ||
3950 | static struct kobj_type queue_ktype = { | 3951 | static struct kobj_type queue_ktype = { |
3951 | .sysfs_ops = &queue_sysfs_ops, | 3952 | .sysfs_ops = &queue_sysfs_ops, |
3952 | .default_attrs = default_attrs, | 3953 | .default_attrs = default_attrs, |
3953 | .release = blk_release_queue, | 3954 | .release = blk_release_queue, |
3954 | }; | 3955 | }; |
3955 | 3956 | ||
3956 | int blk_register_queue(struct gendisk *disk) | 3957 | int blk_register_queue(struct gendisk *disk) |
3957 | { | 3958 | { |
3958 | int ret; | 3959 | int ret; |
3959 | 3960 | ||
3960 | request_queue_t *q = disk->queue; | 3961 | request_queue_t *q = disk->queue; |
3961 | 3962 | ||
3962 | if (!q || !q->request_fn) | 3963 | if (!q || !q->request_fn) |
3963 | return -ENXIO; | 3964 | return -ENXIO; |
3964 | 3965 | ||
3965 | q->kobj.parent = kobject_get(&disk->kobj); | 3966 | q->kobj.parent = kobject_get(&disk->kobj); |
3966 | 3967 | ||
3967 | ret = kobject_add(&q->kobj); | 3968 | ret = kobject_add(&q->kobj); |
3968 | if (ret < 0) | 3969 | if (ret < 0) |
3969 | return ret; | 3970 | return ret; |
3970 | 3971 | ||
3971 | kobject_uevent(&q->kobj, KOBJ_ADD); | 3972 | kobject_uevent(&q->kobj, KOBJ_ADD); |
3972 | 3973 | ||
3973 | ret = elv_register_queue(q); | 3974 | ret = elv_register_queue(q); |
3974 | if (ret) { | 3975 | if (ret) { |
3975 | kobject_uevent(&q->kobj, KOBJ_REMOVE); | 3976 | kobject_uevent(&q->kobj, KOBJ_REMOVE); |
3976 | kobject_del(&q->kobj); | 3977 | kobject_del(&q->kobj); |
3977 | return ret; | 3978 | return ret; |
3978 | } | 3979 | } |
3979 | 3980 | ||
3980 | return 0; | 3981 | return 0; |
3981 | } | 3982 | } |
3982 | 3983 | ||
3983 | void blk_unregister_queue(struct gendisk *disk) | 3984 | void blk_unregister_queue(struct gendisk *disk) |
3984 | { | 3985 | { |
3985 | request_queue_t *q = disk->queue; | 3986 | request_queue_t *q = disk->queue; |
3986 | 3987 | ||
3987 | if (q && q->request_fn) { | 3988 | if (q && q->request_fn) { |
3988 | elv_unregister_queue(q); | 3989 | elv_unregister_queue(q); |
3989 | 3990 | ||
3990 | kobject_uevent(&q->kobj, KOBJ_REMOVE); | 3991 | kobject_uevent(&q->kobj, KOBJ_REMOVE); |
3991 | kobject_del(&q->kobj); | 3992 | kobject_del(&q->kobj); |
3992 | kobject_put(&disk->kobj); | 3993 | kobject_put(&disk->kobj); |
3993 | } | 3994 | } |
3994 | } | 3995 | } |
include/linux/blkdev.h
1 | #ifndef _LINUX_BLKDEV_H | 1 | #ifndef _LINUX_BLKDEV_H |
2 | #define _LINUX_BLKDEV_H | 2 | #define _LINUX_BLKDEV_H |
3 | 3 | ||
4 | #include <linux/major.h> | 4 | #include <linux/major.h> |
5 | #include <linux/genhd.h> | 5 | #include <linux/genhd.h> |
6 | #include <linux/list.h> | 6 | #include <linux/list.h> |
7 | #include <linux/timer.h> | 7 | #include <linux/timer.h> |
8 | #include <linux/workqueue.h> | 8 | #include <linux/workqueue.h> |
9 | #include <linux/pagemap.h> | 9 | #include <linux/pagemap.h> |
10 | #include <linux/backing-dev.h> | 10 | #include <linux/backing-dev.h> |
11 | #include <linux/wait.h> | 11 | #include <linux/wait.h> |
12 | #include <linux/mempool.h> | 12 | #include <linux/mempool.h> |
13 | #include <linux/bio.h> | 13 | #include <linux/bio.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/stringify.h> | 15 | #include <linux/stringify.h> |
16 | 16 | ||
17 | #include <asm/scatterlist.h> | 17 | #include <asm/scatterlist.h> |
18 | 18 | ||
19 | struct scsi_ioctl_command; | 19 | struct scsi_ioctl_command; |
20 | 20 | ||
21 | struct request_queue; | 21 | struct request_queue; |
22 | typedef struct request_queue request_queue_t; | 22 | typedef struct request_queue request_queue_t; |
23 | struct elevator_queue; | 23 | struct elevator_queue; |
24 | typedef struct elevator_queue elevator_t; | 24 | typedef struct elevator_queue elevator_t; |
25 | struct request_pm_state; | 25 | struct request_pm_state; |
26 | struct blk_trace; | 26 | struct blk_trace; |
27 | 27 | ||
28 | #define BLKDEV_MIN_RQ 4 | 28 | #define BLKDEV_MIN_RQ 4 |
29 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ | 29 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ |
30 | 30 | ||
31 | /* | 31 | /* |
32 | * This is the per-process anticipatory I/O scheduler state. | 32 | * This is the per-process anticipatory I/O scheduler state. |
33 | */ | 33 | */ |
34 | struct as_io_context { | 34 | struct as_io_context { |
35 | spinlock_t lock; | 35 | spinlock_t lock; |
36 | 36 | ||
37 | void (*dtor)(struct as_io_context *aic); /* destructor */ | 37 | void (*dtor)(struct as_io_context *aic); /* destructor */ |
38 | void (*exit)(struct as_io_context *aic); /* called on task exit */ | 38 | void (*exit)(struct as_io_context *aic); /* called on task exit */ |
39 | 39 | ||
40 | unsigned long state; | 40 | unsigned long state; |
41 | atomic_t nr_queued; /* queued reads & sync writes */ | 41 | atomic_t nr_queued; /* queued reads & sync writes */ |
42 | atomic_t nr_dispatched; /* number of requests gone to the drivers */ | 42 | atomic_t nr_dispatched; /* number of requests gone to the drivers */ |
43 | 43 | ||
44 | /* IO History tracking */ | 44 | /* IO History tracking */ |
45 | /* Thinktime */ | 45 | /* Thinktime */ |
46 | unsigned long last_end_request; | 46 | unsigned long last_end_request; |
47 | unsigned long ttime_total; | 47 | unsigned long ttime_total; |
48 | unsigned long ttime_samples; | 48 | unsigned long ttime_samples; |
49 | unsigned long ttime_mean; | 49 | unsigned long ttime_mean; |
50 | /* Layout pattern */ | 50 | /* Layout pattern */ |
51 | unsigned int seek_samples; | 51 | unsigned int seek_samples; |
52 | sector_t last_request_pos; | 52 | sector_t last_request_pos; |
53 | u64 seek_total; | 53 | u64 seek_total; |
54 | sector_t seek_mean; | 54 | sector_t seek_mean; |
55 | }; | 55 | }; |
56 | 56 | ||
57 | struct cfq_queue; | 57 | struct cfq_queue; |
58 | struct cfq_io_context { | 58 | struct cfq_io_context { |
59 | struct rb_node rb_node; | 59 | struct rb_node rb_node; |
60 | void *key; | 60 | void *key; |
61 | 61 | ||
62 | struct cfq_queue *cfqq[2]; | 62 | struct cfq_queue *cfqq[2]; |
63 | 63 | ||
64 | struct io_context *ioc; | 64 | struct io_context *ioc; |
65 | 65 | ||
66 | unsigned long last_end_request; | 66 | unsigned long last_end_request; |
67 | sector_t last_request_pos; | 67 | sector_t last_request_pos; |
68 | unsigned long last_queue; | 68 | unsigned long last_queue; |
69 | 69 | ||
70 | unsigned long ttime_total; | 70 | unsigned long ttime_total; |
71 | unsigned long ttime_samples; | 71 | unsigned long ttime_samples; |
72 | unsigned long ttime_mean; | 72 | unsigned long ttime_mean; |
73 | 73 | ||
74 | unsigned int seek_samples; | 74 | unsigned int seek_samples; |
75 | u64 seek_total; | 75 | u64 seek_total; |
76 | sector_t seek_mean; | 76 | sector_t seek_mean; |
77 | 77 | ||
78 | struct list_head queue_list; | 78 | struct list_head queue_list; |
79 | 79 | ||
80 | void (*dtor)(struct io_context *); /* destructor */ | 80 | void (*dtor)(struct io_context *); /* destructor */ |
81 | void (*exit)(struct io_context *); /* called on task exit */ | 81 | void (*exit)(struct io_context *); /* called on task exit */ |
82 | }; | 82 | }; |
83 | 83 | ||
84 | /* | 84 | /* |
85 | * This is the per-process I/O subsystem state. It is refcounted and | 85 | * This is the per-process I/O subsystem state. It is refcounted and |
86 | * kmalloc'ed. Currently all fields are modified in process io context | 86 | * kmalloc'ed. Currently all fields are modified in process io context |
87 | * (apart from the atomic refcount), so require no locking. | 87 | * (apart from the atomic refcount), so require no locking. |
88 | */ | 88 | */ |
89 | struct io_context { | 89 | struct io_context { |
90 | atomic_t refcount; | 90 | atomic_t refcount; |
91 | struct task_struct *task; | 91 | struct task_struct *task; |
92 | 92 | ||
93 | int (*set_ioprio)(struct io_context *, unsigned int); | 93 | int (*set_ioprio)(struct io_context *, unsigned int); |
94 | 94 | ||
95 | /* | 95 | /* |
96 | * For request batching | 96 | * For request batching |
97 | */ | 97 | */ |
98 | unsigned long last_waited; /* Time last woken after wait for request */ | 98 | unsigned long last_waited; /* Time last woken after wait for request */ |
99 | int nr_batch_requests; /* Number of requests left in the batch */ | 99 | int nr_batch_requests; /* Number of requests left in the batch */ |
100 | 100 | ||
101 | struct as_io_context *aic; | 101 | struct as_io_context *aic; |
102 | struct rb_root cic_root; | 102 | struct rb_root cic_root; |
103 | }; | 103 | }; |
104 | 104 | ||
105 | void put_io_context(struct io_context *ioc); | 105 | void put_io_context(struct io_context *ioc); |
106 | void exit_io_context(void); | 106 | void exit_io_context(void); |
107 | struct io_context *current_io_context(gfp_t gfp_flags); | 107 | struct io_context *current_io_context(gfp_t gfp_flags); |
108 | struct io_context *get_io_context(gfp_t gfp_flags); | 108 | struct io_context *get_io_context(gfp_t gfp_flags); |
109 | void copy_io_context(struct io_context **pdst, struct io_context **psrc); | 109 | void copy_io_context(struct io_context **pdst, struct io_context **psrc); |
110 | void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); | 110 | void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); |
111 | 111 | ||
112 | struct request; | 112 | struct request; |
113 | typedef void (rq_end_io_fn)(struct request *, int); | 113 | typedef void (rq_end_io_fn)(struct request *, int); |
114 | 114 | ||
115 | struct request_list { | 115 | struct request_list { |
116 | int count[2]; | 116 | int count[2]; |
117 | int starved[2]; | 117 | int starved[2]; |
118 | int elvpriv; | 118 | int elvpriv; |
119 | mempool_t *rq_pool; | 119 | mempool_t *rq_pool; |
120 | wait_queue_head_t wait[2]; | 120 | wait_queue_head_t wait[2]; |
121 | }; | 121 | }; |
122 | 122 | ||
123 | /* | 123 | /* |
124 | * request command types | 124 | * request command types |
125 | */ | 125 | */ |
126 | enum rq_cmd_type_bits { | 126 | enum rq_cmd_type_bits { |
127 | REQ_TYPE_FS = 1, /* fs request */ | 127 | REQ_TYPE_FS = 1, /* fs request */ |
128 | REQ_TYPE_BLOCK_PC, /* scsi command */ | 128 | REQ_TYPE_BLOCK_PC, /* scsi command */ |
129 | REQ_TYPE_SENSE, /* sense request */ | 129 | REQ_TYPE_SENSE, /* sense request */ |
130 | REQ_TYPE_PM_SUSPEND, /* suspend request */ | 130 | REQ_TYPE_PM_SUSPEND, /* suspend request */ |
131 | REQ_TYPE_PM_RESUME, /* resume request */ | 131 | REQ_TYPE_PM_RESUME, /* resume request */ |
132 | REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ | 132 | REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ |
133 | REQ_TYPE_FLUSH, /* flush request */ | 133 | REQ_TYPE_FLUSH, /* flush request */ |
134 | REQ_TYPE_SPECIAL, /* driver defined type */ | 134 | REQ_TYPE_SPECIAL, /* driver defined type */ |
135 | REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ | 135 | REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ |
136 | /* | 136 | /* |
137 | * for ATA/ATAPI devices. this really doesn't belong here, ide should | 137 | * for ATA/ATAPI devices. this really doesn't belong here, ide should |
138 | * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver | 138 | * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver |
139 | * private REQ_LB opcodes to differentiate what type of request this is | 139 | * private REQ_LB opcodes to differentiate what type of request this is |
140 | */ | 140 | */ |
141 | REQ_TYPE_ATA_CMD, | 141 | REQ_TYPE_ATA_CMD, |
142 | REQ_TYPE_ATA_TASK, | 142 | REQ_TYPE_ATA_TASK, |
143 | REQ_TYPE_ATA_TASKFILE, | 143 | REQ_TYPE_ATA_TASKFILE, |
144 | }; | 144 | }; |
145 | 145 | ||
146 | /* | 146 | /* |
147 | * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being | 147 | * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being |
148 | * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a | 148 | * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a |
149 | * SCSI cdb. | 149 | * SCSI cdb. |
150 | * | 150 | * |
151 | * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, | 151 | * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, |
152 | * typically to differentiate REQ_TYPE_SPECIAL requests. | 152 | * typically to differentiate REQ_TYPE_SPECIAL requests. |
153 | * | 153 | * |
154 | */ | 154 | */ |
155 | enum { | 155 | enum { |
156 | /* | 156 | /* |
157 | * just examples for now | 157 | * just examples for now |
158 | */ | 158 | */ |
159 | REQ_LB_OP_EJECT = 0x40, /* eject request */ | 159 | REQ_LB_OP_EJECT = 0x40, /* eject request */ |
160 | REQ_LB_OP_FLUSH = 0x41, /* flush device */ | 160 | REQ_LB_OP_FLUSH = 0x41, /* flush device */ |
161 | }; | 161 | }; |
162 | 162 | ||
163 | /* | 163 | /* |
164 | * request type modified bits. first three bits match BIO_RW* bits, important | 164 | * request type modified bits. first three bits match BIO_RW* bits, important |
165 | */ | 165 | */ |
166 | enum rq_flag_bits { | 166 | enum rq_flag_bits { |
167 | __REQ_RW, /* not set, read. set, write */ | 167 | __REQ_RW, /* not set, read. set, write */ |
168 | __REQ_FAILFAST, /* no low level driver retries */ | 168 | __REQ_FAILFAST, /* no low level driver retries */ |
169 | __REQ_SORTED, /* elevator knows about this request */ | 169 | __REQ_SORTED, /* elevator knows about this request */ |
170 | __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ | 170 | __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ |
171 | __REQ_HARDBARRIER, /* may not be passed by drive either */ | 171 | __REQ_HARDBARRIER, /* may not be passed by drive either */ |
172 | __REQ_FUA, /* forced unit access */ | 172 | __REQ_FUA, /* forced unit access */ |
173 | __REQ_NOMERGE, /* don't touch this for merging */ | 173 | __REQ_NOMERGE, /* don't touch this for merging */ |
174 | __REQ_STARTED, /* drive already may have started this one */ | 174 | __REQ_STARTED, /* drive already may have started this one */ |
175 | __REQ_DONTPREP, /* don't call prep for this one */ | 175 | __REQ_DONTPREP, /* don't call prep for this one */ |
176 | __REQ_QUEUED, /* uses queueing */ | 176 | __REQ_QUEUED, /* uses queueing */ |
177 | __REQ_ELVPRIV, /* elevator private data attached */ | 177 | __REQ_ELVPRIV, /* elevator private data attached */ |
178 | __REQ_FAILED, /* set if the request failed */ | 178 | __REQ_FAILED, /* set if the request failed */ |
179 | __REQ_QUIET, /* don't worry about errors */ | 179 | __REQ_QUIET, /* don't worry about errors */ |
180 | __REQ_PREEMPT, /* set for "ide_preempt" requests */ | 180 | __REQ_PREEMPT, /* set for "ide_preempt" requests */ |
181 | __REQ_ORDERED_COLOR, /* is before or after barrier */ | 181 | __REQ_ORDERED_COLOR, /* is before or after barrier */ |
182 | __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ | 182 | __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ |
183 | __REQ_NR_BITS, /* stops here */ | 183 | __REQ_NR_BITS, /* stops here */ |
184 | }; | 184 | }; |
185 | 185 | ||
186 | #define REQ_RW (1 << __REQ_RW) | 186 | #define REQ_RW (1 << __REQ_RW) |
187 | #define REQ_FAILFAST (1 << __REQ_FAILFAST) | 187 | #define REQ_FAILFAST (1 << __REQ_FAILFAST) |
188 | #define REQ_SORTED (1 << __REQ_SORTED) | 188 | #define REQ_SORTED (1 << __REQ_SORTED) |
189 | #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) | 189 | #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) |
190 | #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) | 190 | #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) |
191 | #define REQ_FUA (1 << __REQ_FUA) | 191 | #define REQ_FUA (1 << __REQ_FUA) |
192 | #define REQ_NOMERGE (1 << __REQ_NOMERGE) | 192 | #define REQ_NOMERGE (1 << __REQ_NOMERGE) |
193 | #define REQ_STARTED (1 << __REQ_STARTED) | 193 | #define REQ_STARTED (1 << __REQ_STARTED) |
194 | #define REQ_DONTPREP (1 << __REQ_DONTPREP) | 194 | #define REQ_DONTPREP (1 << __REQ_DONTPREP) |
195 | #define REQ_QUEUED (1 << __REQ_QUEUED) | 195 | #define REQ_QUEUED (1 << __REQ_QUEUED) |
196 | #define REQ_ELVPRIV (1 << __REQ_ELVPRIV) | 196 | #define REQ_ELVPRIV (1 << __REQ_ELVPRIV) |
197 | #define REQ_FAILED (1 << __REQ_FAILED) | 197 | #define REQ_FAILED (1 << __REQ_FAILED) |
198 | #define REQ_QUIET (1 << __REQ_QUIET) | 198 | #define REQ_QUIET (1 << __REQ_QUIET) |
199 | #define REQ_PREEMPT (1 << __REQ_PREEMPT) | 199 | #define REQ_PREEMPT (1 << __REQ_PREEMPT) |
200 | #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) | 200 | #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) |
201 | #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) | 201 | #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) |
202 | 202 | ||
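The REQ_* masks above are the only sanctioned way to test bits in rq->cmd_flags. As a minimal, hypothetical sketch (driver-side code, not part of this patch), combining them looks like:

        /* Hypothetical helper built only from the flag macros above. */
        static inline int my_rq_is_sync_write(struct request *rq)
        {
                return (rq->cmd_flags & REQ_RW) && (rq->cmd_flags & REQ_RW_SYNC);
        }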
203 | #define BLK_MAX_CDB 16 | 203 | #define BLK_MAX_CDB 16 |
204 | 204 | ||
205 | /* | 205 | /* |
206 | * try to put the fields that are referenced together in the same cacheline | 206 | * try to put the fields that are referenced together in the same cacheline |
207 | */ | 207 | */ |
208 | struct request { | 208 | struct request { |
209 | struct list_head queuelist; | 209 | struct list_head queuelist; |
210 | struct list_head donelist; | 210 | struct list_head donelist; |
211 | 211 | ||
212 | unsigned int cmd_flags; | 212 | unsigned int cmd_flags; |
213 | enum rq_cmd_type_bits cmd_type; | 213 | enum rq_cmd_type_bits cmd_type; |
214 | 214 | ||
215 | /* Maintain bio traversal state for part by part I/O submission. | 215 | /* Maintain bio traversal state for part by part I/O submission. |
216 | * hard_* are block layer internals, no driver should touch them! | 216 | * hard_* are block layer internals, no driver should touch them! |
217 | */ | 217 | */ |
218 | 218 | ||
219 | sector_t sector; /* next sector to submit */ | 219 | sector_t sector; /* next sector to submit */ |
220 | unsigned long nr_sectors; /* no. of sectors left to submit */ | 220 | unsigned long nr_sectors; /* no. of sectors left to submit */ |
221 | /* no. of sectors left to submit in the current segment */ | 221 | /* no. of sectors left to submit in the current segment */ |
222 | unsigned int current_nr_sectors; | 222 | unsigned int current_nr_sectors; |
223 | 223 | ||
224 | sector_t hard_sector; /* next sector to complete */ | 224 | sector_t hard_sector; /* next sector to complete */ |
225 | unsigned long hard_nr_sectors; /* no. of sectors left to complete */ | 225 | unsigned long hard_nr_sectors; /* no. of sectors left to complete */ |
226 | /* no. of sectors left to complete in the current segment */ | 226 | /* no. of sectors left to complete in the current segment */ |
227 | unsigned int hard_cur_sectors; | 227 | unsigned int hard_cur_sectors; |
228 | 228 | ||
229 | struct bio *bio; | 229 | struct bio *bio; |
230 | struct bio *biotail; | 230 | struct bio *biotail; |
231 | 231 | ||
232 | struct hlist_node hash; /* merge hash */ | 232 | struct hlist_node hash; /* merge hash */ |
233 | struct rb_node rb_node; /* sort/lookup */ | ||
233 | 234 | ||
234 | void *elevator_private; | 235 | void *elevator_private; |
235 | void *completion_data; | 236 | void *completion_data; |
236 | 237 | ||
237 | int rq_status; /* should split this into a few status bits */ | 238 | int rq_status; /* should split this into a few status bits */ |
238 | int errors; | 239 | int errors; |
239 | struct gendisk *rq_disk; | 240 | struct gendisk *rq_disk; |
240 | unsigned long start_time; | 241 | unsigned long start_time; |
241 | 242 | ||
242 | /* Number of scatter-gather DMA addr+len pairs after | 243 | /* Number of scatter-gather DMA addr+len pairs after |
243 | * physical address coalescing is performed. | 244 | * physical address coalescing is performed. |
244 | */ | 245 | */ |
245 | unsigned short nr_phys_segments; | 246 | unsigned short nr_phys_segments; |
246 | 247 | ||
247 | /* Number of scatter-gather addr+len pairs after | 248 | /* Number of scatter-gather addr+len pairs after |
248 | * physical and DMA remapping hardware coalescing is performed. | 249 | * physical and DMA remapping hardware coalescing is performed. |
249 | * This is the number of scatter-gather entries the driver | 250 | * This is the number of scatter-gather entries the driver |
250 | * will actually have to deal with after DMA mapping is done. | 251 | * will actually have to deal with after DMA mapping is done. |
251 | */ | 252 | */ |
252 | unsigned short nr_hw_segments; | 253 | unsigned short nr_hw_segments; |
253 | 254 | ||
254 | unsigned short ioprio; | 255 | unsigned short ioprio; |
255 | 256 | ||
256 | int tag; | 257 | int tag; |
257 | 258 | ||
258 | int ref_count; | 259 | int ref_count; |
259 | request_queue_t *q; | 260 | request_queue_t *q; |
260 | struct request_list *rl; | 261 | struct request_list *rl; |
261 | 262 | ||
262 | struct completion *waiting; | 263 | struct completion *waiting; |
263 | void *special; | 264 | void *special; |
264 | char *buffer; | 265 | char *buffer; |
265 | 266 | ||
266 | /* | 267 | /* |
267 | * when request is used as a packet command carrier | 268 | * when request is used as a packet command carrier |
268 | */ | 269 | */ |
269 | unsigned int cmd_len; | 270 | unsigned int cmd_len; |
270 | unsigned char cmd[BLK_MAX_CDB]; | 271 | unsigned char cmd[BLK_MAX_CDB]; |
271 | 272 | ||
272 | unsigned int data_len; | 273 | unsigned int data_len; |
273 | unsigned int sense_len; | 274 | unsigned int sense_len; |
274 | void *data; | 275 | void *data; |
275 | void *sense; | 276 | void *sense; |
276 | 277 | ||
277 | unsigned int timeout; | 278 | unsigned int timeout; |
278 | int retries; | 279 | int retries; |
279 | 280 | ||
280 | /* | 281 | /* |
281 | * completion callback. end_io_data should be folded in with waiting | 282 | * completion callback. end_io_data should be folded in with waiting |
282 | */ | 283 | */ |
283 | rq_end_io_fn *end_io; | 284 | rq_end_io_fn *end_io; |
284 | void *end_io_data; | 285 | void *end_io_data; |
285 | }; | 286 | }; |
286 | 287 | ||
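With struct rb_node embedded in every request, keeping requests sorted by sector only needs the generic <linux/rbtree.h> primitives. A rough, illustrative sketch of an insertion keyed on rq->sector (the function name and the rb_root it takes are assumptions for illustration, not helpers defined by this patch):

        #include <linux/rbtree.h>

        /* Illustrative only: insert a request into a scheduler-owned rb_root,
         * keyed by rq->sector, via the rb_node embedded in struct request. */
        static void my_sched_rb_add(struct rb_root *root, struct request *rq)
        {
                struct rb_node **p = &root->rb_node;
                struct rb_node *parent = NULL;
                struct request *__rq;

                while (*p) {
                        parent = *p;
                        __rq = rb_entry(parent, struct request, rb_node);

                        if (rq->sector < __rq->sector)
                                p = &(*p)->rb_left;
                        else
                                p = &(*p)->rb_right;
                }

                rb_link_node(&rq->rb_node, parent, p);
                rb_insert_color(&rq->rb_node, root);
        }

Lookup and removal follow the same pattern with rb_entry() and rb_erase().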
287 | /* | 288 | /* |
288 | * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME | 289 | * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME |
289 | * requests. Some step values could eventually be made generic. | 290 | * requests. Some step values could eventually be made generic. |
290 | */ | 291 | */ |
291 | struct request_pm_state | 292 | struct request_pm_state |
292 | { | 293 | { |
293 | /* PM state machine step value, currently driver specific */ | 294 | /* PM state machine step value, currently driver specific */ |
294 | int pm_step; | 295 | int pm_step; |
295 | /* requested PM state value (S1, S2, S3, S4, ...) */ | 296 | /* requested PM state value (S1, S2, S3, S4, ...) */ |
296 | u32 pm_state; | 297 | u32 pm_state; |
297 | void* data; /* for driver use */ | 298 | void* data; /* for driver use */ |
298 | }; | 299 | }; |
299 | 300 | ||
300 | #include <linux/elevator.h> | 301 | #include <linux/elevator.h> |
301 | 302 | ||
302 | typedef int (merge_request_fn) (request_queue_t *, struct request *, | 303 | typedef int (merge_request_fn) (request_queue_t *, struct request *, |
303 | struct bio *); | 304 | struct bio *); |
304 | typedef int (merge_requests_fn) (request_queue_t *, struct request *, | 305 | typedef int (merge_requests_fn) (request_queue_t *, struct request *, |
305 | struct request *); | 306 | struct request *); |
306 | typedef void (request_fn_proc) (request_queue_t *q); | 307 | typedef void (request_fn_proc) (request_queue_t *q); |
307 | typedef int (make_request_fn) (request_queue_t *q, struct bio *bio); | 308 | typedef int (make_request_fn) (request_queue_t *q, struct bio *bio); |
308 | typedef int (prep_rq_fn) (request_queue_t *, struct request *); | 309 | typedef int (prep_rq_fn) (request_queue_t *, struct request *); |
309 | typedef void (unplug_fn) (request_queue_t *); | 310 | typedef void (unplug_fn) (request_queue_t *); |
310 | 311 | ||
311 | struct bio_vec; | 312 | struct bio_vec; |
312 | typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *); | 313 | typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *); |
313 | typedef void (activity_fn) (void *data, int rw); | 314 | typedef void (activity_fn) (void *data, int rw); |
314 | typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *); | 315 | typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *); |
315 | typedef void (prepare_flush_fn) (request_queue_t *, struct request *); | 316 | typedef void (prepare_flush_fn) (request_queue_t *, struct request *); |
316 | typedef void (softirq_done_fn)(struct request *); | 317 | typedef void (softirq_done_fn)(struct request *); |
317 | 318 | ||
318 | enum blk_queue_state { | 319 | enum blk_queue_state { |
319 | Queue_down, | 320 | Queue_down, |
320 | Queue_up, | 321 | Queue_up, |
321 | }; | 322 | }; |
322 | 323 | ||
323 | struct blk_queue_tag { | 324 | struct blk_queue_tag { |
324 | struct request **tag_index; /* map of busy tags */ | 325 | struct request **tag_index; /* map of busy tags */ |
325 | unsigned long *tag_map; /* bit map of free/busy tags */ | 326 | unsigned long *tag_map; /* bit map of free/busy tags */ |
326 | struct list_head busy_list; /* fifo list of busy tags */ | 327 | struct list_head busy_list; /* fifo list of busy tags */ |
327 | int busy; /* current depth */ | 328 | int busy; /* current depth */ |
328 | int max_depth; /* what we will send to device */ | 329 | int max_depth; /* what we will send to device */ |
329 | int real_max_depth; /* what the array can hold */ | 330 | int real_max_depth; /* what the array can hold */ |
330 | atomic_t refcnt; /* map can be shared */ | 331 | atomic_t refcnt; /* map can be shared */ |
331 | }; | 332 | }; |
332 | 333 | ||
333 | struct request_queue | 334 | struct request_queue |
334 | { | 335 | { |
335 | /* | 336 | /* |
336 | * Together with queue_head for cacheline sharing | 337 | * Together with queue_head for cacheline sharing |
337 | */ | 338 | */ |
338 | struct list_head queue_head; | 339 | struct list_head queue_head; |
339 | struct request *last_merge; | 340 | struct request *last_merge; |
340 | elevator_t *elevator; | 341 | elevator_t *elevator; |
341 | 342 | ||
342 | /* | 343 | /* |
343 | * the queue request freelist, one for reads and one for writes | 344 | * the queue request freelist, one for reads and one for writes |
344 | */ | 345 | */ |
345 | struct request_list rq; | 346 | struct request_list rq; |
346 | 347 | ||
347 | request_fn_proc *request_fn; | 348 | request_fn_proc *request_fn; |
348 | merge_request_fn *back_merge_fn; | 349 | merge_request_fn *back_merge_fn; |
349 | merge_request_fn *front_merge_fn; | 350 | merge_request_fn *front_merge_fn; |
350 | merge_requests_fn *merge_requests_fn; | 351 | merge_requests_fn *merge_requests_fn; |
351 | make_request_fn *make_request_fn; | 352 | make_request_fn *make_request_fn; |
352 | prep_rq_fn *prep_rq_fn; | 353 | prep_rq_fn *prep_rq_fn; |
353 | unplug_fn *unplug_fn; | 354 | unplug_fn *unplug_fn; |
354 | merge_bvec_fn *merge_bvec_fn; | 355 | merge_bvec_fn *merge_bvec_fn; |
355 | activity_fn *activity_fn; | 356 | activity_fn *activity_fn; |
356 | issue_flush_fn *issue_flush_fn; | 357 | issue_flush_fn *issue_flush_fn; |
357 | prepare_flush_fn *prepare_flush_fn; | 358 | prepare_flush_fn *prepare_flush_fn; |
358 | softirq_done_fn *softirq_done_fn; | 359 | softirq_done_fn *softirq_done_fn; |
359 | 360 | ||
360 | /* | 361 | /* |
361 | * Dispatch queue sorting | 362 | * Dispatch queue sorting |
362 | */ | 363 | */ |
363 | sector_t end_sector; | 364 | sector_t end_sector; |
364 | struct request *boundary_rq; | 365 | struct request *boundary_rq; |
365 | 366 | ||
366 | /* | 367 | /* |
367 | * Auto-unplugging state | 368 | * Auto-unplugging state |
368 | */ | 369 | */ |
369 | struct timer_list unplug_timer; | 370 | struct timer_list unplug_timer; |
370 | int unplug_thresh; /* After this many requests */ | 371 | int unplug_thresh; /* After this many requests */ |
371 | unsigned long unplug_delay; /* After this many jiffies */ | 372 | unsigned long unplug_delay; /* After this many jiffies */ |
372 | struct work_struct unplug_work; | 373 | struct work_struct unplug_work; |
373 | 374 | ||
374 | struct backing_dev_info backing_dev_info; | 375 | struct backing_dev_info backing_dev_info; |
375 | 376 | ||
376 | /* | 377 | /* |
377 | * The queue owner gets to use this for whatever they like. | 378 | * The queue owner gets to use this for whatever they like. |
378 | * ll_rw_blk doesn't touch it. | 379 | * ll_rw_blk doesn't touch it. |
379 | */ | 380 | */ |
380 | void *queuedata; | 381 | void *queuedata; |
381 | 382 | ||
382 | void *activity_data; | 383 | void *activity_data; |
383 | 384 | ||
384 | /* | 385 | /* |
385 | * queue needs bounce pages for pages above this limit | 386 | * queue needs bounce pages for pages above this limit |
386 | */ | 387 | */ |
387 | unsigned long bounce_pfn; | 388 | unsigned long bounce_pfn; |
388 | gfp_t bounce_gfp; | 389 | gfp_t bounce_gfp; |
389 | 390 | ||
390 | /* | 391 | /* |
391 | * various queue flags, see QUEUE_* below | 392 | * various queue flags, see QUEUE_* below |
392 | */ | 393 | */ |
393 | unsigned long queue_flags; | 394 | unsigned long queue_flags; |
394 | 395 | ||
395 | /* | 396 | /* |
396 | * protects queue structures from reentrancy. ->__queue_lock should | 397 | * protects queue structures from reentrancy. ->__queue_lock should |
397 | * _never_ be used directly, it is queue private. always use | 398 | * _never_ be used directly, it is queue private. always use |
398 | * ->queue_lock. | 399 | * ->queue_lock. |
399 | */ | 400 | */ |
400 | spinlock_t __queue_lock; | 401 | spinlock_t __queue_lock; |
401 | spinlock_t *queue_lock; | 402 | spinlock_t *queue_lock; |
402 | 403 | ||
403 | /* | 404 | /* |
404 | * queue kobject | 405 | * queue kobject |
405 | */ | 406 | */ |
406 | struct kobject kobj; | 407 | struct kobject kobj; |
407 | 408 | ||
408 | /* | 409 | /* |
409 | * queue settings | 410 | * queue settings |
410 | */ | 411 | */ |
411 | unsigned long nr_requests; /* Max # of requests */ | 412 | unsigned long nr_requests; /* Max # of requests */ |
412 | unsigned int nr_congestion_on; | 413 | unsigned int nr_congestion_on; |
413 | unsigned int nr_congestion_off; | 414 | unsigned int nr_congestion_off; |
414 | unsigned int nr_batching; | 415 | unsigned int nr_batching; |
415 | 416 | ||
416 | unsigned int max_sectors; | 417 | unsigned int max_sectors; |
417 | unsigned int max_hw_sectors; | 418 | unsigned int max_hw_sectors; |
418 | unsigned short max_phys_segments; | 419 | unsigned short max_phys_segments; |
419 | unsigned short max_hw_segments; | 420 | unsigned short max_hw_segments; |
420 | unsigned short hardsect_size; | 421 | unsigned short hardsect_size; |
421 | unsigned int max_segment_size; | 422 | unsigned int max_segment_size; |
422 | 423 | ||
423 | unsigned long seg_boundary_mask; | 424 | unsigned long seg_boundary_mask; |
424 | unsigned int dma_alignment; | 425 | unsigned int dma_alignment; |
425 | 426 | ||
426 | struct blk_queue_tag *queue_tags; | 427 | struct blk_queue_tag *queue_tags; |
427 | 428 | ||
428 | unsigned int nr_sorted; | 429 | unsigned int nr_sorted; |
429 | unsigned int in_flight; | 430 | unsigned int in_flight; |
430 | 431 | ||
431 | /* | 432 | /* |
432 | * sg stuff | 433 | * sg stuff |
433 | */ | 434 | */ |
434 | unsigned int sg_timeout; | 435 | unsigned int sg_timeout; |
435 | unsigned int sg_reserved_size; | 436 | unsigned int sg_reserved_size; |
436 | int node; | 437 | int node; |
437 | #ifdef CONFIG_BLK_DEV_IO_TRACE | 438 | #ifdef CONFIG_BLK_DEV_IO_TRACE |
438 | struct blk_trace *blk_trace; | 439 | struct blk_trace *blk_trace; |
439 | #endif | 440 | #endif |
440 | /* | 441 | /* |
441 | * reserved for flush operations | 442 | * reserved for flush operations |
442 | */ | 443 | */ |
443 | unsigned int ordered, next_ordered, ordseq; | 444 | unsigned int ordered, next_ordered, ordseq; |
444 | int orderr, ordcolor; | 445 | int orderr, ordcolor; |
445 | struct request pre_flush_rq, bar_rq, post_flush_rq; | 446 | struct request pre_flush_rq, bar_rq, post_flush_rq; |
446 | struct request *orig_bar_rq; | 447 | struct request *orig_bar_rq; |
447 | unsigned int bi_size; | 448 | unsigned int bi_size; |
448 | 449 | ||
449 | struct mutex sysfs_lock; | 450 | struct mutex sysfs_lock; |
450 | }; | 451 | }; |
451 | 452 | ||
452 | #define RQ_INACTIVE (-1) | 453 | #define RQ_INACTIVE (-1) |
453 | #define RQ_ACTIVE 1 | 454 | #define RQ_ACTIVE 1 |
454 | 455 | ||
455 | #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ | 456 | #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ |
456 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ | 457 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ |
457 | #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ | 458 | #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ |
458 | #define QUEUE_FLAG_READFULL 3 /* read queue has been filled */ | 459 | #define QUEUE_FLAG_READFULL 3 /* read queue has been filled */ |
459 | #define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */ | 460 | #define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */ |
460 | #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ | 461 | #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ |
461 | #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ | 462 | #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ |
462 | #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ | 463 | #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ |
463 | #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ | 464 | #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ |
464 | 465 | ||
465 | enum { | 466 | enum { |
466 | /* | 467 | /* |
467 | * Hardbarrier is supported with one of the following methods. | 468 | * Hardbarrier is supported with one of the following methods. |
468 | * | 469 | * |
469 | * NONE : hardbarrier unsupported | 470 | * NONE : hardbarrier unsupported |
470 | * DRAIN : ordering by draining is enough | 471 | * DRAIN : ordering by draining is enough |
471 | * DRAIN_FLUSH : ordering by draining w/ pre and post flushes | 472 | * DRAIN_FLUSH : ordering by draining w/ pre and post flushes |
472 | * DRAIN_FUA : ordering by draining w/ pre flush and FUA write | 473 | * DRAIN_FUA : ordering by draining w/ pre flush and FUA write |
473 | * TAG : ordering by tag is enough | 474 | * TAG : ordering by tag is enough |
474 | * TAG_FLUSH : ordering by tag w/ pre and post flushes | 475 | * TAG_FLUSH : ordering by tag w/ pre and post flushes |
475 | * TAG_FUA : ordering by tag w/ pre flush and FUA write | 476 | * TAG_FUA : ordering by tag w/ pre flush and FUA write |
476 | */ | 477 | */ |
477 | QUEUE_ORDERED_NONE = 0x00, | 478 | QUEUE_ORDERED_NONE = 0x00, |
478 | QUEUE_ORDERED_DRAIN = 0x01, | 479 | QUEUE_ORDERED_DRAIN = 0x01, |
479 | QUEUE_ORDERED_TAG = 0x02, | 480 | QUEUE_ORDERED_TAG = 0x02, |
480 | 481 | ||
481 | QUEUE_ORDERED_PREFLUSH = 0x10, | 482 | QUEUE_ORDERED_PREFLUSH = 0x10, |
482 | QUEUE_ORDERED_POSTFLUSH = 0x20, | 483 | QUEUE_ORDERED_POSTFLUSH = 0x20, |
483 | QUEUE_ORDERED_FUA = 0x40, | 484 | QUEUE_ORDERED_FUA = 0x40, |
484 | 485 | ||
485 | QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | | 486 | QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | |
486 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, | 487 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, |
487 | QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | | 488 | QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | |
488 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, | 489 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, |
489 | QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | | 490 | QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | |
490 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, | 491 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, |
491 | QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | | 492 | QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | |
492 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, | 493 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, |
493 | 494 | ||
494 | /* | 495 | /* |
495 | * Ordered operation sequence | 496 | * Ordered operation sequence |
496 | */ | 497 | */ |
497 | QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ | 498 | QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ |
498 | QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ | 499 | QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ |
499 | QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ | 500 | QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ |
500 | QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ | 501 | QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ |
501 | QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ | 502 | QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ |
502 | QUEUE_ORDSEQ_DONE = 0x20, | 503 | QUEUE_ORDSEQ_DONE = 0x20, |
503 | }; | 504 | }; |
504 | 505 | ||
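Which QUEUE_ORDERED_* mode a queue advertises is up to the driver. A hedged sketch of setup for a drive with a volatile write cache, using blk_queue_ordered() declared further down in this header (my_* names and the flush command are purely illustrative):

        /* Hypothetical: fill rq with whatever the hardware needs to flush its
         * write cache; the real contents are entirely device-specific. */
        static void my_prepare_flush(request_queue_t *q, struct request *rq)
        {
                memset(rq->cmd, 0, sizeof(rq->cmd));
                /* ... build a cache-flush command in rq->cmd / rq->cmd_len ... */
        }

        static int my_setup_barriers(request_queue_t *q)
        {
                /* drain the queue and issue pre/post flushes around the barrier */
                return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, my_prepare_flush);
        }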
505 | #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) | 506 | #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) |
506 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) | 507 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) |
507 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) | 508 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) |
508 | #define blk_queue_flushing(q) ((q)->ordseq) | 509 | #define blk_queue_flushing(q) ((q)->ordseq) |
509 | 510 | ||
510 | #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) | 511 | #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) |
511 | #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) | 512 | #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) |
512 | #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) | 513 | #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) |
513 | #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) | 514 | #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) |
514 | 515 | ||
515 | #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) | 516 | #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) |
516 | #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) | 517 | #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) |
517 | 518 | ||
518 | #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) | 519 | #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) |
519 | 520 | ||
520 | #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) | 521 | #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) |
521 | #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) | 522 | #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) |
522 | #define blk_pm_request(rq) \ | 523 | #define blk_pm_request(rq) \ |
523 | (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) | 524 | (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) |
524 | 525 | ||
525 | #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) | 526 | #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) |
526 | #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) | 527 | #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) |
527 | #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) | 528 | #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) |
528 | 529 | ||
529 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) | 530 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) |
530 | 531 | ||
531 | #define rq_data_dir(rq) ((rq)->cmd_flags & 1) | 532 | #define rq_data_dir(rq) ((rq)->cmd_flags & 1) |
532 | 533 | ||
533 | static inline int blk_queue_full(struct request_queue *q, int rw) | 534 | static inline int blk_queue_full(struct request_queue *q, int rw) |
534 | { | 535 | { |
535 | if (rw == READ) | 536 | if (rw == READ) |
536 | return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); | 537 | return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); |
537 | return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); | 538 | return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); |
538 | } | 539 | } |
539 | 540 | ||
540 | static inline void blk_set_queue_full(struct request_queue *q, int rw) | 541 | static inline void blk_set_queue_full(struct request_queue *q, int rw) |
541 | { | 542 | { |
542 | if (rw == READ) | 543 | if (rw == READ) |
543 | set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); | 544 | set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); |
544 | else | 545 | else |
545 | set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); | 546 | set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); |
546 | } | 547 | } |
547 | 548 | ||
548 | static inline void blk_clear_queue_full(struct request_queue *q, int rw) | 549 | static inline void blk_clear_queue_full(struct request_queue *q, int rw) |
549 | { | 550 | { |
550 | if (rw == READ) | 551 | if (rw == READ) |
551 | clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); | 552 | clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); |
552 | else | 553 | else |
553 | clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); | 554 | clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); |
554 | } | 555 | } |
555 | 556 | ||
556 | 557 | ||
557 | /* | 558 | /* |
558 | * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may | 559 | * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may |
559 | * it already be started by driver. | 560 | * it already be started by driver. |
560 | */ | 561 | */ |
561 | #define RQ_NOMERGE_FLAGS \ | 562 | #define RQ_NOMERGE_FLAGS \ |
562 | (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) | 563 | (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) |
563 | #define rq_mergeable(rq) \ | 564 | #define rq_mergeable(rq) \ |
564 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) | 565 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) |
565 | 566 | ||
566 | /* | 567 | /* |
567 | * noop, requests are automagically marked as active/inactive by I/O | 568 | * noop, requests are automagically marked as active/inactive by I/O |
568 | * scheduler -- see elv_next_request | 569 | * scheduler -- see elv_next_request |
569 | */ | 570 | */ |
570 | #define blk_queue_headactive(q, head_active) | 571 | #define blk_queue_headactive(q, head_active) |
571 | 572 | ||
572 | /* | 573 | /* |
573 | * q->prep_rq_fn return values | 574 | * q->prep_rq_fn return values |
574 | */ | 575 | */ |
575 | #define BLKPREP_OK 0 /* serve it */ | 576 | #define BLKPREP_OK 0 /* serve it */ |
576 | #define BLKPREP_KILL 1 /* fatal error, kill */ | 577 | #define BLKPREP_KILL 1 /* fatal error, kill */ |
577 | #define BLKPREP_DEFER 2 /* leave on queue */ | 578 | #define BLKPREP_DEFER 2 /* leave on queue */ |
578 | 579 | ||
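A prep_rq_fn either builds the device command for a request or asks for it to be deferred or killed. A minimal, hypothetical hook (the my_* names and the queuedata layout are assumptions, not block-layer API), installed via blk_queue_prep_rq() declared further down:

        struct my_device {
                int resetting;                  /* assumed driver state */
        };

        static int my_prep_rq(request_queue_t *q, struct request *rq)
        {
                struct my_device *dev = q->queuedata;

                if (dev->resetting)
                        return BLKPREP_DEFER;   /* leave it on the queue for now */

                /* ... build the on-wire command into rq->cmd here ... */
                rq->cmd_flags |= REQ_DONTPREP;  /* don't prepare again on requeue */
                return BLKPREP_OK;
        }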
579 | extern unsigned long blk_max_low_pfn, blk_max_pfn; | 580 | extern unsigned long blk_max_low_pfn, blk_max_pfn; |
580 | 581 | ||
581 | /* | 582 | /* |
582 | * standard bounce addresses: | 583 | * standard bounce addresses: |
583 | * | 584 | * |
584 | * BLK_BOUNCE_HIGH : bounce all highmem pages | 585 | * BLK_BOUNCE_HIGH : bounce all highmem pages |
585 | * BLK_BOUNCE_ANY : don't bounce anything | 586 | * BLK_BOUNCE_ANY : don't bounce anything |
586 | * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary | 587 | * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary |
587 | */ | 588 | */ |
588 | #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) | 589 | #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) |
589 | #define BLK_BOUNCE_ANY ((u64)blk_max_pfn << PAGE_SHIFT) | 590 | #define BLK_BOUNCE_ANY ((u64)blk_max_pfn << PAGE_SHIFT) |
590 | #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) | 591 | #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) |
591 | 592 | ||
592 | #ifdef CONFIG_MMU | 593 | #ifdef CONFIG_MMU |
593 | extern int init_emergency_isa_pool(void); | 594 | extern int init_emergency_isa_pool(void); |
594 | extern void blk_queue_bounce(request_queue_t *q, struct bio **bio); | 595 | extern void blk_queue_bounce(request_queue_t *q, struct bio **bio); |
595 | #else | 596 | #else |
596 | static inline int init_emergency_isa_pool(void) | 597 | static inline int init_emergency_isa_pool(void) |
597 | { | 598 | { |
598 | return 0; | 599 | return 0; |
599 | } | 600 | } |
600 | static inline void blk_queue_bounce(request_queue_t *q, struct bio **bio) | 601 | static inline void blk_queue_bounce(request_queue_t *q, struct bio **bio) |
601 | { | 602 | { |
602 | } | 603 | } |
603 | #endif /* CONFIG_MMU */ | 604 | #endif /* CONFIG_MMU */ |
604 | 605 | ||
605 | #define rq_for_each_bio(_bio, rq) \ | 606 | #define rq_for_each_bio(_bio, rq) \ |
606 | if ((rq->bio)) \ | 607 | if ((rq->bio)) \ |
607 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) | 608 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) |
608 | 609 | ||
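rq_for_each_bio() walks the bio chain hanging off a request; for instance (an illustrative helper, not a block-layer function):

        /* Count the bios attached to a request. */
        static unsigned int my_count_bios(struct request *rq)
        {
                struct bio *bio;
                unsigned int n = 0;

                rq_for_each_bio(bio, rq)
                        n++;

                return n;
        }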
609 | struct sec_size { | 610 | struct sec_size { |
610 | unsigned block_size; | 611 | unsigned block_size; |
611 | unsigned block_size_bits; | 612 | unsigned block_size_bits; |
612 | }; | 613 | }; |
613 | 614 | ||
614 | extern int blk_register_queue(struct gendisk *disk); | 615 | extern int blk_register_queue(struct gendisk *disk); |
615 | extern void blk_unregister_queue(struct gendisk *disk); | 616 | extern void blk_unregister_queue(struct gendisk *disk); |
616 | extern void register_disk(struct gendisk *dev); | 617 | extern void register_disk(struct gendisk *dev); |
617 | extern void generic_make_request(struct bio *bio); | 618 | extern void generic_make_request(struct bio *bio); |
618 | extern void blk_put_request(struct request *); | 619 | extern void blk_put_request(struct request *); |
619 | extern void __blk_put_request(request_queue_t *, struct request *); | 620 | extern void __blk_put_request(request_queue_t *, struct request *); |
620 | extern void blk_end_sync_rq(struct request *rq, int error); | 621 | extern void blk_end_sync_rq(struct request *rq, int error); |
621 | extern struct request *blk_get_request(request_queue_t *, int, gfp_t); | 622 | extern struct request *blk_get_request(request_queue_t *, int, gfp_t); |
622 | extern void blk_insert_request(request_queue_t *, struct request *, int, void *); | 623 | extern void blk_insert_request(request_queue_t *, struct request *, int, void *); |
623 | extern void blk_requeue_request(request_queue_t *, struct request *); | 624 | extern void blk_requeue_request(request_queue_t *, struct request *); |
624 | extern void blk_plug_device(request_queue_t *); | 625 | extern void blk_plug_device(request_queue_t *); |
625 | extern int blk_remove_plug(request_queue_t *); | 626 | extern int blk_remove_plug(request_queue_t *); |
626 | extern void blk_recount_segments(request_queue_t *, struct bio *); | 627 | extern void blk_recount_segments(request_queue_t *, struct bio *); |
627 | extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *); | 628 | extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *); |
628 | extern int sg_scsi_ioctl(struct file *, struct request_queue *, | 629 | extern int sg_scsi_ioctl(struct file *, struct request_queue *, |
629 | struct gendisk *, struct scsi_ioctl_command __user *); | 630 | struct gendisk *, struct scsi_ioctl_command __user *); |
630 | extern void blk_start_queue(request_queue_t *q); | 631 | extern void blk_start_queue(request_queue_t *q); |
631 | extern void blk_stop_queue(request_queue_t *q); | 632 | extern void blk_stop_queue(request_queue_t *q); |
632 | extern void blk_sync_queue(struct request_queue *q); | 633 | extern void blk_sync_queue(struct request_queue *q); |
633 | extern void __blk_stop_queue(request_queue_t *q); | 634 | extern void __blk_stop_queue(request_queue_t *q); |
634 | extern void blk_run_queue(request_queue_t *); | 635 | extern void blk_run_queue(request_queue_t *); |
635 | extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); | 636 | extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); |
636 | extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); | 637 | extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); |
637 | extern int blk_rq_unmap_user(struct bio *, unsigned int); | 638 | extern int blk_rq_unmap_user(struct bio *, unsigned int); |
638 | extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, gfp_t); | 639 | extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, gfp_t); |
639 | extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int); | 640 | extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int); |
640 | extern int blk_execute_rq(request_queue_t *, struct gendisk *, | 641 | extern int blk_execute_rq(request_queue_t *, struct gendisk *, |
641 | struct request *, int); | 642 | struct request *, int); |
642 | extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *, | 643 | extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *, |
643 | struct request *, int, rq_end_io_fn *); | 644 | struct request *, int, rq_end_io_fn *); |
644 | 645 | ||
645 | static inline request_queue_t *bdev_get_queue(struct block_device *bdev) | 646 | static inline request_queue_t *bdev_get_queue(struct block_device *bdev) |
646 | { | 647 | { |
647 | return bdev->bd_disk->queue; | 648 | return bdev->bd_disk->queue; |
648 | } | 649 | } |
649 | 650 | ||
650 | static inline void blk_run_backing_dev(struct backing_dev_info *bdi, | 651 | static inline void blk_run_backing_dev(struct backing_dev_info *bdi, |
651 | struct page *page) | 652 | struct page *page) |
652 | { | 653 | { |
653 | if (bdi && bdi->unplug_io_fn) | 654 | if (bdi && bdi->unplug_io_fn) |
654 | bdi->unplug_io_fn(bdi, page); | 655 | bdi->unplug_io_fn(bdi, page); |
655 | } | 656 | } |
656 | 657 | ||
657 | static inline void blk_run_address_space(struct address_space *mapping) | 658 | static inline void blk_run_address_space(struct address_space *mapping) |
658 | { | 659 | { |
659 | if (mapping) | 660 | if (mapping) |
660 | blk_run_backing_dev(mapping->backing_dev_info, NULL); | 661 | blk_run_backing_dev(mapping->backing_dev_info, NULL); |
661 | } | 662 | } |
662 | 663 | ||
663 | /* | 664 | /* |
664 | * end_request() and friends. Must be called with the request queue spinlock | 665 | * end_request() and friends. Must be called with the request queue spinlock |
665 | * acquired. All functions called within end_request() _must_be_ atomic. | 666 | * acquired. All functions called within end_request() _must_be_ atomic. |
666 | * | 667 | * |
667 | * Several drivers define their own end_request and call | 668 | * Several drivers define their own end_request and call |
668 | * end_that_request_first() and end_that_request_last() | 669 | * end_that_request_first() and end_that_request_last() |
669 | * for parts of the original function. This prevents | 670 | * for parts of the original function. This prevents |
670 | * code duplication in drivers. | 671 | * code duplication in drivers. |
671 | */ | 672 | */ |
672 | extern int end_that_request_first(struct request *, int, int); | 673 | extern int end_that_request_first(struct request *, int, int); |
673 | extern int end_that_request_chunk(struct request *, int, int); | 674 | extern int end_that_request_chunk(struct request *, int, int); |
674 | extern void end_that_request_last(struct request *, int); | 675 | extern void end_that_request_last(struct request *, int); |
675 | extern void end_request(struct request *req, int uptodate); | 676 | extern void end_request(struct request *req, int uptodate); |
676 | extern void blk_complete_request(struct request *); | 677 | extern void blk_complete_request(struct request *); |
677 | 678 | ||
678 | static inline int rq_all_done(struct request *rq, unsigned int nr_bytes) | 679 | static inline int rq_all_done(struct request *rq, unsigned int nr_bytes) |
679 | { | 680 | { |
680 | if (blk_fs_request(rq)) | 681 | if (blk_fs_request(rq)) |
681 | return (nr_bytes >= (rq->hard_nr_sectors << 9)); | 682 | return (nr_bytes >= (rq->hard_nr_sectors << 9)); |
682 | else if (blk_pc_request(rq)) | 683 | else if (blk_pc_request(rq)) |
683 | return nr_bytes >= rq->data_len; | 684 | return nr_bytes >= rq->data_len; |
684 | 685 | ||
685 | return 0; | 686 | return 0; |
686 | } | 687 | } |
687 | 688 | ||
688 | /* | 689 | /* |
689 | * end_that_request_first/chunk() takes an uptodate argument. we account | 690 | * end_that_request_first/chunk() takes an uptodate argument. we account |
690 | * any value <= as an io error. 0 means -EIO for compatability reasons, | 691 | * any value <= as an io error. 0 means -EIO for compatability reasons, |
691 | * any other < 0 value is the direct error type. An uptodate value of | 692 | * any other < 0 value is the direct error type. An uptodate value of |
692 | * 1 indicates successful io completion | 693 | * 1 indicates successful io completion |
693 | */ | 694 | */ |
694 | #define end_io_error(uptodate) (unlikely((uptodate) <= 0)) | 695 | #define end_io_error(uptodate) (unlikely((uptodate) <= 0)) |
695 | 696 | ||
696 | static inline void blkdev_dequeue_request(struct request *req) | 697 | static inline void blkdev_dequeue_request(struct request *req) |
697 | { | 698 | { |
698 | elv_dequeue_request(req->q, req); | 699 | elv_dequeue_request(req->q, req); |
699 | } | 700 | } |
700 | 701 | ||
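Put together, a driver that completes whole requests at once might use the pieces above roughly as follows (a hedged sketch; partial completions and error accounting are deliberately glossed over):

        /* Hypothetical: complete every remaining sector of rq with the given
         * uptodate value, then retire it. Queue lock assumed held. */
        static void my_end_request(struct request *rq, int uptodate)
        {
                /* end_that_request_first() returns 0 once nothing is left pending */
                if (!end_that_request_first(rq, uptodate, rq->hard_nr_sectors)) {
                        blkdev_dequeue_request(rq);
                        end_that_request_last(rq, uptodate);
                }
        }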
701 | /* | 702 | /* |
702 | * Access functions for manipulating queue properties | 703 | * Access functions for manipulating queue properties |
703 | */ | 704 | */ |
704 | extern request_queue_t *blk_init_queue_node(request_fn_proc *rfn, | 705 | extern request_queue_t *blk_init_queue_node(request_fn_proc *rfn, |
705 | spinlock_t *lock, int node_id); | 706 | spinlock_t *lock, int node_id); |
706 | extern request_queue_t *blk_init_queue(request_fn_proc *, spinlock_t *); | 707 | extern request_queue_t *blk_init_queue(request_fn_proc *, spinlock_t *); |
707 | extern void blk_cleanup_queue(request_queue_t *); | 708 | extern void blk_cleanup_queue(request_queue_t *); |
708 | extern void blk_queue_make_request(request_queue_t *, make_request_fn *); | 709 | extern void blk_queue_make_request(request_queue_t *, make_request_fn *); |
709 | extern void blk_queue_bounce_limit(request_queue_t *, u64); | 710 | extern void blk_queue_bounce_limit(request_queue_t *, u64); |
710 | extern void blk_queue_max_sectors(request_queue_t *, unsigned int); | 711 | extern void blk_queue_max_sectors(request_queue_t *, unsigned int); |
711 | extern void blk_queue_max_phys_segments(request_queue_t *, unsigned short); | 712 | extern void blk_queue_max_phys_segments(request_queue_t *, unsigned short); |
712 | extern void blk_queue_max_hw_segments(request_queue_t *, unsigned short); | 713 | extern void blk_queue_max_hw_segments(request_queue_t *, unsigned short); |
713 | extern void blk_queue_max_segment_size(request_queue_t *, unsigned int); | 714 | extern void blk_queue_max_segment_size(request_queue_t *, unsigned int); |
714 | extern void blk_queue_hardsect_size(request_queue_t *, unsigned short); | 715 | extern void blk_queue_hardsect_size(request_queue_t *, unsigned short); |
715 | extern void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b); | 716 | extern void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b); |
716 | extern void blk_queue_segment_boundary(request_queue_t *, unsigned long); | 717 | extern void blk_queue_segment_boundary(request_queue_t *, unsigned long); |
717 | extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn); | 718 | extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn); |
718 | extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *); | 719 | extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *); |
719 | extern void blk_queue_dma_alignment(request_queue_t *, int); | 720 | extern void blk_queue_dma_alignment(request_queue_t *, int); |
720 | extern void blk_queue_softirq_done(request_queue_t *, softirq_done_fn *); | 721 | extern void blk_queue_softirq_done(request_queue_t *, softirq_done_fn *); |
721 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); | 722 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); |
722 | extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *); | 723 | extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *); |
723 | extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *); | 724 | extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *); |
724 | extern int blk_do_ordered(request_queue_t *, struct request **); | 725 | extern int blk_do_ordered(request_queue_t *, struct request **); |
725 | extern unsigned blk_ordered_cur_seq(request_queue_t *); | 726 | extern unsigned blk_ordered_cur_seq(request_queue_t *); |
726 | extern unsigned blk_ordered_req_seq(struct request *); | 727 | extern unsigned blk_ordered_req_seq(struct request *); |
727 | extern void blk_ordered_complete_seq(request_queue_t *, unsigned, int); | 728 | extern void blk_ordered_complete_seq(request_queue_t *, unsigned, int); |
728 | 729 | ||
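A typical probe path wires these together; a minimal, assumed example (the limits and my_* names are illustrative, not requirements):

        static request_queue_t *my_setup_queue(request_fn_proc *fn, spinlock_t *lock)
        {
                request_queue_t *q;

                q = blk_init_queue(fn, lock);
                if (!q)
                        return NULL;

                blk_queue_max_sectors(q, 255);          /* per-request size cap */
                blk_queue_max_phys_segments(q, 128);
                blk_queue_max_hw_segments(q, 128);
                blk_queue_hardsect_size(q, 512);        /* device sector size */

                return q;
        }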
729 | extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *); | 730 | extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *); |
730 | extern void blk_dump_rq_flags(struct request *, char *); | 731 | extern void blk_dump_rq_flags(struct request *, char *); |
731 | extern void generic_unplug_device(request_queue_t *); | 732 | extern void generic_unplug_device(request_queue_t *); |
732 | extern void __generic_unplug_device(request_queue_t *); | 733 | extern void __generic_unplug_device(request_queue_t *); |
733 | extern long nr_blockdev_pages(void); | 734 | extern long nr_blockdev_pages(void); |
734 | 735 | ||
735 | int blk_get_queue(request_queue_t *); | 736 | int blk_get_queue(request_queue_t *); |
736 | request_queue_t *blk_alloc_queue(gfp_t); | 737 | request_queue_t *blk_alloc_queue(gfp_t); |
737 | request_queue_t *blk_alloc_queue_node(gfp_t, int); | 738 | request_queue_t *blk_alloc_queue_node(gfp_t, int); |
738 | extern void blk_put_queue(request_queue_t *); | 739 | extern void blk_put_queue(request_queue_t *); |
739 | 740 | ||
740 | /* | 741 | /* |
741 | * tag stuff | 742 | * tag stuff |
742 | */ | 743 | */ |
743 | #define blk_queue_tag_depth(q) ((q)->queue_tags->busy) | 744 | #define blk_queue_tag_depth(q) ((q)->queue_tags->busy) |
744 | #define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth) | 745 | #define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth) |
745 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) | 746 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) |
746 | extern int blk_queue_start_tag(request_queue_t *, struct request *); | 747 | extern int blk_queue_start_tag(request_queue_t *, struct request *); |
747 | extern struct request *blk_queue_find_tag(request_queue_t *, int); | 748 | extern struct request *blk_queue_find_tag(request_queue_t *, int); |
748 | extern void blk_queue_end_tag(request_queue_t *, struct request *); | 749 | extern void blk_queue_end_tag(request_queue_t *, struct request *); |
749 | extern int blk_queue_init_tags(request_queue_t *, int, struct blk_queue_tag *); | 750 | extern int blk_queue_init_tags(request_queue_t *, int, struct blk_queue_tag *); |
750 | extern void blk_queue_free_tags(request_queue_t *); | 751 | extern void blk_queue_free_tags(request_queue_t *); |
751 | extern int blk_queue_resize_tags(request_queue_t *, int); | 752 | extern int blk_queue_resize_tags(request_queue_t *, int); |
752 | extern void blk_queue_invalidate_tags(request_queue_t *); | 753 | extern void blk_queue_invalidate_tags(request_queue_t *); |
753 | extern long blk_congestion_wait(int rw, long timeout); | 754 | extern long blk_congestion_wait(int rw, long timeout); |
754 | extern struct blk_queue_tag *blk_init_tags(int); | 755 | extern struct blk_queue_tag *blk_init_tags(int); |
755 | extern void blk_free_tags(struct blk_queue_tag *); | 756 | extern void blk_free_tags(struct blk_queue_tag *); |
756 | extern void blk_congestion_end(int rw); | 757 | extern void blk_congestion_end(int rw); |
757 | 758 | ||
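With tagging enabled (e.g. blk_queue_init_tags(q, 32, NULL) at setup time), a request_fn can hand tags straight to the hardware. A hedged sketch, using elv_next_request() from elevator.h:

        /* Illustrative request_fn fragment: tag each request before issuing it.
         * blk_queue_start_tag() also dequeues the request on success. */
        static void my_request_fn(request_queue_t *q)
        {
                struct request *rq;

                while ((rq = elv_next_request(q)) != NULL) {
                        if (blk_queue_start_tag(q, rq))
                                break;  /* no free tags, come back later */

                        /* ... issue rq to the device using rq->tag ... */
                }
        }

On completion the driver would release the tag with blk_queue_end_tag() before retiring the request.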
758 | extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *); | 759 | extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *); |
759 | extern int blkdev_issue_flush(struct block_device *, sector_t *); | 760 | extern int blkdev_issue_flush(struct block_device *, sector_t *); |
760 | 761 | ||
761 | #define MAX_PHYS_SEGMENTS 128 | 762 | #define MAX_PHYS_SEGMENTS 128 |
762 | #define MAX_HW_SEGMENTS 128 | 763 | #define MAX_HW_SEGMENTS 128 |
763 | #define SAFE_MAX_SECTORS 255 | 764 | #define SAFE_MAX_SECTORS 255 |
764 | #define BLK_DEF_MAX_SECTORS 1024 | 765 | #define BLK_DEF_MAX_SECTORS 1024 |
765 | 766 | ||
766 | #define MAX_SEGMENT_SIZE 65536 | 767 | #define MAX_SEGMENT_SIZE 65536 |
767 | 768 | ||
768 | #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) | 769 | #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) |
769 | 770 | ||
770 | static inline int queue_hardsect_size(request_queue_t *q) | 771 | static inline int queue_hardsect_size(request_queue_t *q) |
771 | { | 772 | { |
772 | int retval = 512; | 773 | int retval = 512; |
773 | 774 | ||
774 | if (q && q->hardsect_size) | 775 | if (q && q->hardsect_size) |
775 | retval = q->hardsect_size; | 776 | retval = q->hardsect_size; |
776 | 777 | ||
777 | return retval; | 778 | return retval; |
778 | } | 779 | } |
779 | 780 | ||
780 | static inline int bdev_hardsect_size(struct block_device *bdev) | 781 | static inline int bdev_hardsect_size(struct block_device *bdev) |
781 | { | 782 | { |
782 | return queue_hardsect_size(bdev_get_queue(bdev)); | 783 | return queue_hardsect_size(bdev_get_queue(bdev)); |
783 | } | 784 | } |
784 | 785 | ||
785 | static inline int queue_dma_alignment(request_queue_t *q) | 786 | static inline int queue_dma_alignment(request_queue_t *q) |
786 | { | 787 | { |
787 | int retval = 511; | 788 | int retval = 511; |
788 | 789 | ||
789 | if (q && q->dma_alignment) | 790 | if (q && q->dma_alignment) |
790 | retval = q->dma_alignment; | 791 | retval = q->dma_alignment; |
791 | 792 | ||
792 | return retval; | 793 | return retval; |
793 | } | 794 | } |
794 | 795 | ||
795 | static inline int bdev_dma_aligment(struct block_device *bdev) | 796 | static inline int bdev_dma_aligment(struct block_device *bdev) |
796 | { | 797 | { |
797 | return queue_dma_alignment(bdev_get_queue(bdev)); | 798 | return queue_dma_alignment(bdev_get_queue(bdev)); |
798 | } | 799 | } |
799 | 800 | ||
800 | #define blk_finished_io(nsects) do { } while (0) | 801 | #define blk_finished_io(nsects) do { } while (0) |
801 | #define blk_started_io(nsects) do { } while (0) | 802 | #define blk_started_io(nsects) do { } while (0) |
802 | 803 | ||
803 | /* assumes size > 256 */ | 804 | /* assumes size > 256 */ |
804 | static inline unsigned int blksize_bits(unsigned int size) | 805 | static inline unsigned int blksize_bits(unsigned int size) |
805 | { | 806 | { |
806 | unsigned int bits = 8; | 807 | unsigned int bits = 8; |
807 | do { | 808 | do { |
808 | bits++; | 809 | bits++; |
809 | size >>= 1; | 810 | size >>= 1; |
810 | } while (size > 256); | 811 | } while (size > 256); |
811 | return bits; | 812 | return bits; |
812 | } | 813 | } |
813 | 814 | ||
814 | static inline unsigned int block_size(struct block_device *bdev) | 815 | static inline unsigned int block_size(struct block_device *bdev) |
815 | { | 816 | { |
816 | return bdev->bd_block_size; | 817 | return bdev->bd_block_size; |
817 | } | 818 | } |
818 | 819 | ||
819 | typedef struct {struct page *v;} Sector; | 820 | typedef struct {struct page *v;} Sector; |
820 | 821 | ||
821 | unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); | 822 | unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); |
822 | 823 | ||
823 | static inline void put_dev_sector(Sector p) | 824 | static inline void put_dev_sector(Sector p) |
824 | { | 825 | { |
825 | page_cache_release(p.v); | 826 | page_cache_release(p.v); |
826 | } | 827 | } |
827 | 828 | ||
828 | struct work_struct; | 829 | struct work_struct; |
829 | int kblockd_schedule_work(struct work_struct *work); | 830 | int kblockd_schedule_work(struct work_struct *work); |
830 | void kblockd_flush(void); | 831 | void kblockd_flush(void); |
831 | 832 | ||
832 | #ifdef CONFIG_LBD | 833 | #ifdef CONFIG_LBD |
833 | # include <asm/div64.h> | 834 | # include <asm/div64.h> |
834 | # define sector_div(a, b) do_div(a, b) | 835 | # define sector_div(a, b) do_div(a, b) |
835 | #else | 836 | #else |
836 | # define sector_div(n, b)( \ | 837 | # define sector_div(n, b)( \ |
837 | { \ | 838 | { \ |
838 | int _res; \ | 839 | int _res; \ |
839 | _res = (n) % (b); \ | 840 | _res = (n) % (b); \ |
840 | (n) /= (b); \ | 841 | (n) /= (b); \ |
841 | _res; \ | 842 | _res; \ |
842 | } \ | 843 | } \ |
843 | ) | 844 | ) |
844 | #endif | 845 | #endif |
845 | 846 | ||
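sector_div() exists because a plain 64-bit divide is not available on every 32-bit platform; it divides its first argument in place and returns the remainder. A small assumed example:

        /* Which chunk of a striped device does this sector fall in? */
        static sector_t my_chunk_number(sector_t sector, unsigned int chunk_sectors)
        {
                sector_div(sector, chunk_sectors);      /* sector becomes the quotient */
                return sector;
        }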
846 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ | 847 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ |
847 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) | 848 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) |
848 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ | 849 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ |
849 | MODULE_ALIAS("block-major-" __stringify(major) "-*") | 850 | MODULE_ALIAS("block-major-" __stringify(major) "-*") |
850 | 851 | ||
851 | 852 | ||
852 | #endif | 853 | #endif |
853 | 854 |
include/linux/elevator.h
1 | #ifndef _LINUX_ELEVATOR_H | 1 | #ifndef _LINUX_ELEVATOR_H |
2 | #define _LINUX_ELEVATOR_H | 2 | #define _LINUX_ELEVATOR_H |
3 | 3 | ||
4 | typedef int (elevator_merge_fn) (request_queue_t *, struct request **, | 4 | typedef int (elevator_merge_fn) (request_queue_t *, struct request **, |
5 | struct bio *); | 5 | struct bio *); |
6 | 6 | ||
7 | typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struct request *); | 7 | typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struct request *); |
8 | 8 | ||
9 | typedef void (elevator_merged_fn) (request_queue_t *, struct request *); | 9 | typedef void (elevator_merged_fn) (request_queue_t *, struct request *, int); |
10 | 10 | ||
11 | typedef int (elevator_dispatch_fn) (request_queue_t *, int); | 11 | typedef int (elevator_dispatch_fn) (request_queue_t *, int); |
12 | 12 | ||
13 | typedef void (elevator_add_req_fn) (request_queue_t *, struct request *); | 13 | typedef void (elevator_add_req_fn) (request_queue_t *, struct request *); |
14 | typedef int (elevator_queue_empty_fn) (request_queue_t *); | 14 | typedef int (elevator_queue_empty_fn) (request_queue_t *); |
15 | typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *); | 15 | typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *); |
16 | typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); | 16 | typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); |
17 | typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *); | 17 | typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *); |
18 | 18 | ||
19 | typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t); | 19 | typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t); |
20 | typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); | 20 | typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); |
21 | typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *); | 21 | typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *); |
22 | typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *); | 22 | typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *); |
23 | 23 | ||
24 | typedef void *(elevator_init_fn) (request_queue_t *, elevator_t *); | 24 | typedef void *(elevator_init_fn) (request_queue_t *, elevator_t *); |
25 | typedef void (elevator_exit_fn) (elevator_t *); | 25 | typedef void (elevator_exit_fn) (elevator_t *); |
26 | 26 | ||
27 | struct elevator_ops | 27 | struct elevator_ops |
28 | { | 28 | { |
29 | elevator_merge_fn *elevator_merge_fn; | 29 | elevator_merge_fn *elevator_merge_fn; |
30 | elevator_merged_fn *elevator_merged_fn; | 30 | elevator_merged_fn *elevator_merged_fn; |
31 | elevator_merge_req_fn *elevator_merge_req_fn; | 31 | elevator_merge_req_fn *elevator_merge_req_fn; |
32 | 32 | ||
33 | elevator_dispatch_fn *elevator_dispatch_fn; | 33 | elevator_dispatch_fn *elevator_dispatch_fn; |
34 | elevator_add_req_fn *elevator_add_req_fn; | 34 | elevator_add_req_fn *elevator_add_req_fn; |
35 | elevator_activate_req_fn *elevator_activate_req_fn; | 35 | elevator_activate_req_fn *elevator_activate_req_fn; |
36 | elevator_deactivate_req_fn *elevator_deactivate_req_fn; | 36 | elevator_deactivate_req_fn *elevator_deactivate_req_fn; |
37 | 37 | ||
38 | elevator_queue_empty_fn *elevator_queue_empty_fn; | 38 | elevator_queue_empty_fn *elevator_queue_empty_fn; |
39 | elevator_completed_req_fn *elevator_completed_req_fn; | 39 | elevator_completed_req_fn *elevator_completed_req_fn; |
40 | 40 | ||
41 | elevator_request_list_fn *elevator_former_req_fn; | 41 | elevator_request_list_fn *elevator_former_req_fn; |
42 | elevator_request_list_fn *elevator_latter_req_fn; | 42 | elevator_request_list_fn *elevator_latter_req_fn; |
43 | 43 | ||
44 | elevator_set_req_fn *elevator_set_req_fn; | 44 | elevator_set_req_fn *elevator_set_req_fn; |
45 | elevator_put_req_fn *elevator_put_req_fn; | 45 | elevator_put_req_fn *elevator_put_req_fn; |
46 | 46 | ||
47 | elevator_may_queue_fn *elevator_may_queue_fn; | 47 | elevator_may_queue_fn *elevator_may_queue_fn; |
48 | 48 | ||
49 | elevator_init_fn *elevator_init_fn; | 49 | elevator_init_fn *elevator_init_fn; |
50 | elevator_exit_fn *elevator_exit_fn; | 50 | elevator_exit_fn *elevator_exit_fn; |
51 | void (*trim)(struct io_context *); | 51 | void (*trim)(struct io_context *); |
52 | }; | 52 | }; |
53 | 53 | ||
54 | #define ELV_NAME_MAX (16) | 54 | #define ELV_NAME_MAX (16) |
55 | 55 | ||
56 | struct elv_fs_entry { | 56 | struct elv_fs_entry { |
57 | struct attribute attr; | 57 | struct attribute attr; |
58 | ssize_t (*show)(elevator_t *, char *); | 58 | ssize_t (*show)(elevator_t *, char *); |
59 | ssize_t (*store)(elevator_t *, const char *, size_t); | 59 | ssize_t (*store)(elevator_t *, const char *, size_t); |
60 | }; | 60 | }; |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * identifies an elevator type, such as AS or deadline | 63 | * identifies an elevator type, such as AS or deadline |
64 | */ | 64 | */ |
65 | struct elevator_type | 65 | struct elevator_type |
66 | { | 66 | { |
67 | struct list_head list; | 67 | struct list_head list; |
68 | struct elevator_ops ops; | 68 | struct elevator_ops ops; |
69 | struct elevator_type *elevator_type; | 69 | struct elevator_type *elevator_type; |
70 | struct elv_fs_entry *elevator_attrs; | 70 | struct elv_fs_entry *elevator_attrs; |
71 | char elevator_name[ELV_NAME_MAX]; | 71 | char elevator_name[ELV_NAME_MAX]; |
72 | struct module *elevator_owner; | 72 | struct module *elevator_owner; |
73 | }; | 73 | }; |
74 | 74 | ||
75 | /* | 75 | /* |
76 | * each queue has an elevator_queue associated with it | 76 | * each queue has an elevator_queue associated with it |
77 | */ | 77 | */ |
78 | struct elevator_queue | 78 | struct elevator_queue |
79 | { | 79 | { |
80 | struct elevator_ops *ops; | 80 | struct elevator_ops *ops; |
81 | void *elevator_data; | 81 | void *elevator_data; |
82 | struct kobject kobj; | 82 | struct kobject kobj; |
83 | struct elevator_type *elevator_type; | 83 | struct elevator_type *elevator_type; |
84 | struct mutex sysfs_lock; | 84 | struct mutex sysfs_lock; |
85 | struct hlist_head *hash; | 85 | struct hlist_head *hash; |
86 | }; | 86 | }; |
87 | 87 | ||
88 | /* | 88 | /* |
89 | * block elevator interface | 89 | * block elevator interface |
90 | */ | 90 | */ |
91 | extern void elv_dispatch_sort(request_queue_t *, struct request *); | 91 | extern void elv_dispatch_sort(request_queue_t *, struct request *); |
92 | extern void elv_dispatch_add_tail(request_queue_t *, struct request *); | 92 | extern void elv_dispatch_add_tail(request_queue_t *, struct request *); |
93 | extern void elv_add_request(request_queue_t *, struct request *, int, int); | 93 | extern void elv_add_request(request_queue_t *, struct request *, int, int); |
94 | extern void __elv_add_request(request_queue_t *, struct request *, int, int); | 94 | extern void __elv_add_request(request_queue_t *, struct request *, int, int); |
95 | extern void elv_insert(request_queue_t *, struct request *, int); | 95 | extern void elv_insert(request_queue_t *, struct request *, int); |
96 | extern int elv_merge(request_queue_t *, struct request **, struct bio *); | 96 | extern int elv_merge(request_queue_t *, struct request **, struct bio *); |
97 | extern void elv_merge_requests(request_queue_t *, struct request *, | 97 | extern void elv_merge_requests(request_queue_t *, struct request *, |
98 | struct request *); | 98 | struct request *); |
99 | extern void elv_merged_request(request_queue_t *, struct request *); | 99 | extern void elv_merged_request(request_queue_t *, struct request *, int); |
100 | extern void elv_dequeue_request(request_queue_t *, struct request *); | 100 | extern void elv_dequeue_request(request_queue_t *, struct request *); |
101 | extern void elv_requeue_request(request_queue_t *, struct request *); | 101 | extern void elv_requeue_request(request_queue_t *, struct request *); |
102 | extern int elv_queue_empty(request_queue_t *); | 102 | extern int elv_queue_empty(request_queue_t *); |
103 | extern struct request *elv_next_request(struct request_queue *q); | 103 | extern struct request *elv_next_request(struct request_queue *q); |
104 | extern struct request *elv_former_request(request_queue_t *, struct request *); | 104 | extern struct request *elv_former_request(request_queue_t *, struct request *); |
105 | extern struct request *elv_latter_request(request_queue_t *, struct request *); | 105 | extern struct request *elv_latter_request(request_queue_t *, struct request *); |
106 | extern int elv_register_queue(request_queue_t *q); | 106 | extern int elv_register_queue(request_queue_t *q); |
107 | extern void elv_unregister_queue(request_queue_t *q); | 107 | extern void elv_unregister_queue(request_queue_t *q); |
108 | extern int elv_may_queue(request_queue_t *, int, struct bio *); | 108 | extern int elv_may_queue(request_queue_t *, int, struct bio *); |
109 | extern void elv_completed_request(request_queue_t *, struct request *); | 109 | extern void elv_completed_request(request_queue_t *, struct request *); |
110 | extern int elv_set_request(request_queue_t *, struct request *, struct bio *, gfp_t); | 110 | extern int elv_set_request(request_queue_t *, struct request *, struct bio *, gfp_t); |
111 | extern void elv_put_request(request_queue_t *, struct request *); | 111 | extern void elv_put_request(request_queue_t *, struct request *); |
112 | 112 | ||
113 | /* | 113 | /* |
114 | * io scheduler registration | 114 | * io scheduler registration |
115 | */ | 115 | */ |
116 | extern int elv_register(struct elevator_type *); | 116 | extern int elv_register(struct elevator_type *); |
117 | extern void elv_unregister(struct elevator_type *); | 117 | extern void elv_unregister(struct elevator_type *); |
118 | 118 | ||
119 | /* | 119 | /* |
120 | * io scheduler sysfs switching | 120 | * io scheduler sysfs switching |
121 | */ | 121 | */ |
122 | extern ssize_t elv_iosched_show(request_queue_t *, char *); | 122 | extern ssize_t elv_iosched_show(request_queue_t *, char *); |
123 | extern ssize_t elv_iosched_store(request_queue_t *, const char *, size_t); | 123 | extern ssize_t elv_iosched_store(request_queue_t *, const char *, size_t); |
124 | 124 | ||
125 | extern int elevator_init(request_queue_t *, char *); | 125 | extern int elevator_init(request_queue_t *, char *); |
126 | extern void elevator_exit(elevator_t *); | 126 | extern void elevator_exit(elevator_t *); |
127 | extern int elv_rq_merge_ok(struct request *, struct bio *); | 127 | extern int elv_rq_merge_ok(struct request *, struct bio *); |
128 | 128 | ||
129 | /* | 129 | /* |
130 | * Helper functions. | ||
131 | */ | ||
132 | extern struct request *elv_rb_former_request(request_queue_t *, struct request *); | ||
133 | extern struct request *elv_rb_latter_request(request_queue_t *, struct request *); | ||
134 | |||
135 | /* | ||
136 | * rb support functions. | ||
137 | */ | ||
138 | extern struct request *elv_rb_add(struct rb_root *, struct request *); | ||
139 | extern void elv_rb_del(struct rb_root *, struct request *); | ||
140 | extern struct request *elv_rb_find(struct rb_root *, sector_t); | ||
141 | |||
142 | /* | ||
130 | * Return values from elevator merger | 143 | * Return values from elevator merger |
131 | */ | 144 | */ |
132 | #define ELEVATOR_NO_MERGE 0 | 145 | #define ELEVATOR_NO_MERGE 0 |
133 | #define ELEVATOR_FRONT_MERGE 1 | 146 | #define ELEVATOR_FRONT_MERGE 1 |
134 | #define ELEVATOR_BACK_MERGE 2 | 147 | #define ELEVATOR_BACK_MERGE 2 |
135 | 148 | ||
136 | /* | 149 | /* |
137 | * Insertion selection | 150 | * Insertion selection |
138 | */ | 151 | */ |
139 | #define ELEVATOR_INSERT_FRONT 1 | 152 | #define ELEVATOR_INSERT_FRONT 1 |
140 | #define ELEVATOR_INSERT_BACK 2 | 153 | #define ELEVATOR_INSERT_BACK 2 |
141 | #define ELEVATOR_INSERT_SORT 3 | 154 | #define ELEVATOR_INSERT_SORT 3 |
142 | #define ELEVATOR_INSERT_REQUEUE 4 | 155 | #define ELEVATOR_INSERT_REQUEUE 4 |
143 | 156 | ||
144 | /* | 157 | /* |
145 | * return values from elevator_may_queue_fn | 158 | * return values from elevator_may_queue_fn |
146 | */ | 159 | */ |
147 | enum { | 160 | enum { |
148 | ELV_MQUEUE_MAY, | 161 | ELV_MQUEUE_MAY, |
149 | ELV_MQUEUE_NO, | 162 | ELV_MQUEUE_NO, |
150 | ELV_MQUEUE_MUST, | 163 | ELV_MQUEUE_MUST, |
151 | }; | 164 | }; |
152 | 165 | ||
153 | #define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors) | 166 | #define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors) |
167 | #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) | ||
154 | 168 | ||
155 | #endif | 169 | #endif |
156 | 170 |
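The declarations added above are the whole of the new elevator rb interface: the scheduler keeps owning an rb_root in its private data, and the core only links, unlinks and looks up struct request nodes within it, keyed by sector. Below is a minimal sketch, not part of the patch, of how a scheduler might sit on top of these helpers. The scheduler name (myched), its data structure, and the alias/dispatch policy are illustrative assumptions; in particular, it assumes elv_rb_add() either inserts the request and returns NULL, or returns the request already queued at the same sector without inserting, which is suggested by its prototype but not shown in this hunk.

#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/rbtree.h>

/* hypothetical per-queue scheduler data: the scheduler still owns the root */
struct myched_data {
	struct rb_root sort_list;		/* requests sorted by ->sector */
};

static void myched_add_request(request_queue_t *q, struct request *rq)
{
	struct myched_data *md = q->elevator->elevator_data;
	struct request *alias;

	/*
	 * Assumed semantics: elv_rb_add() inserts rq keyed on rq->sector and
	 * returns NULL, or returns the request already sitting at that sector
	 * without inserting.  Resolve such an alias by pushing it straight to
	 * the dispatch list and retrying (a deliberately simple policy).
	 */
	while ((alias = elv_rb_add(&md->sort_list, rq)) != NULL) {
		elv_rb_del(&md->sort_list, alias);
		elv_dispatch_sort(q, alias);
	}
}

static int myched_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
	struct myched_data *md = q->elevator->elevator_data;
	struct request *__rq;

	/*
	 * A front merge candidate is a queued request starting exactly where
	 * this bio ends; elv_rb_find() does the sector lookup in the tree.
	 */
	__rq = elv_rb_find(&md->sort_list, bio->bi_sector + bio_sectors(bio));
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_FRONT_MERGE;
	}

	return ELEVATOR_NO_MERGE;
}

static void myched_remove_request(request_queue_t *q, struct request *rq)
{
	struct myched_data *md = q->elevator->elevator_data;

	elv_rb_del(&md->sort_list, rq);
}

/*
 * rb_entry_rq() maps an rb_node back to its request when walking the tree
 * by hand; elv_rb_former_request()/elv_rb_latter_request() are presumably
 * there so schedulers no longer open-code this rb_prev()/rb_next() step in
 * their former_req/latter_req hooks.
 */
static struct request *myched_prev_rq(struct request *rq)
{
	struct rb_node *prev = rb_prev(&rq->rb_node);

	return prev ? rb_entry_rq(prev) : NULL;
}

Wired into the scheduler's elevator_ops merge/add/remove hooks, a sketch like this lets the core keep the per-scheduler tree sorted while the scheduler itself no longer carries any rbtree insertion or lookup code of its own.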