Commit a0cd128542cd9c67f27458a08e989db486a293ce
1 parent
992c5ddaf1
block: add end_queued_request() and end_dequeued_request() helpers
We can use this helper in the elevator core for BLKPREP_KILL, and it'll also be useful for the empty barrier patch. Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Showing 3 changed files with 77 additions and 14 deletions Inline Diff
block/elevator.c
1 | /* | 1 | /* |
2 | * Block device elevator/IO-scheduler. | 2 | * Block device elevator/IO-scheduler. |
3 | * | 3 | * |
4 | * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
5 | * | 5 | * |
6 | * 30042000 Jens Axboe <axboe@kernel.dk> : | 6 | * 30042000 Jens Axboe <axboe@kernel.dk> : |
7 | * | 7 | * |
8 | * Split the elevator a bit so that it is possible to choose a different | 8 | * Split the elevator a bit so that it is possible to choose a different |
9 | * one or even write a new "plug in". There are three pieces: | 9 | * one or even write a new "plug in". There are three pieces: |
10 | * - elevator_fn, inserts a new request in the queue list | 10 | * - elevator_fn, inserts a new request in the queue list |
11 | * - elevator_merge_fn, decides whether a new buffer can be merged with | 11 | * - elevator_merge_fn, decides whether a new buffer can be merged with |
12 | * an existing request | 12 | * an existing request |
13 | * - elevator_dequeue_fn, called when a request is taken off the active list | 13 | * - elevator_dequeue_fn, called when a request is taken off the active list |
14 | * | 14 | * |
15 | * 20082000 Dave Jones <davej@suse.de> : | 15 | * 20082000 Dave Jones <davej@suse.de> : |
16 | * Removed tests for max-bomb-segments, which was breaking elvtune | 16 | * Removed tests for max-bomb-segments, which was breaking elvtune |
17 | * when run without -bN | 17 | * when run without -bN |
18 | * | 18 | * |
19 | * Jens: | 19 | * Jens: |
20 | * - Rework again to work with bio instead of buffer_heads | 20 | * - Rework again to work with bio instead of buffer_heads |
21 | * - loose bi_dev comparisons, partition handling is right now | 21 | * - loose bi_dev comparisons, partition handling is right now |
22 | * - completely modularize elevator setup and teardown | 22 | * - completely modularize elevator setup and teardown |
23 | * | 23 | * |
24 | */ | 24 | */ |
25 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/blkdev.h> | 27 | #include <linux/blkdev.h> |
28 | #include <linux/elevator.h> | 28 | #include <linux/elevator.h> |
29 | #include <linux/bio.h> | 29 | #include <linux/bio.h> |
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <linux/init.h> | 32 | #include <linux/init.h> |
33 | #include <linux/compiler.h> | 33 | #include <linux/compiler.h> |
34 | #include <linux/delay.h> | 34 | #include <linux/delay.h> |
35 | #include <linux/blktrace_api.h> | 35 | #include <linux/blktrace_api.h> |
36 | #include <linux/hash.h> | 36 | #include <linux/hash.h> |
37 | 37 | ||
38 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
39 | 39 | ||
40 | static DEFINE_SPINLOCK(elv_list_lock); | 40 | static DEFINE_SPINLOCK(elv_list_lock); |
41 | static LIST_HEAD(elv_list); | 41 | static LIST_HEAD(elv_list); |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Merge hash stuff. | 44 | * Merge hash stuff. |
45 | */ | 45 | */ |
46 | static const int elv_hash_shift = 6; | 46 | static const int elv_hash_shift = 6; |
47 | #define ELV_HASH_BLOCK(sec) ((sec) >> 3) | 47 | #define ELV_HASH_BLOCK(sec) ((sec) >> 3) |
48 | #define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) | 48 | #define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) |
49 | #define ELV_HASH_ENTRIES (1 << elv_hash_shift) | 49 | #define ELV_HASH_ENTRIES (1 << elv_hash_shift) |
50 | #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) | 50 | #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) |
51 | #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) | 51 | #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) |
52 | 52 | ||
53 | /* | 53 | /* |
54 | * Query io scheduler to see if the current process issuing bio may be | 54 | * Query io scheduler to see if the current process issuing bio may be |
55 | * merged with rq. | 55 | * merged with rq. |
56 | */ | 56 | */ |
57 | static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) | 57 | static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) |
58 | { | 58 | { |
59 | struct request_queue *q = rq->q; | 59 | struct request_queue *q = rq->q; |
60 | elevator_t *e = q->elevator; | 60 | elevator_t *e = q->elevator; |
61 | 61 | ||
62 | if (e->ops->elevator_allow_merge_fn) | 62 | if (e->ops->elevator_allow_merge_fn) |
63 | return e->ops->elevator_allow_merge_fn(q, rq, bio); | 63 | return e->ops->elevator_allow_merge_fn(q, rq, bio); |
64 | 64 | ||
65 | return 1; | 65 | return 1; |
66 | } | 66 | } |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * can we safely merge with this request? | 69 | * can we safely merge with this request? |
70 | */ | 70 | */ |
71 | inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) | 71 | inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) |
72 | { | 72 | { |
73 | if (!rq_mergeable(rq)) | 73 | if (!rq_mergeable(rq)) |
74 | return 0; | 74 | return 0; |
75 | 75 | ||
76 | /* | 76 | /* |
77 | * different data direction or already started, don't merge | 77 | * different data direction or already started, don't merge |
78 | */ | 78 | */ |
79 | if (bio_data_dir(bio) != rq_data_dir(rq)) | 79 | if (bio_data_dir(bio) != rq_data_dir(rq)) |
80 | return 0; | 80 | return 0; |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * must be same device and not a special request | 83 | * must be same device and not a special request |
84 | */ | 84 | */ |
85 | if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) | 85 | if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) |
86 | return 0; | 86 | return 0; |
87 | 87 | ||
88 | if (!elv_iosched_allow_merge(rq, bio)) | 88 | if (!elv_iosched_allow_merge(rq, bio)) |
89 | return 0; | 89 | return 0; |
90 | 90 | ||
91 | return 1; | 91 | return 1; |
92 | } | 92 | } |
93 | EXPORT_SYMBOL(elv_rq_merge_ok); | 93 | EXPORT_SYMBOL(elv_rq_merge_ok); |
94 | 94 | ||
95 | static inline int elv_try_merge(struct request *__rq, struct bio *bio) | 95 | static inline int elv_try_merge(struct request *__rq, struct bio *bio) |
96 | { | 96 | { |
97 | int ret = ELEVATOR_NO_MERGE; | 97 | int ret = ELEVATOR_NO_MERGE; |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * we can merge and sequence is ok, check if it's possible | 100 | * we can merge and sequence is ok, check if it's possible |
101 | */ | 101 | */ |
102 | if (elv_rq_merge_ok(__rq, bio)) { | 102 | if (elv_rq_merge_ok(__rq, bio)) { |
103 | if (__rq->sector + __rq->nr_sectors == bio->bi_sector) | 103 | if (__rq->sector + __rq->nr_sectors == bio->bi_sector) |
104 | ret = ELEVATOR_BACK_MERGE; | 104 | ret = ELEVATOR_BACK_MERGE; |
105 | else if (__rq->sector - bio_sectors(bio) == bio->bi_sector) | 105 | else if (__rq->sector - bio_sectors(bio) == bio->bi_sector) |
106 | ret = ELEVATOR_FRONT_MERGE; | 106 | ret = ELEVATOR_FRONT_MERGE; |
107 | } | 107 | } |
108 | 108 | ||
109 | return ret; | 109 | return ret; |
110 | } | 110 | } |
111 | 111 | ||
112 | static struct elevator_type *elevator_find(const char *name) | 112 | static struct elevator_type *elevator_find(const char *name) |
113 | { | 113 | { |
114 | struct elevator_type *e; | 114 | struct elevator_type *e; |
115 | 115 | ||
116 | list_for_each_entry(e, &elv_list, list) { | 116 | list_for_each_entry(e, &elv_list, list) { |
117 | if (!strcmp(e->elevator_name, name)) | 117 | if (!strcmp(e->elevator_name, name)) |
118 | return e; | 118 | return e; |
119 | } | 119 | } |
120 | 120 | ||
121 | return NULL; | 121 | return NULL; |
122 | } | 122 | } |
123 | 123 | ||
124 | static void elevator_put(struct elevator_type *e) | 124 | static void elevator_put(struct elevator_type *e) |
125 | { | 125 | { |
126 | module_put(e->elevator_owner); | 126 | module_put(e->elevator_owner); |
127 | } | 127 | } |
128 | 128 | ||
129 | static struct elevator_type *elevator_get(const char *name) | 129 | static struct elevator_type *elevator_get(const char *name) |
130 | { | 130 | { |
131 | struct elevator_type *e; | 131 | struct elevator_type *e; |
132 | 132 | ||
133 | spin_lock(&elv_list_lock); | 133 | spin_lock(&elv_list_lock); |
134 | 134 | ||
135 | e = elevator_find(name); | 135 | e = elevator_find(name); |
136 | if (e && !try_module_get(e->elevator_owner)) | 136 | if (e && !try_module_get(e->elevator_owner)) |
137 | e = NULL; | 137 | e = NULL; |
138 | 138 | ||
139 | spin_unlock(&elv_list_lock); | 139 | spin_unlock(&elv_list_lock); |
140 | 140 | ||
141 | return e; | 141 | return e; |
142 | } | 142 | } |
143 | 143 | ||
144 | static void *elevator_init_queue(struct request_queue *q, | 144 | static void *elevator_init_queue(struct request_queue *q, |
145 | struct elevator_queue *eq) | 145 | struct elevator_queue *eq) |
146 | { | 146 | { |
147 | return eq->ops->elevator_init_fn(q); | 147 | return eq->ops->elevator_init_fn(q); |
148 | } | 148 | } |
149 | 149 | ||
150 | static void elevator_attach(struct request_queue *q, struct elevator_queue *eq, | 150 | static void elevator_attach(struct request_queue *q, struct elevator_queue *eq, |
151 | void *data) | 151 | void *data) |
152 | { | 152 | { |
153 | q->elevator = eq; | 153 | q->elevator = eq; |
154 | eq->elevator_data = data; | 154 | eq->elevator_data = data; |
155 | } | 155 | } |
156 | 156 | ||
157 | static char chosen_elevator[16]; | 157 | static char chosen_elevator[16]; |
158 | 158 | ||
159 | static int __init elevator_setup(char *str) | 159 | static int __init elevator_setup(char *str) |
160 | { | 160 | { |
161 | /* | 161 | /* |
162 | * Be backwards-compatible with previous kernels, so users | 162 | * Be backwards-compatible with previous kernels, so users |
163 | * won't get the wrong elevator. | 163 | * won't get the wrong elevator. |
164 | */ | 164 | */ |
165 | if (!strcmp(str, "as")) | 165 | if (!strcmp(str, "as")) |
166 | strcpy(chosen_elevator, "anticipatory"); | 166 | strcpy(chosen_elevator, "anticipatory"); |
167 | else | 167 | else |
168 | strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); | 168 | strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); |
169 | return 1; | 169 | return 1; |
170 | } | 170 | } |
171 | 171 | ||
172 | __setup("elevator=", elevator_setup); | 172 | __setup("elevator=", elevator_setup); |
173 | 173 | ||
174 | static struct kobj_type elv_ktype; | 174 | static struct kobj_type elv_ktype; |
175 | 175 | ||
176 | static elevator_t *elevator_alloc(struct request_queue *q, | 176 | static elevator_t *elevator_alloc(struct request_queue *q, |
177 | struct elevator_type *e) | 177 | struct elevator_type *e) |
178 | { | 178 | { |
179 | elevator_t *eq; | 179 | elevator_t *eq; |
180 | int i; | 180 | int i; |
181 | 181 | ||
182 | eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL | __GFP_ZERO, q->node); | 182 | eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL | __GFP_ZERO, q->node); |
183 | if (unlikely(!eq)) | 183 | if (unlikely(!eq)) |
184 | goto err; | 184 | goto err; |
185 | 185 | ||
186 | eq->ops = &e->ops; | 186 | eq->ops = &e->ops; |
187 | eq->elevator_type = e; | 187 | eq->elevator_type = e; |
188 | kobject_init(&eq->kobj); | 188 | kobject_init(&eq->kobj); |
189 | kobject_set_name(&eq->kobj, "%s", "iosched"); | 189 | kobject_set_name(&eq->kobj, "%s", "iosched"); |
190 | eq->kobj.ktype = &elv_ktype; | 190 | eq->kobj.ktype = &elv_ktype; |
191 | mutex_init(&eq->sysfs_lock); | 191 | mutex_init(&eq->sysfs_lock); |
192 | 192 | ||
193 | eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, | 193 | eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, |
194 | GFP_KERNEL, q->node); | 194 | GFP_KERNEL, q->node); |
195 | if (!eq->hash) | 195 | if (!eq->hash) |
196 | goto err; | 196 | goto err; |
197 | 197 | ||
198 | for (i = 0; i < ELV_HASH_ENTRIES; i++) | 198 | for (i = 0; i < ELV_HASH_ENTRIES; i++) |
199 | INIT_HLIST_HEAD(&eq->hash[i]); | 199 | INIT_HLIST_HEAD(&eq->hash[i]); |
200 | 200 | ||
201 | return eq; | 201 | return eq; |
202 | err: | 202 | err: |
203 | kfree(eq); | 203 | kfree(eq); |
204 | elevator_put(e); | 204 | elevator_put(e); |
205 | return NULL; | 205 | return NULL; |
206 | } | 206 | } |
207 | 207 | ||
208 | static void elevator_release(struct kobject *kobj) | 208 | static void elevator_release(struct kobject *kobj) |
209 | { | 209 | { |
210 | elevator_t *e = container_of(kobj, elevator_t, kobj); | 210 | elevator_t *e = container_of(kobj, elevator_t, kobj); |
211 | 211 | ||
212 | elevator_put(e->elevator_type); | 212 | elevator_put(e->elevator_type); |
213 | kfree(e->hash); | 213 | kfree(e->hash); |
214 | kfree(e); | 214 | kfree(e); |
215 | } | 215 | } |
216 | 216 | ||
217 | int elevator_init(struct request_queue *q, char *name) | 217 | int elevator_init(struct request_queue *q, char *name) |
218 | { | 218 | { |
219 | struct elevator_type *e = NULL; | 219 | struct elevator_type *e = NULL; |
220 | struct elevator_queue *eq; | 220 | struct elevator_queue *eq; |
221 | int ret = 0; | 221 | int ret = 0; |
222 | void *data; | 222 | void *data; |
223 | 223 | ||
224 | INIT_LIST_HEAD(&q->queue_head); | 224 | INIT_LIST_HEAD(&q->queue_head); |
225 | q->last_merge = NULL; | 225 | q->last_merge = NULL; |
226 | q->end_sector = 0; | 226 | q->end_sector = 0; |
227 | q->boundary_rq = NULL; | 227 | q->boundary_rq = NULL; |
228 | 228 | ||
229 | if (name && !(e = elevator_get(name))) | 229 | if (name && !(e = elevator_get(name))) |
230 | return -EINVAL; | 230 | return -EINVAL; |
231 | 231 | ||
232 | if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator))) | 232 | if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator))) |
233 | printk("I/O scheduler %s not found\n", chosen_elevator); | 233 | printk("I/O scheduler %s not found\n", chosen_elevator); |
234 | 234 | ||
235 | if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) { | 235 | if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) { |
236 | printk("Default I/O scheduler not found, using no-op\n"); | 236 | printk("Default I/O scheduler not found, using no-op\n"); |
237 | e = elevator_get("noop"); | 237 | e = elevator_get("noop"); |
238 | } | 238 | } |
239 | 239 | ||
240 | eq = elevator_alloc(q, e); | 240 | eq = elevator_alloc(q, e); |
241 | if (!eq) | 241 | if (!eq) |
242 | return -ENOMEM; | 242 | return -ENOMEM; |
243 | 243 | ||
244 | data = elevator_init_queue(q, eq); | 244 | data = elevator_init_queue(q, eq); |
245 | if (!data) { | 245 | if (!data) { |
246 | kobject_put(&eq->kobj); | 246 | kobject_put(&eq->kobj); |
247 | return -ENOMEM; | 247 | return -ENOMEM; |
248 | } | 248 | } |
249 | 249 | ||
250 | elevator_attach(q, eq, data); | 250 | elevator_attach(q, eq, data); |
251 | return ret; | 251 | return ret; |
252 | } | 252 | } |
253 | 253 | ||
254 | EXPORT_SYMBOL(elevator_init); | 254 | EXPORT_SYMBOL(elevator_init); |
255 | 255 | ||
256 | void elevator_exit(elevator_t *e) | 256 | void elevator_exit(elevator_t *e) |
257 | { | 257 | { |
258 | mutex_lock(&e->sysfs_lock); | 258 | mutex_lock(&e->sysfs_lock); |
259 | if (e->ops->elevator_exit_fn) | 259 | if (e->ops->elevator_exit_fn) |
260 | e->ops->elevator_exit_fn(e); | 260 | e->ops->elevator_exit_fn(e); |
261 | e->ops = NULL; | 261 | e->ops = NULL; |
262 | mutex_unlock(&e->sysfs_lock); | 262 | mutex_unlock(&e->sysfs_lock); |
263 | 263 | ||
264 | kobject_put(&e->kobj); | 264 | kobject_put(&e->kobj); |
265 | } | 265 | } |
266 | 266 | ||
267 | EXPORT_SYMBOL(elevator_exit); | 267 | EXPORT_SYMBOL(elevator_exit); |
268 | 268 | ||
269 | static void elv_activate_rq(struct request_queue *q, struct request *rq) | 269 | static void elv_activate_rq(struct request_queue *q, struct request *rq) |
270 | { | 270 | { |
271 | elevator_t *e = q->elevator; | 271 | elevator_t *e = q->elevator; |
272 | 272 | ||
273 | if (e->ops->elevator_activate_req_fn) | 273 | if (e->ops->elevator_activate_req_fn) |
274 | e->ops->elevator_activate_req_fn(q, rq); | 274 | e->ops->elevator_activate_req_fn(q, rq); |
275 | } | 275 | } |
276 | 276 | ||
277 | static void elv_deactivate_rq(struct request_queue *q, struct request *rq) | 277 | static void elv_deactivate_rq(struct request_queue *q, struct request *rq) |
278 | { | 278 | { |
279 | elevator_t *e = q->elevator; | 279 | elevator_t *e = q->elevator; |
280 | 280 | ||
281 | if (e->ops->elevator_deactivate_req_fn) | 281 | if (e->ops->elevator_deactivate_req_fn) |
282 | e->ops->elevator_deactivate_req_fn(q, rq); | 282 | e->ops->elevator_deactivate_req_fn(q, rq); |
283 | } | 283 | } |
284 | 284 | ||
285 | static inline void __elv_rqhash_del(struct request *rq) | 285 | static inline void __elv_rqhash_del(struct request *rq) |
286 | { | 286 | { |
287 | hlist_del_init(&rq->hash); | 287 | hlist_del_init(&rq->hash); |
288 | } | 288 | } |
289 | 289 | ||
290 | static void elv_rqhash_del(struct request_queue *q, struct request *rq) | 290 | static void elv_rqhash_del(struct request_queue *q, struct request *rq) |
291 | { | 291 | { |
292 | if (ELV_ON_HASH(rq)) | 292 | if (ELV_ON_HASH(rq)) |
293 | __elv_rqhash_del(rq); | 293 | __elv_rqhash_del(rq); |
294 | } | 294 | } |
295 | 295 | ||
296 | static void elv_rqhash_add(struct request_queue *q, struct request *rq) | 296 | static void elv_rqhash_add(struct request_queue *q, struct request *rq) |
297 | { | 297 | { |
298 | elevator_t *e = q->elevator; | 298 | elevator_t *e = q->elevator; |
299 | 299 | ||
300 | BUG_ON(ELV_ON_HASH(rq)); | 300 | BUG_ON(ELV_ON_HASH(rq)); |
301 | hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); | 301 | hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); |
302 | } | 302 | } |
303 | 303 | ||
304 | static void elv_rqhash_reposition(struct request_queue *q, struct request *rq) | 304 | static void elv_rqhash_reposition(struct request_queue *q, struct request *rq) |
305 | { | 305 | { |
306 | __elv_rqhash_del(rq); | 306 | __elv_rqhash_del(rq); |
307 | elv_rqhash_add(q, rq); | 307 | elv_rqhash_add(q, rq); |
308 | } | 308 | } |
309 | 309 | ||
310 | static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset) | 310 | static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset) |
311 | { | 311 | { |
312 | elevator_t *e = q->elevator; | 312 | elevator_t *e = q->elevator; |
313 | struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; | 313 | struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; |
314 | struct hlist_node *entry, *next; | 314 | struct hlist_node *entry, *next; |
315 | struct request *rq; | 315 | struct request *rq; |
316 | 316 | ||
317 | hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) { | 317 | hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) { |
318 | BUG_ON(!ELV_ON_HASH(rq)); | 318 | BUG_ON(!ELV_ON_HASH(rq)); |
319 | 319 | ||
320 | if (unlikely(!rq_mergeable(rq))) { | 320 | if (unlikely(!rq_mergeable(rq))) { |
321 | __elv_rqhash_del(rq); | 321 | __elv_rqhash_del(rq); |
322 | continue; | 322 | continue; |
323 | } | 323 | } |
324 | 324 | ||
325 | if (rq_hash_key(rq) == offset) | 325 | if (rq_hash_key(rq) == offset) |
326 | return rq; | 326 | return rq; |
327 | } | 327 | } |
328 | 328 | ||
329 | return NULL; | 329 | return NULL; |
330 | } | 330 | } |
331 | 331 | ||
332 | /* | 332 | /* |
333 | * RB-tree support functions for inserting/lookup/removal of requests | 333 | * RB-tree support functions for inserting/lookup/removal of requests |
334 | * in a sorted RB tree. | 334 | * in a sorted RB tree. |
335 | */ | 335 | */ |
336 | struct request *elv_rb_add(struct rb_root *root, struct request *rq) | 336 | struct request *elv_rb_add(struct rb_root *root, struct request *rq) |
337 | { | 337 | { |
338 | struct rb_node **p = &root->rb_node; | 338 | struct rb_node **p = &root->rb_node; |
339 | struct rb_node *parent = NULL; | 339 | struct rb_node *parent = NULL; |
340 | struct request *__rq; | 340 | struct request *__rq; |
341 | 341 | ||
342 | while (*p) { | 342 | while (*p) { |
343 | parent = *p; | 343 | parent = *p; |
344 | __rq = rb_entry(parent, struct request, rb_node); | 344 | __rq = rb_entry(parent, struct request, rb_node); |
345 | 345 | ||
346 | if (rq->sector < __rq->sector) | 346 | if (rq->sector < __rq->sector) |
347 | p = &(*p)->rb_left; | 347 | p = &(*p)->rb_left; |
348 | else if (rq->sector > __rq->sector) | 348 | else if (rq->sector > __rq->sector) |
349 | p = &(*p)->rb_right; | 349 | p = &(*p)->rb_right; |
350 | else | 350 | else |
351 | return __rq; | 351 | return __rq; |
352 | } | 352 | } |
353 | 353 | ||
354 | rb_link_node(&rq->rb_node, parent, p); | 354 | rb_link_node(&rq->rb_node, parent, p); |
355 | rb_insert_color(&rq->rb_node, root); | 355 | rb_insert_color(&rq->rb_node, root); |
356 | return NULL; | 356 | return NULL; |
357 | } | 357 | } |
358 | 358 | ||
359 | EXPORT_SYMBOL(elv_rb_add); | 359 | EXPORT_SYMBOL(elv_rb_add); |
360 | 360 | ||
361 | void elv_rb_del(struct rb_root *root, struct request *rq) | 361 | void elv_rb_del(struct rb_root *root, struct request *rq) |
362 | { | 362 | { |
363 | BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); | 363 | BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); |
364 | rb_erase(&rq->rb_node, root); | 364 | rb_erase(&rq->rb_node, root); |
365 | RB_CLEAR_NODE(&rq->rb_node); | 365 | RB_CLEAR_NODE(&rq->rb_node); |
366 | } | 366 | } |
367 | 367 | ||
368 | EXPORT_SYMBOL(elv_rb_del); | 368 | EXPORT_SYMBOL(elv_rb_del); |
369 | 369 | ||
370 | struct request *elv_rb_find(struct rb_root *root, sector_t sector) | 370 | struct request *elv_rb_find(struct rb_root *root, sector_t sector) |
371 | { | 371 | { |
372 | struct rb_node *n = root->rb_node; | 372 | struct rb_node *n = root->rb_node; |
373 | struct request *rq; | 373 | struct request *rq; |
374 | 374 | ||
375 | while (n) { | 375 | while (n) { |
376 | rq = rb_entry(n, struct request, rb_node); | 376 | rq = rb_entry(n, struct request, rb_node); |
377 | 377 | ||
378 | if (sector < rq->sector) | 378 | if (sector < rq->sector) |
379 | n = n->rb_left; | 379 | n = n->rb_left; |
380 | else if (sector > rq->sector) | 380 | else if (sector > rq->sector) |
381 | n = n->rb_right; | 381 | n = n->rb_right; |
382 | else | 382 | else |
383 | return rq; | 383 | return rq; |
384 | } | 384 | } |
385 | 385 | ||
386 | return NULL; | 386 | return NULL; |
387 | } | 387 | } |
388 | 388 | ||
389 | EXPORT_SYMBOL(elv_rb_find); | 389 | EXPORT_SYMBOL(elv_rb_find); |
390 | 390 | ||
391 | /* | 391 | /* |
392 | * Insert rq into dispatch queue of q. Queue lock must be held on | 392 | * Insert rq into dispatch queue of q. Queue lock must be held on |
393 | * entry. rq is sort insted into the dispatch queue. To be used by | 393 | * entry. rq is sort insted into the dispatch queue. To be used by |
394 | * specific elevators. | 394 | * specific elevators. |
395 | */ | 395 | */ |
396 | void elv_dispatch_sort(struct request_queue *q, struct request *rq) | 396 | void elv_dispatch_sort(struct request_queue *q, struct request *rq) |
397 | { | 397 | { |
398 | sector_t boundary; | 398 | sector_t boundary; |
399 | struct list_head *entry; | 399 | struct list_head *entry; |
400 | 400 | ||
401 | if (q->last_merge == rq) | 401 | if (q->last_merge == rq) |
402 | q->last_merge = NULL; | 402 | q->last_merge = NULL; |
403 | 403 | ||
404 | elv_rqhash_del(q, rq); | 404 | elv_rqhash_del(q, rq); |
405 | 405 | ||
406 | q->nr_sorted--; | 406 | q->nr_sorted--; |
407 | 407 | ||
408 | boundary = q->end_sector; | 408 | boundary = q->end_sector; |
409 | 409 | ||
410 | list_for_each_prev(entry, &q->queue_head) { | 410 | list_for_each_prev(entry, &q->queue_head) { |
411 | struct request *pos = list_entry_rq(entry); | 411 | struct request *pos = list_entry_rq(entry); |
412 | 412 | ||
413 | if (rq_data_dir(rq) != rq_data_dir(pos)) | 413 | if (rq_data_dir(rq) != rq_data_dir(pos)) |
414 | break; | 414 | break; |
415 | if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) | 415 | if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) |
416 | break; | 416 | break; |
417 | if (rq->sector >= boundary) { | 417 | if (rq->sector >= boundary) { |
418 | if (pos->sector < boundary) | 418 | if (pos->sector < boundary) |
419 | continue; | 419 | continue; |
420 | } else { | 420 | } else { |
421 | if (pos->sector >= boundary) | 421 | if (pos->sector >= boundary) |
422 | break; | 422 | break; |
423 | } | 423 | } |
424 | if (rq->sector >= pos->sector) | 424 | if (rq->sector >= pos->sector) |
425 | break; | 425 | break; |
426 | } | 426 | } |
427 | 427 | ||
428 | list_add(&rq->queuelist, entry); | 428 | list_add(&rq->queuelist, entry); |
429 | } | 429 | } |
430 | 430 | ||
431 | EXPORT_SYMBOL(elv_dispatch_sort); | 431 | EXPORT_SYMBOL(elv_dispatch_sort); |
432 | 432 | ||
433 | /* | 433 | /* |
434 | * Insert rq into dispatch queue of q. Queue lock must be held on | 434 | * Insert rq into dispatch queue of q. Queue lock must be held on |
435 | * entry. rq is added to the back of the dispatch queue. To be used by | 435 | * entry. rq is added to the back of the dispatch queue. To be used by |
436 | * specific elevators. | 436 | * specific elevators. |
437 | */ | 437 | */ |
438 | void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) | 438 | void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) |
439 | { | 439 | { |
440 | if (q->last_merge == rq) | 440 | if (q->last_merge == rq) |
441 | q->last_merge = NULL; | 441 | q->last_merge = NULL; |
442 | 442 | ||
443 | elv_rqhash_del(q, rq); | 443 | elv_rqhash_del(q, rq); |
444 | 444 | ||
445 | q->nr_sorted--; | 445 | q->nr_sorted--; |
446 | 446 | ||
447 | q->end_sector = rq_end_sector(rq); | 447 | q->end_sector = rq_end_sector(rq); |
448 | q->boundary_rq = rq; | 448 | q->boundary_rq = rq; |
449 | list_add_tail(&rq->queuelist, &q->queue_head); | 449 | list_add_tail(&rq->queuelist, &q->queue_head); |
450 | } | 450 | } |
451 | 451 | ||
452 | EXPORT_SYMBOL(elv_dispatch_add_tail); | 452 | EXPORT_SYMBOL(elv_dispatch_add_tail); |
453 | 453 | ||
454 | int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) | 454 | int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) |
455 | { | 455 | { |
456 | elevator_t *e = q->elevator; | 456 | elevator_t *e = q->elevator; |
457 | struct request *__rq; | 457 | struct request *__rq; |
458 | int ret; | 458 | int ret; |
459 | 459 | ||
460 | /* | 460 | /* |
461 | * First try one-hit cache. | 461 | * First try one-hit cache. |
462 | */ | 462 | */ |
463 | if (q->last_merge) { | 463 | if (q->last_merge) { |
464 | ret = elv_try_merge(q->last_merge, bio); | 464 | ret = elv_try_merge(q->last_merge, bio); |
465 | if (ret != ELEVATOR_NO_MERGE) { | 465 | if (ret != ELEVATOR_NO_MERGE) { |
466 | *req = q->last_merge; | 466 | *req = q->last_merge; |
467 | return ret; | 467 | return ret; |
468 | } | 468 | } |
469 | } | 469 | } |
470 | 470 | ||
471 | /* | 471 | /* |
472 | * See if our hash lookup can find a potential backmerge. | 472 | * See if our hash lookup can find a potential backmerge. |
473 | */ | 473 | */ |
474 | __rq = elv_rqhash_find(q, bio->bi_sector); | 474 | __rq = elv_rqhash_find(q, bio->bi_sector); |
475 | if (__rq && elv_rq_merge_ok(__rq, bio)) { | 475 | if (__rq && elv_rq_merge_ok(__rq, bio)) { |
476 | *req = __rq; | 476 | *req = __rq; |
477 | return ELEVATOR_BACK_MERGE; | 477 | return ELEVATOR_BACK_MERGE; |
478 | } | 478 | } |
479 | 479 | ||
480 | if (e->ops->elevator_merge_fn) | 480 | if (e->ops->elevator_merge_fn) |
481 | return e->ops->elevator_merge_fn(q, req, bio); | 481 | return e->ops->elevator_merge_fn(q, req, bio); |
482 | 482 | ||
483 | return ELEVATOR_NO_MERGE; | 483 | return ELEVATOR_NO_MERGE; |
484 | } | 484 | } |
485 | 485 | ||
486 | void elv_merged_request(struct request_queue *q, struct request *rq, int type) | 486 | void elv_merged_request(struct request_queue *q, struct request *rq, int type) |
487 | { | 487 | { |
488 | elevator_t *e = q->elevator; | 488 | elevator_t *e = q->elevator; |
489 | 489 | ||
490 | if (e->ops->elevator_merged_fn) | 490 | if (e->ops->elevator_merged_fn) |
491 | e->ops->elevator_merged_fn(q, rq, type); | 491 | e->ops->elevator_merged_fn(q, rq, type); |
492 | 492 | ||
493 | if (type == ELEVATOR_BACK_MERGE) | 493 | if (type == ELEVATOR_BACK_MERGE) |
494 | elv_rqhash_reposition(q, rq); | 494 | elv_rqhash_reposition(q, rq); |
495 | 495 | ||
496 | q->last_merge = rq; | 496 | q->last_merge = rq; |
497 | } | 497 | } |
498 | 498 | ||
499 | void elv_merge_requests(struct request_queue *q, struct request *rq, | 499 | void elv_merge_requests(struct request_queue *q, struct request *rq, |
500 | struct request *next) | 500 | struct request *next) |
501 | { | 501 | { |
502 | elevator_t *e = q->elevator; | 502 | elevator_t *e = q->elevator; |
503 | 503 | ||
504 | if (e->ops->elevator_merge_req_fn) | 504 | if (e->ops->elevator_merge_req_fn) |
505 | e->ops->elevator_merge_req_fn(q, rq, next); | 505 | e->ops->elevator_merge_req_fn(q, rq, next); |
506 | 506 | ||
507 | elv_rqhash_reposition(q, rq); | 507 | elv_rqhash_reposition(q, rq); |
508 | elv_rqhash_del(q, next); | 508 | elv_rqhash_del(q, next); |
509 | 509 | ||
510 | q->nr_sorted--; | 510 | q->nr_sorted--; |
511 | q->last_merge = rq; | 511 | q->last_merge = rq; |
512 | } | 512 | } |
513 | 513 | ||
514 | void elv_requeue_request(struct request_queue *q, struct request *rq) | 514 | void elv_requeue_request(struct request_queue *q, struct request *rq) |
515 | { | 515 | { |
516 | /* | 516 | /* |
517 | * it already went through dequeue, we need to decrement the | 517 | * it already went through dequeue, we need to decrement the |
518 | * in_flight count again | 518 | * in_flight count again |
519 | */ | 519 | */ |
520 | if (blk_account_rq(rq)) { | 520 | if (blk_account_rq(rq)) { |
521 | q->in_flight--; | 521 | q->in_flight--; |
522 | if (blk_sorted_rq(rq)) | 522 | if (blk_sorted_rq(rq)) |
523 | elv_deactivate_rq(q, rq); | 523 | elv_deactivate_rq(q, rq); |
524 | } | 524 | } |
525 | 525 | ||
526 | rq->cmd_flags &= ~REQ_STARTED; | 526 | rq->cmd_flags &= ~REQ_STARTED; |
527 | 527 | ||
528 | elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); | 528 | elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); |
529 | } | 529 | } |
530 | 530 | ||
531 | static void elv_drain_elevator(struct request_queue *q) | 531 | static void elv_drain_elevator(struct request_queue *q) |
532 | { | 532 | { |
533 | static int printed; | 533 | static int printed; |
534 | while (q->elevator->ops->elevator_dispatch_fn(q, 1)) | 534 | while (q->elevator->ops->elevator_dispatch_fn(q, 1)) |
535 | ; | 535 | ; |
536 | if (q->nr_sorted == 0) | 536 | if (q->nr_sorted == 0) |
537 | return; | 537 | return; |
538 | if (printed++ < 10) { | 538 | if (printed++ < 10) { |
539 | printk(KERN_ERR "%s: forced dispatching is broken " | 539 | printk(KERN_ERR "%s: forced dispatching is broken " |
540 | "(nr_sorted=%u), please report this\n", | 540 | "(nr_sorted=%u), please report this\n", |
541 | q->elevator->elevator_type->elevator_name, q->nr_sorted); | 541 | q->elevator->elevator_type->elevator_name, q->nr_sorted); |
542 | } | 542 | } |
543 | } | 543 | } |
544 | 544 | ||
/*
 * Insert @rq into @q at the position selected by @where (one of the
 * ELEVATOR_INSERT_* placements).  Caller is expected to hold the queue
 * lock (see elv_add_request() for the locking wrapper) — TODO confirm
 * against callers outside this file.
 */
void elv_insert(struct request_queue *q, struct request *rq, int where)
{
	struct list_head *pos;
	unsigned ordseq;
	int unplug_it = 1;	/* most placements may force an unplug below */

	blk_add_trace_rq(q, rq, BLK_TA_INSERT);

	rq->q = q;

	switch (where) {
	case ELEVATOR_INSERT_FRONT:
		rq->cmd_flags |= REQ_SOFTBARRIER;

		list_add(&rq->queuelist, &q->queue_head);
		break;

	case ELEVATOR_INSERT_BACK:
		rq->cmd_flags |= REQ_SOFTBARRIER;
		elv_drain_elevator(q);
		list_add_tail(&rq->queuelist, &q->queue_head);
		/*
		 * We kick the queue here for the following reasons.
		 * - The elevator might have returned NULL previously
		 *   to delay requests and returned them now.  As the
		 *   queue wasn't empty before this request, ll_rw_blk
		 *   won't run the queue on return, resulting in hang.
		 * - Usually, back inserted requests won't be merged
		 *   with anything.  There's no point in delaying queue
		 *   processing.
		 */
		blk_remove_plug(q);
		q->request_fn(q);
		break;

	case ELEVATOR_INSERT_SORT:
		/* only fs requests may be handed to the io scheduler */
		BUG_ON(!blk_fs_request(rq));
		rq->cmd_flags |= REQ_SORTED;
		q->nr_sorted++;
		if (rq_mergeable(rq)) {
			elv_rqhash_add(q, rq);
			if (!q->last_merge)
				q->last_merge = rq;
		}

		/*
		 * Some ioscheds (cfq) run q->request_fn directly, so
		 * rq cannot be accessed after calling
		 * elevator_add_req_fn.
		 */
		q->elevator->ops->elevator_add_req_fn(q, rq);
		break;

	case ELEVATOR_INSERT_REQUEUE:
		/*
		 * If ordered flush isn't in progress, we do front
		 * insertion; otherwise, requests should be requeued
		 * in ordseq order.
		 */
		rq->cmd_flags |= REQ_SOFTBARRIER;

		/*
		 * Most requeues happen because of a busy condition,
		 * don't force unplug of the queue for that case.
		 */
		unplug_it = 0;

		if (q->ordseq == 0) {
			list_add(&rq->queuelist, &q->queue_head);
			break;
		}

		/* walk the dispatch list and insert in ordseq order */
		ordseq = blk_ordered_req_seq(rq);

		list_for_each(pos, &q->queue_head) {
			struct request *pos_rq = list_entry_rq(pos);
			if (ordseq <= blk_ordered_req_seq(pos_rq))
				break;
		}

		list_add_tail(&rq->queuelist, pos);
		break;

	default:
		printk(KERN_ERR "%s: bad insertion point %d\n",
		       __FUNCTION__, where);
		BUG();
	}

	/* unplug if enough work has accumulated for the driver */
	if (unplug_it && blk_queue_plugged(q)) {
		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
			- q->in_flight;

		if (nrq >= q->unplug_thresh)
			__generic_unplug_device(q);
	}
}
642 | 642 | ||
/*
 * Add @rq to @q with placement @where, first adjusting the placement
 * for barrier and non-elevator-private requests.  Callers such as
 * elv_add_request() invoke this under the queue lock.
 */
void __elv_add_request(struct request_queue *q, struct request *rq, int where,
		       int plug)
{
	if (q->ordcolor)
		rq->cmd_flags |= REQ_ORDERED_COLOR;

	if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
		/*
		 * toggle ordered color
		 */
		if (blk_barrier_rq(rq))
			q->ordcolor ^= 1;

		/*
		 * barriers implicitly indicate back insertion
		 */
		if (where == ELEVATOR_INSERT_SORT)
			where = ELEVATOR_INSERT_BACK;

		/*
		 * this request is scheduling boundary, update
		 * end_sector
		 */
		if (blk_fs_request(rq)) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = rq;
		}
	} else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
		/* requests without elevator private data cannot be sorted */
		where = ELEVATOR_INSERT_BACK;

	if (plug)
		blk_plug_device(q);

	elv_insert(q, rq, where);
}

EXPORT_SYMBOL(__elv_add_request);
680 | 680 | ||
/*
 * Locking wrapper around __elv_add_request(): acquires the queue lock
 * (irq-safe) for callers that do not already hold it.
 */
void elv_add_request(struct request_queue *q, struct request *rq, int where,
		     int plug)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__elv_add_request(q, rq, where, plug);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

EXPORT_SYMBOL(elv_add_request);
692 | 692 | ||
693 | static inline struct request *__elv_next_request(struct request_queue *q) | 693 | static inline struct request *__elv_next_request(struct request_queue *q) |
694 | { | 694 | { |
695 | struct request *rq; | 695 | struct request *rq; |
696 | 696 | ||
697 | while (1) { | 697 | while (1) { |
698 | while (!list_empty(&q->queue_head)) { | 698 | while (!list_empty(&q->queue_head)) { |
699 | rq = list_entry_rq(q->queue_head.next); | 699 | rq = list_entry_rq(q->queue_head.next); |
700 | if (blk_do_ordered(q, &rq)) | 700 | if (blk_do_ordered(q, &rq)) |
701 | return rq; | 701 | return rq; |
702 | } | 702 | } |
703 | 703 | ||
704 | if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) | 704 | if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) |
705 | return NULL; | 705 | return NULL; |
706 | } | 706 | } |
707 | } | 707 | } |
708 | 708 | ||
/*
 * Hand the driver its next request, or NULL if nothing is ready.  The
 * returned request is not dequeued; it stays at the head of the
 * dispatch list until the driver dequeues it itself.  Requests whose
 * prep handler returns BLKPREP_KILL are completed with an error here
 * and the loop moves on to the next candidate.
 */
struct request *elv_next_request(struct request_queue *q)
{
	struct request *rq;
	int ret;

	while ((rq = __elv_next_request(q)) != NULL) {
		if (!(rq->cmd_flags & REQ_STARTED)) {
			/*
			 * This is the first time the device driver
			 * sees this request (possibly after
			 * requeueing). Notify IO scheduler.
			 */
			if (blk_sorted_rq(rq))
				elv_activate_rq(q, rq);

			/*
			 * just mark as started even if we don't start
			 * it, a request that has been delayed should
			 * not be passed by new incoming requests
			 */
			rq->cmd_flags |= REQ_STARTED;
			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
		}

		if (!q->boundary_rq || q->boundary_rq == rq) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = NULL;
		}

		if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
			break;

		ret = q->prep_rq_fn(q, rq);
		if (ret == BLKPREP_OK) {
			break;
		} else if (ret == BLKPREP_DEFER) {
			/*
			 * the request may have been (partially) prepped.
			 * we need to keep this request in the front to
			 * avoid resource deadlock. REQ_STARTED will
			 * prevent other fs requests from passing this one.
			 */
			rq = NULL;
			break;
		} else if (ret == BLKPREP_KILL) {
			/* fail the whole request quietly while queued */
			rq->cmd_flags |= REQ_QUIET;
			end_queued_request(rq, 0);
		} else {
			printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
			       ret);
			break;
		}
	}

	return rq;
}

EXPORT_SYMBOL(elv_next_request);
774 | 767 | ||
/*
 * Remove @rq from the dispatch list once the driver takes ownership of
 * it.  The request must still be queued and must no longer be on the
 * elevator merge hash.
 */
void elv_dequeue_request(struct request_queue *q, struct request *rq)
{
	BUG_ON(list_empty(&rq->queuelist));
	BUG_ON(ELV_ON_HASH(rq));

	list_del_init(&rq->queuelist);

	/*
	 * the time frame between a request being removed from the lists
	 * and to it is freed is accounted as io that is in progress at
	 * the driver side.
	 */
	if (blk_account_rq(rq))
		q->in_flight++;
}

EXPORT_SYMBOL(elv_dequeue_request);
792 | 785 | ||
793 | int elv_queue_empty(struct request_queue *q) | 786 | int elv_queue_empty(struct request_queue *q) |
794 | { | 787 | { |
795 | elevator_t *e = q->elevator; | 788 | elevator_t *e = q->elevator; |
796 | 789 | ||
797 | if (!list_empty(&q->queue_head)) | 790 | if (!list_empty(&q->queue_head)) |
798 | return 0; | 791 | return 0; |
799 | 792 | ||
800 | if (e->ops->elevator_queue_empty_fn) | 793 | if (e->ops->elevator_queue_empty_fn) |
801 | return e->ops->elevator_queue_empty_fn(q); | 794 | return e->ops->elevator_queue_empty_fn(q); |
802 | 795 | ||
803 | return 1; | 796 | return 1; |
804 | } | 797 | } |
805 | 798 | ||
806 | EXPORT_SYMBOL(elv_queue_empty); | 799 | EXPORT_SYMBOL(elv_queue_empty); |
807 | 800 | ||
808 | struct request *elv_latter_request(struct request_queue *q, struct request *rq) | 801 | struct request *elv_latter_request(struct request_queue *q, struct request *rq) |
809 | { | 802 | { |
810 | elevator_t *e = q->elevator; | 803 | elevator_t *e = q->elevator; |
811 | 804 | ||
812 | if (e->ops->elevator_latter_req_fn) | 805 | if (e->ops->elevator_latter_req_fn) |
813 | return e->ops->elevator_latter_req_fn(q, rq); | 806 | return e->ops->elevator_latter_req_fn(q, rq); |
814 | return NULL; | 807 | return NULL; |
815 | } | 808 | } |
816 | 809 | ||
817 | struct request *elv_former_request(struct request_queue *q, struct request *rq) | 810 | struct request *elv_former_request(struct request_queue *q, struct request *rq) |
818 | { | 811 | { |
819 | elevator_t *e = q->elevator; | 812 | elevator_t *e = q->elevator; |
820 | 813 | ||
821 | if (e->ops->elevator_former_req_fn) | 814 | if (e->ops->elevator_former_req_fn) |
822 | return e->ops->elevator_former_req_fn(q, rq); | 815 | return e->ops->elevator_former_req_fn(q, rq); |
823 | return NULL; | 816 | return NULL; |
824 | } | 817 | } |
825 | 818 | ||
826 | int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) | 819 | int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) |
827 | { | 820 | { |
828 | elevator_t *e = q->elevator; | 821 | elevator_t *e = q->elevator; |
829 | 822 | ||
830 | if (e->ops->elevator_set_req_fn) | 823 | if (e->ops->elevator_set_req_fn) |
831 | return e->ops->elevator_set_req_fn(q, rq, gfp_mask); | 824 | return e->ops->elevator_set_req_fn(q, rq, gfp_mask); |
832 | 825 | ||
833 | rq->elevator_private = NULL; | 826 | rq->elevator_private = NULL; |
834 | return 0; | 827 | return 0; |
835 | } | 828 | } |
836 | 829 | ||
837 | void elv_put_request(struct request_queue *q, struct request *rq) | 830 | void elv_put_request(struct request_queue *q, struct request *rq) |
838 | { | 831 | { |
839 | elevator_t *e = q->elevator; | 832 | elevator_t *e = q->elevator; |
840 | 833 | ||
841 | if (e->ops->elevator_put_req_fn) | 834 | if (e->ops->elevator_put_req_fn) |
842 | e->ops->elevator_put_req_fn(rq); | 835 | e->ops->elevator_put_req_fn(rq); |
843 | } | 836 | } |
844 | 837 | ||
845 | int elv_may_queue(struct request_queue *q, int rw) | 838 | int elv_may_queue(struct request_queue *q, int rw) |
846 | { | 839 | { |
847 | elevator_t *e = q->elevator; | 840 | elevator_t *e = q->elevator; |
848 | 841 | ||
849 | if (e->ops->elevator_may_queue_fn) | 842 | if (e->ops->elevator_may_queue_fn) |
850 | return e->ops->elevator_may_queue_fn(q, rw); | 843 | return e->ops->elevator_may_queue_fn(q, rw); |
851 | 844 | ||
852 | return ELV_MQUEUE_MAY; | 845 | return ELV_MQUEUE_MAY; |
853 | } | 846 | } |
854 | 847 | ||
/*
 * Called when the driver has finished with @rq: drop the in-flight
 * count, notify the io scheduler, and kick a pending ordered-flush
 * sequence if this completion was what the drain stage was waiting for.
 */
void elv_completed_request(struct request_queue *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	/*
	 * request is released from the driver, io must be done
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
			e->ops->elevator_completed_req_fn(q, rq);
	}

	/*
	 * Check if the queue is waiting for fs requests to be
	 * drained for flush sequence.
	 */
	if (unlikely(q->ordseq)) {
		struct request *first_rq = list_entry_rq(q->queue_head.next);
		if (q->in_flight == 0 &&
		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
		    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
			q->request_fn(q);
		}
	}
}
882 | 875 | ||
/* map a struct attribute embedded in an elv_fs_entry back to the entry */
#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
884 | 877 | ||
885 | static ssize_t | 878 | static ssize_t |
886 | elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | 879 | elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) |
887 | { | 880 | { |
888 | elevator_t *e = container_of(kobj, elevator_t, kobj); | 881 | elevator_t *e = container_of(kobj, elevator_t, kobj); |
889 | struct elv_fs_entry *entry = to_elv(attr); | 882 | struct elv_fs_entry *entry = to_elv(attr); |
890 | ssize_t error; | 883 | ssize_t error; |
891 | 884 | ||
892 | if (!entry->show) | 885 | if (!entry->show) |
893 | return -EIO; | 886 | return -EIO; |
894 | 887 | ||
895 | mutex_lock(&e->sysfs_lock); | 888 | mutex_lock(&e->sysfs_lock); |
896 | error = e->ops ? entry->show(e, page) : -ENOENT; | 889 | error = e->ops ? entry->show(e, page) : -ENOENT; |
897 | mutex_unlock(&e->sysfs_lock); | 890 | mutex_unlock(&e->sysfs_lock); |
898 | return error; | 891 | return error; |
899 | } | 892 | } |
900 | 893 | ||
901 | static ssize_t | 894 | static ssize_t |
902 | elv_attr_store(struct kobject *kobj, struct attribute *attr, | 895 | elv_attr_store(struct kobject *kobj, struct attribute *attr, |
903 | const char *page, size_t length) | 896 | const char *page, size_t length) |
904 | { | 897 | { |
905 | elevator_t *e = container_of(kobj, elevator_t, kobj); | 898 | elevator_t *e = container_of(kobj, elevator_t, kobj); |
906 | struct elv_fs_entry *entry = to_elv(attr); | 899 | struct elv_fs_entry *entry = to_elv(attr); |
907 | ssize_t error; | 900 | ssize_t error; |
908 | 901 | ||
909 | if (!entry->store) | 902 | if (!entry->store) |
910 | return -EIO; | 903 | return -EIO; |
911 | 904 | ||
912 | mutex_lock(&e->sysfs_lock); | 905 | mutex_lock(&e->sysfs_lock); |
913 | error = e->ops ? entry->store(e, page, length) : -ENOENT; | 906 | error = e->ops ? entry->store(e, page, length) : -ENOENT; |
914 | mutex_unlock(&e->sysfs_lock); | 907 | mutex_unlock(&e->sysfs_lock); |
915 | return error; | 908 | return error; |
916 | } | 909 | } |
917 | 910 | ||
/* sysfs read/write glue for per-elevator attribute files */
static struct sysfs_ops elv_sysfs_ops = {
	.show = elv_attr_show,
	.store = elv_attr_store,
};

/* kobject type shared by all elevators; released via elevator_release() */
static struct kobj_type elv_ktype = {
	.sysfs_ops = &elv_sysfs_ops,
	.release = elevator_release,
};
927 | 920 | ||
/*
 * Register the queue's current elevator in sysfs as a child of the
 * queue kobject and create its type-specific attribute files.
 * Returns 0 on success or the kobject_add() error.
 */
int elv_register_queue(struct request_queue *q)
{
	elevator_t *e = q->elevator;
	int error;

	e->kobj.parent = &q->kobj;

	error = kobject_add(&e->kobj);
	if (!error) {
		struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
		if (attr) {
			/*
			 * the attrs array is terminated by a NULL name;
			 * stop on the first sysfs_create_file() failure
			 */
			while (attr->attr.name) {
				if (sysfs_create_file(&e->kobj, &attr->attr))
					break;
				attr++;
			}
		}
		kobject_uevent(&e->kobj, KOBJ_ADD);
	}
	return error;
}
949 | 942 | ||
/* drop the elevator's sysfs presence: announce removal, then delete */
static void __elv_unregister_queue(elevator_t *e)
{
	kobject_uevent(&e->kobj, KOBJ_REMOVE);
	kobject_del(&e->kobj);
}
955 | 948 | ||
956 | void elv_unregister_queue(struct request_queue *q) | 949 | void elv_unregister_queue(struct request_queue *q) |
957 | { | 950 | { |
958 | if (q) | 951 | if (q) |
959 | __elv_unregister_queue(q->elevator); | 952 | __elv_unregister_queue(q->elevator); |
960 | } | 953 | } |
961 | 954 | ||
/*
 * Register a new io scheduler type with the elevator core.  Registering
 * a duplicate name is a bug.  Always returns 0.
 */
int elv_register(struct elevator_type *e)
{
	char *def = "";

	spin_lock(&elv_list_lock);
	BUG_ON(elevator_find(e->elevator_name));
	list_add_tail(&e->list, &elv_list);
	spin_unlock(&elv_list_lock);

	/* note in the log whether this scheduler is the chosen default */
	if (!strcmp(e->elevator_name, chosen_elevator) ||
			(!*chosen_elevator &&
			 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
				def = " (default)";

	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, def);
	return 0;
}
EXPORT_SYMBOL_GPL(elv_register);
980 | 973 | ||
/*
 * Unregister an io scheduler type.  If the type keeps per-task io
 * context state (ops.trim set), walk every thread in the system and
 * trim its io context first so no stale references remain.
 */
void elv_unregister(struct elevator_type *e)
{
	struct task_struct *g, *p;

	/*
	 * Iterate every thread in the process to remove the io contexts.
	 */
	if (e->ops.trim) {
		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			task_lock(p);
			if (p->io_context)
				e->ops.trim(p->io_context);
			task_unlock(p);
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);
	}

	spin_lock(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);
1004 | 997 | ||
/*
 * switch to new_e io scheduler. be careful not to introduce deadlocks -
 * we don't free the old io scheduler, before we have allocated what we
 * need for the new one. this way we have a chance of going back to the old
 * one, if the new one fails init for some reason.
 *
 * Returns 1 on success, 0 on failure (the old scheduler stays/returns
 * in place).
 */
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
	elevator_t *old_elevator, *e;
	void *data;

	/*
	 * Allocate new elevator
	 */
	e = elevator_alloc(q, new_e);
	if (!e)
		return 0;

	data = elevator_init_queue(q, e);
	if (!data) {
		kobject_put(&e->kobj);
		return 0;
	}

	/*
	 * Turn on BYPASS and drain all requests w/ elevator private data
	 */
	spin_lock_irq(q->queue_lock);

	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

	elv_drain_elevator(q);

	/* wait (dropping the lock) for requests holding elevator data */
	while (q->rq.elvpriv) {
		blk_remove_plug(q);
		q->request_fn(q);
		spin_unlock_irq(q->queue_lock);
		msleep(10);
		spin_lock_irq(q->queue_lock);
		elv_drain_elevator(q);
	}

	/*
	 * Remember old elevator.
	 */
	old_elevator = q->elevator;

	/*
	 * attach and start new elevator
	 */
	elevator_attach(q, e, data);

	spin_unlock_irq(q->queue_lock);

	__elv_unregister_queue(old_elevator);

	if (elv_register_queue(q))
		goto fail_register;

	/*
	 * finally exit old elevator and turn off BYPASS.
	 */
	elevator_exit(old_elevator);
	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	return 1;

fail_register:
	/*
	 * switch failed, exit the new io scheduler and reattach the old
	 * one again (along with re-adding the sysfs dir)
	 */
	elevator_exit(e);
	q->elevator = old_elevator;
	elv_register_queue(q);
	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	return 0;
}
1082 | 1075 | ||
/*
 * sysfs store handler for the queue's scheduler attribute: parse the
 * (possibly newline-terminated) scheduler name and switch the queue to
 * it.  Returns @count on success (a switch to the already-active
 * scheduler is a successful no-op) or -EINVAL for an unknown name.
 */
ssize_t elv_iosched_store(struct request_queue *q, const char *name,
			  size_t count)
{
	char elevator_name[ELV_NAME_MAX];
	size_t len;
	struct elevator_type *e;

	/* bounded copy; the last slot stays '\0' so strlen() is safe */
	elevator_name[sizeof(elevator_name) - 1] = '\0';
	strncpy(elevator_name, name, sizeof(elevator_name) - 1);
	len = strlen(elevator_name);

	/* strip a trailing newline from the sysfs write */
	if (len && elevator_name[len - 1] == '\n')
		elevator_name[len - 1] = '\0';

	e = elevator_get(elevator_name);
	if (!e) {
		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
		return -EINVAL;
	}

	/* switching to the currently active scheduler is a no-op */
	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
		elevator_put(e);
		return count;
	}

	if (!elevator_switch(q, e))
		printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name);
	return count;
}
1112 | 1105 | ||
/*
 * sysfs show handler for the queue's "scheduler" attribute: list all
 * registered elevators, with the currently active one in square brackets,
 * e.g. "noop [cfq]\n".  Returns the number of bytes written to @name.
 */
ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
	elevator_t *e = q->elevator;
	struct elevator_type *elv = e->elevator_type;
	struct elevator_type *__e;
	int len = 0;

	/* elv_list_lock protects the global list of registered types */
	spin_lock(&elv_list_lock);
	list_for_each_entry(__e, &elv_list, list) {
		if (!strcmp(elv->elevator_name, __e->elevator_name))
			len += sprintf(name+len, "[%s] ", elv->elevator_name);
		else
			len += sprintf(name+len, "%s ", __e->elevator_name);
	}
	spin_unlock(&elv_list_lock);

	len += sprintf(len+name, "\n");
	return len;
}
1132 | 1125 | ||
1133 | struct request *elv_rb_former_request(struct request_queue *q, | 1126 | struct request *elv_rb_former_request(struct request_queue *q, |
1134 | struct request *rq) | 1127 | struct request *rq) |
1135 | { | 1128 | { |
1136 | struct rb_node *rbprev = rb_prev(&rq->rb_node); | 1129 | struct rb_node *rbprev = rb_prev(&rq->rb_node); |
1137 | 1130 | ||
1138 | if (rbprev) | 1131 | if (rbprev) |
1139 | return rb_entry_rq(rbprev); | 1132 | return rb_entry_rq(rbprev); |
1140 | 1133 | ||
1141 | return NULL; | 1134 | return NULL; |
1142 | } | 1135 | } |
1143 | 1136 | ||
1144 | EXPORT_SYMBOL(elv_rb_former_request); | 1137 | EXPORT_SYMBOL(elv_rb_former_request); |
1145 | 1138 | ||
1146 | struct request *elv_rb_latter_request(struct request_queue *q, | 1139 | struct request *elv_rb_latter_request(struct request_queue *q, |
1147 | struct request *rq) | 1140 | struct request *rq) |
1148 | { | 1141 | { |
1149 | struct rb_node *rbnext = rb_next(&rq->rb_node); | 1142 | struct rb_node *rbnext = rb_next(&rq->rb_node); |
1150 | 1143 | ||
1151 | if (rbnext) | 1144 | if (rbnext) |
1152 | return rb_entry_rq(rbnext); | 1145 | return rb_entry_rq(rbnext); |
1153 | 1146 | ||
1154 | return NULL; | 1147 | return NULL; |
1155 | } | 1148 | } |
1156 | 1149 | ||
1157 | EXPORT_SYMBOL(elv_rb_latter_request); | 1150 | EXPORT_SYMBOL(elv_rb_latter_request); |
1158 | 1151 |
block/ll_rw_blk.c
1 | /* | 1 | /* |
2 | * Copyright (C) 1991, 1992 Linus Torvalds | 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics | 3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics |
4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> | 5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> |
6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000 | 6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000 |
7 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 | 7 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 |
8 | */ | 8 | */ |
9 | 9 | ||
10 | /* | 10 | /* |
11 | * This handles all read/write requests to block devices | 11 | * This handles all read/write requests to block devices |
12 | */ | 12 | */ |
13 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/backing-dev.h> | 15 | #include <linux/backing-dev.h> |
16 | #include <linux/bio.h> | 16 | #include <linux/bio.h> |
17 | #include <linux/blkdev.h> | 17 | #include <linux/blkdev.h> |
18 | #include <linux/highmem.h> | 18 | #include <linux/highmem.h> |
19 | #include <linux/mm.h> | 19 | #include <linux/mm.h> |
20 | #include <linux/kernel_stat.h> | 20 | #include <linux/kernel_stat.h> |
21 | #include <linux/string.h> | 21 | #include <linux/string.h> |
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ | 23 | #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ |
24 | #include <linux/completion.h> | 24 | #include <linux/completion.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/swap.h> | 26 | #include <linux/swap.h> |
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/task_io_accounting_ops.h> | 28 | #include <linux/task_io_accounting_ops.h> |
29 | #include <linux/interrupt.h> | 29 | #include <linux/interrupt.h> |
30 | #include <linux/cpu.h> | 30 | #include <linux/cpu.h> |
31 | #include <linux/blktrace_api.h> | 31 | #include <linux/blktrace_api.h> |
32 | #include <linux/fault-inject.h> | 32 | #include <linux/fault-inject.h> |
33 | 33 | ||
34 | /* | 34 | /* |
35 | * for max sense size | 35 | * for max sense size |
36 | */ | 36 | */ |
37 | #include <scsi/scsi_cmnd.h> | 37 | #include <scsi/scsi_cmnd.h> |
38 | 38 | ||
39 | static void blk_unplug_work(struct work_struct *work); | 39 | static void blk_unplug_work(struct work_struct *work); |
40 | static void blk_unplug_timeout(unsigned long data); | 40 | static void blk_unplug_timeout(unsigned long data); |
41 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); | 41 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); |
42 | static void init_request_from_bio(struct request *req, struct bio *bio); | 42 | static void init_request_from_bio(struct request *req, struct bio *bio); |
43 | static int __make_request(struct request_queue *q, struct bio *bio); | 43 | static int __make_request(struct request_queue *q, struct bio *bio); |
44 | static struct io_context *current_io_context(gfp_t gfp_flags, int node); | 44 | static struct io_context *current_io_context(gfp_t gfp_flags, int node); |
45 | static void blk_recalc_rq_segments(struct request *rq); | 45 | static void blk_recalc_rq_segments(struct request *rq); |
46 | static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 46 | static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
47 | struct bio *bio); | 47 | struct bio *bio); |
48 | 48 | ||
49 | /* | 49 | /* |
50 | * For the allocated request tables | 50 | * For the allocated request tables |
51 | */ | 51 | */ |
52 | static struct kmem_cache *request_cachep; | 52 | static struct kmem_cache *request_cachep; |
53 | 53 | ||
54 | /* | 54 | /* |
55 | * For queue allocation | 55 | * For queue allocation |
56 | */ | 56 | */ |
57 | static struct kmem_cache *requestq_cachep; | 57 | static struct kmem_cache *requestq_cachep; |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * For io context allocations | 60 | * For io context allocations |
61 | */ | 61 | */ |
62 | static struct kmem_cache *iocontext_cachep; | 62 | static struct kmem_cache *iocontext_cachep; |
63 | 63 | ||
64 | /* | 64 | /* |
65 | * Controlling structure to kblockd | 65 | * Controlling structure to kblockd |
66 | */ | 66 | */ |
67 | static struct workqueue_struct *kblockd_workqueue; | 67 | static struct workqueue_struct *kblockd_workqueue; |
68 | 68 | ||
69 | unsigned long blk_max_low_pfn, blk_max_pfn; | 69 | unsigned long blk_max_low_pfn, blk_max_pfn; |
70 | 70 | ||
71 | EXPORT_SYMBOL(blk_max_low_pfn); | 71 | EXPORT_SYMBOL(blk_max_low_pfn); |
72 | EXPORT_SYMBOL(blk_max_pfn); | 72 | EXPORT_SYMBOL(blk_max_pfn); |
73 | 73 | ||
74 | static DEFINE_PER_CPU(struct list_head, blk_cpu_done); | 74 | static DEFINE_PER_CPU(struct list_head, blk_cpu_done); |
75 | 75 | ||
76 | /* Amount of time in which a process may batch requests */ | 76 | /* Amount of time in which a process may batch requests */ |
77 | #define BLK_BATCH_TIME (HZ/50UL) | 77 | #define BLK_BATCH_TIME (HZ/50UL) |
78 | 78 | ||
79 | /* Number of requests a "batching" process may submit */ | 79 | /* Number of requests a "batching" process may submit */ |
80 | #define BLK_BATCH_REQ 32 | 80 | #define BLK_BATCH_REQ 32 |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * Return the threshold (number of used requests) at which the queue is | 83 | * Return the threshold (number of used requests) at which the queue is |
84 | * considered to be congested. It include a little hysteresis to keep the | 84 | * considered to be congested. It include a little hysteresis to keep the |
85 | * context switch rate down. | 85 | * context switch rate down. |
86 | */ | 86 | */ |
87 | static inline int queue_congestion_on_threshold(struct request_queue *q) | 87 | static inline int queue_congestion_on_threshold(struct request_queue *q) |
88 | { | 88 | { |
89 | return q->nr_congestion_on; | 89 | return q->nr_congestion_on; |
90 | } | 90 | } |
91 | 91 | ||
/*
 * The threshold at which a queue is considered to be uncongested
 */
static inline int queue_congestion_off_threshold(struct request_queue *q)
{
	/* precomputed in blk_queue_congestion_threshold() */
	return q->nr_congestion_off;
}
99 | 99 | ||
/*
 * Recompute the congestion on/off watermarks from q->nr_requests.
 * The on threshold is ~7/8 of the pool (clamped to nr_requests) and the
 * off threshold is ~13/16 (clamped to at least 1), giving hysteresis so
 * the congested state does not flap as requests complete.
 */
static void blk_queue_congestion_threshold(struct request_queue *q)
{
	int nr;

	nr = q->nr_requests - (q->nr_requests / 8) + 1;
	if (nr > q->nr_requests)
		nr = q->nr_requests;
	q->nr_congestion_on = nr;

	nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
	if (nr < 1)
		nr = 1;
	q->nr_congestion_off = nr;
}
114 | 114 | ||
115 | /** | 115 | /** |
116 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info | 116 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info |
117 | * @bdev: device | 117 | * @bdev: device |
118 | * | 118 | * |
119 | * Locates the passed device's request queue and returns the address of its | 119 | * Locates the passed device's request queue and returns the address of its |
120 | * backing_dev_info | 120 | * backing_dev_info |
121 | * | 121 | * |
122 | * Will return NULL if the request queue cannot be located. | 122 | * Will return NULL if the request queue cannot be located. |
123 | */ | 123 | */ |
124 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) | 124 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) |
125 | { | 125 | { |
126 | struct backing_dev_info *ret = NULL; | 126 | struct backing_dev_info *ret = NULL; |
127 | struct request_queue *q = bdev_get_queue(bdev); | 127 | struct request_queue *q = bdev_get_queue(bdev); |
128 | 128 | ||
129 | if (q) | 129 | if (q) |
130 | ret = &q->backing_dev_info; | 130 | ret = &q->backing_dev_info; |
131 | return ret; | 131 | return ret; |
132 | } | 132 | } |
133 | EXPORT_SYMBOL(blk_get_backing_dev_info); | 133 | EXPORT_SYMBOL(blk_get_backing_dev_info); |
134 | 134 | ||
/**
 * blk_queue_prep_rq - set a prepare_request function for queue
 * @q:		queue
 * @pfn:	prepare_request function
 *
 * It's possible for a queue to register a prepare_request callback which
 * is invoked before the request is handed to the request_fn. The goal of
 * the function is to prepare a request for I/O, it can be used to build a
 * cdb from the request data for instance.
 *
 */
void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
{
	/* simple setter; the callback is consulted at dispatch time */
	q->prep_rq_fn = pfn;
}

EXPORT_SYMBOL(blk_queue_prep_rq);
152 | 152 | ||
/**
 * blk_queue_merge_bvec - set a merge_bvec function for queue
 * @q:		queue
 * @mbfn:	merge_bvec_fn
 *
 * Usually queues have static limitations on the max sectors or segments that
 * we can put in a request. Stacking drivers may have some settings that
 * are dynamic, and thus we have to query the queue whether it is ok to
 * add a new bio_vec to a bio at a given offset or not. If the block device
 * has such limitations, it needs to register a merge_bvec_fn to control
 * the size of bio's sent to it. Note that a block device *must* allow a
 * single page to be added to an empty bio. The block device driver may want
 * to use the bio_split() function to deal with these bio's. By default
 * no merge_bvec_fn is defined for a queue, and only the fixed limits are
 * honored.
 */
void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn)
{
	/* simple setter; NULL (the default) means only fixed limits apply */
	q->merge_bvec_fn = mbfn;
}

EXPORT_SYMBOL(blk_queue_merge_bvec);
175 | 175 | ||
/*
 * Register the function used to complete requests from softirq context
 * (see blk_complete_request()).
 */
void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
{
	q->softirq_done_fn = fn;
}

EXPORT_SYMBOL(blk_queue_softirq_done);
182 | 182 | ||
/**
 * blk_queue_make_request - define an alternate make_request function for a device
 * @q:  the request queue for the device to be affected
 * @mfn: the alternate make_request function
 *
 * Description:
 *    The normal way for &struct bios to be passed to a device
 *    driver is for them to be collected into requests on a request
 *    queue, and then to allow the device driver to select requests
 *    off that queue when it is ready.  This works well for many block
 *    devices. However some block devices (typically virtual devices
 *    such as md or lvm) do not benefit from the processing on the
 *    request queue, and are served best by having the requests passed
 *    directly to them.  This can be achieved by providing a function
 *    to blk_queue_make_request().
 *
 * Caveat:
 *    The driver that does this *must* be able to deal appropriately
 *    with buffers in "highmemory". This can be accomplished by either calling
 *    __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
 *    blk_queue_bounce() to create a buffer in normal memory.
 **/
void blk_queue_make_request(struct request_queue * q, make_request_fn * mfn)
{
	/*
	 * set defaults
	 */
	q->nr_requests = BLKDEV_MAX_RQ;
	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
	blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
	q->make_request_fn = mfn;
	q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
	q->backing_dev_info.state = 0;
	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
	blk_queue_hardsect_size(q, 512);
	blk_queue_dma_alignment(q, 511);
	blk_queue_congestion_threshold(q);
	q->nr_batching = BLK_BATCH_REQ;

	/* unplug heuristics: kick the queue after 4 queued requests or 3ms */
	q->unplug_thresh = 4;		/* hmm */
	q->unplug_delay = (3 * HZ) / 1000;	/* 3 milliseconds */
	if (q->unplug_delay == 0)
		q->unplug_delay = 1;

	INIT_WORK(&q->unplug_work, blk_unplug_work);

	q->unplug_timer.function = blk_unplug_timeout;
	q->unplug_timer.data = (unsigned long)q;

	/*
	 * by default assume old behaviour and bounce for any highmem page
	 */
	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
}

EXPORT_SYMBOL(blk_queue_make_request);
240 | 240 | ||
/*
 * Initialise a request to a known-clean state before (re)use.  Note that
 * rq->cmd_flags is deliberately NOT touched here: callers such as
 * queue_flush()/start_ordered() set it before calling rq_init().
 */
static void rq_init(struct request_queue *q, struct request *rq)
{
	INIT_LIST_HEAD(&rq->queuelist);
	INIT_LIST_HEAD(&rq->donelist);

	rq->errors = 0;
	rq->bio = rq->biotail = NULL;
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
	rq->ioprio = 0;
	rq->buffer = NULL;
	rq->ref_count = 1;
	rq->q = q;
	rq->special = NULL;
	rq->data_len = 0;
	rq->data = NULL;
	rq->nr_phys_segments = 0;
	rq->sense = NULL;
	rq->end_io = NULL;
	rq->end_io_data = NULL;
	rq->completion_data = NULL;
	rq->next_rq = NULL;
}
264 | 264 | ||
265 | /** | 265 | /** |
266 | * blk_queue_ordered - does this queue support ordered writes | 266 | * blk_queue_ordered - does this queue support ordered writes |
267 | * @q: the request queue | 267 | * @q: the request queue |
268 | * @ordered: one of QUEUE_ORDERED_* | 268 | * @ordered: one of QUEUE_ORDERED_* |
269 | * @prepare_flush_fn: rq setup helper for cache flush ordered writes | 269 | * @prepare_flush_fn: rq setup helper for cache flush ordered writes |
270 | * | 270 | * |
271 | * Description: | 271 | * Description: |
272 | * For journalled file systems, doing ordered writes on a commit | 272 | * For journalled file systems, doing ordered writes on a commit |
273 | * block instead of explicitly doing wait_on_buffer (which is bad | 273 | * block instead of explicitly doing wait_on_buffer (which is bad |
274 | * for performance) can be a big win. Block drivers supporting this | 274 | * for performance) can be a big win. Block drivers supporting this |
275 | * feature should call this function and indicate so. | 275 | * feature should call this function and indicate so. |
276 | * | 276 | * |
277 | **/ | 277 | **/ |
278 | int blk_queue_ordered(struct request_queue *q, unsigned ordered, | 278 | int blk_queue_ordered(struct request_queue *q, unsigned ordered, |
279 | prepare_flush_fn *prepare_flush_fn) | 279 | prepare_flush_fn *prepare_flush_fn) |
280 | { | 280 | { |
281 | if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && | 281 | if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && |
282 | prepare_flush_fn == NULL) { | 282 | prepare_flush_fn == NULL) { |
283 | printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); | 283 | printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); |
284 | return -EINVAL; | 284 | return -EINVAL; |
285 | } | 285 | } |
286 | 286 | ||
287 | if (ordered != QUEUE_ORDERED_NONE && | 287 | if (ordered != QUEUE_ORDERED_NONE && |
288 | ordered != QUEUE_ORDERED_DRAIN && | 288 | ordered != QUEUE_ORDERED_DRAIN && |
289 | ordered != QUEUE_ORDERED_DRAIN_FLUSH && | 289 | ordered != QUEUE_ORDERED_DRAIN_FLUSH && |
290 | ordered != QUEUE_ORDERED_DRAIN_FUA && | 290 | ordered != QUEUE_ORDERED_DRAIN_FUA && |
291 | ordered != QUEUE_ORDERED_TAG && | 291 | ordered != QUEUE_ORDERED_TAG && |
292 | ordered != QUEUE_ORDERED_TAG_FLUSH && | 292 | ordered != QUEUE_ORDERED_TAG_FLUSH && |
293 | ordered != QUEUE_ORDERED_TAG_FUA) { | 293 | ordered != QUEUE_ORDERED_TAG_FUA) { |
294 | printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); | 294 | printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); |
295 | return -EINVAL; | 295 | return -EINVAL; |
296 | } | 296 | } |
297 | 297 | ||
298 | q->ordered = ordered; | 298 | q->ordered = ordered; |
299 | q->next_ordered = ordered; | 299 | q->next_ordered = ordered; |
300 | q->prepare_flush_fn = prepare_flush_fn; | 300 | q->prepare_flush_fn = prepare_flush_fn; |
301 | 301 | ||
302 | return 0; | 302 | return 0; |
303 | } | 303 | } |
304 | 304 | ||
305 | EXPORT_SYMBOL(blk_queue_ordered); | 305 | EXPORT_SYMBOL(blk_queue_ordered); |
306 | 306 | ||
/**
 * blk_queue_issue_flush_fn - set function for issuing a flush
 * @q:     the request queue
 * @iff:   the function to be called issuing the flush
 *
 * Description:
 *   If a driver supports issuing a flush command, the support is notified
 *   to the block layer by defining it through this call.
 *
 **/
void blk_queue_issue_flush_fn(struct request_queue *q, issue_flush_fn *iff)
{
	/* simple setter; NULL means the driver cannot issue flushes */
	q->issue_flush_fn = iff;
}

EXPORT_SYMBOL(blk_queue_issue_flush_fn);
323 | 323 | ||
324 | /* | 324 | /* |
325 | * Cache flushing for ordered writes handling | 325 | * Cache flushing for ordered writes handling |
326 | */ | 326 | */ |
327 | inline unsigned blk_ordered_cur_seq(struct request_queue *q) | 327 | inline unsigned blk_ordered_cur_seq(struct request_queue *q) |
328 | { | 328 | { |
329 | if (!q->ordseq) | 329 | if (!q->ordseq) |
330 | return 0; | 330 | return 0; |
331 | return 1 << ffz(q->ordseq); | 331 | return 1 << ffz(q->ordseq); |
332 | } | 332 | } |
333 | 333 | ||
/*
 * Map @rq to the QUEUE_ORDSEQ_* stage it belongs to within the currently
 * running barrier sequence on its queue.  Must only be called while a
 * sequence is active (q->ordseq != 0).
 */
unsigned blk_ordered_req_seq(struct request *rq)
{
	struct request_queue *q = rq->q;

	BUG_ON(q->ordseq == 0);

	/* the three proxy requests have fixed stages */
	if (rq == &q->pre_flush_rq)
		return QUEUE_ORDSEQ_PREFLUSH;
	if (rq == &q->bar_rq)
		return QUEUE_ORDSEQ_BAR;
	if (rq == &q->post_flush_rq)
		return QUEUE_ORDSEQ_POSTFLUSH;

	/*
	 * !fs requests don't need to follow barrier ordering.  Always
	 * put them at the front.  This fixes the following deadlock.
	 *
	 * http://thread.gmane.org/gmane.linux.kernel/537473
	 */
	if (!blk_fs_request(rq))
		return QUEUE_ORDSEQ_DRAIN;

	/* the ordered color tells requests before/after the barrier apart */
	if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
	    (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
		return QUEUE_ORDSEQ_DRAIN;
	else
		return QUEUE_ORDSEQ_DONE;
}
362 | 362 | ||
/*
 * Mark barrier stage @seq as complete for @q, recording the first error
 * seen.  Once every stage has completed, finish the original barrier
 * request and reset the sequence state.
 */
void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
{
	struct request *rq;
	int uptodate;

	/* remember only the first error of the sequence */
	if (error && !q->orderr)
		q->orderr = error;

	/* each stage may complete exactly once */
	BUG_ON(q->ordseq & seq);
	q->ordseq |= seq;

	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
		return;

	/*
	 * Okay, sequence complete.
	 */
	uptodate = 1;
	if (q->orderr)
		uptodate = q->orderr;

	q->ordseq = 0;
	rq = q->orig_bar_rq;

	/* complete the caller's original barrier request in full */
	end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
	end_that_request_last(rq, uptodate);
}
390 | 390 | ||
/* Completion callback for the proxy pre-flush request. */
static void pre_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
}
396 | 396 | ||
/* Completion callback for the proxy barrier request. */
static void bar_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
}
402 | 402 | ||
/* Completion callback for the proxy post-flush request. */
static void post_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}
408 | 408 | ||
/*
 * Build and queue one of the two proxy flush requests (@which is either
 * QUEUE_ORDERED_PREFLUSH or QUEUE_ORDERED_POSTFLUSH) at the front of the
 * dispatch queue.  The driver's prepare_flush_fn fills in the command.
 */
static void queue_flush(struct request_queue *q, unsigned which)
{
	struct request *rq;
	rq_end_io_fn *end_io;

	if (which == QUEUE_ORDERED_PREFLUSH) {
		rq = &q->pre_flush_rq;
		end_io = pre_flush_end_io;
	} else {
		rq = &q->post_flush_rq;
		end_io = post_flush_end_io;
	}

	/* cmd_flags must be set before rq_init(), which does not touch it */
	rq->cmd_flags = REQ_HARDBARRIER;
	rq_init(q, rq);
	rq->elevator_private = NULL;
	rq->elevator_private2 = NULL;
	rq->rq_disk = q->bar_rq.rq_disk;
	rq->end_io = end_io;
	q->prepare_flush_fn(q, rq);

	/* flushes bypass the elevator and go straight to the front */
	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}
432 | 432 | ||
/*
 * Kick off an ordered (barrier) sequence for @rq.  Dequeues the
 * original barrier request, replaces it with the proxy barrier request
 * q->bar_rq, and stacks the required flush requests in front of it.
 * Returns the request to dispatch next, or NULL when the queue must
 * first drain in-flight requests.
 */
static inline struct request *start_ordered(struct request_queue *q,
					    struct request *rq)
{
	q->orderr = 0;
	q->ordered = q->next_ordered;
	q->ordseq |= QUEUE_ORDSEQ_STARTED;

	/*
	 * Prep proxy barrier request.
	 */
	blkdev_dequeue_request(rq);
	q->orig_bar_rq = rq;
	rq = &q->bar_rq;
	rq->cmd_flags = 0;
	rq_init(q, rq);
	/* Proxy inherits direction and FUA setting from the original. */
	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
		rq->cmd_flags |= REQ_RW;
	if (q->ordered & QUEUE_ORDERED_FUA)
		rq->cmd_flags |= REQ_FUA;
	rq->elevator_private = NULL;
	rq->elevator_private2 = NULL;
	init_request_from_bio(rq, q->orig_bar_rq->bio);
	rq->end_io = bar_end_io;

	/*
	 * Queue ordered sequence.  As we stack them at the head, we
	 * need to queue in reverse order.  Note that we rely on that
	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
	 * request gets inbetween ordered sequence.
	 */
	if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
	else
		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;

	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);

	if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
		queue_flush(q, QUEUE_ORDERED_PREFLUSH);
		rq = &q->pre_flush_rq;
	} else
		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;

	/*
	 * Tag-ordered queues need no draining; otherwise dispatch can
	 * only proceed once all in-flight requests have completed.
	 */
	if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
	else
		rq = NULL;

	return rq;
}
483 | 483 | ||
/*
 * Decide whether *rqp may be dispatched given the queue's ordered
 * (barrier) state.  May replace *rqp with a proxy request, set it to
 * NULL to hold dispatch, or complete it with -EOPNOTSUPP.  Returns 0
 * only when the request was consumed here; otherwise 1.
 */
int blk_do_ordered(struct request_queue *q, struct request **rqp)
{
	struct request *rq = *rqp;
	int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

	if (!q->ordseq) {
		/* No ordered sequence in flight. */
		if (!is_barrier)
			return 1;

		if (q->next_ordered != QUEUE_ORDERED_NONE) {
			*rqp = start_ordered(q, rq);
			return 1;
		} else {
			/*
			 * This can happen when the queue switches to
			 * ORDERED_NONE while this request is on it.
			 */
			blkdev_dequeue_request(rq);
			end_that_request_first(rq, -EOPNOTSUPP,
					       rq->hard_nr_sectors);
			end_that_request_last(rq, -EOPNOTSUPP);
			*rqp = NULL;
			return 0;
		}
	}

	/*
	 * Ordered sequence in progress
	 */

	/* Special requests are not subject to ordering rules. */
	if (!blk_fs_request(rq) &&
	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
		return 1;

	if (q->ordered & QUEUE_ORDERED_TAG) {
		/* Ordered by tag. Blocking the next barrier is enough. */
		if (is_barrier && rq != &q->bar_rq)
			*rqp = NULL;
	} else {
		/* Ordered by draining. Wait for turn. */
		WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
		if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
			*rqp = NULL;
	}

	return 1;
}
532 | 532 | ||
/*
 * Complete @nbytes of @bio on behalf of @rq.  For the proxy barrier
 * request the bios belong to the original request, so only the error
 * is recorded; for all other requests the bio is advanced and ended
 * when fully done.
 */
static void req_bio_endio(struct request *rq, struct bio *bio,
			  unsigned int nbytes, int error)
{
	struct request_queue *q = rq->q;

	if (&q->bar_rq != rq) {
		if (error)
			clear_bit(BIO_UPTODATE, &bio->bi_flags);
		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
			/* Bio already marked failed by a lower layer. */
			error = -EIO;

		/* Clamp to what is actually left in the bio. */
		if (unlikely(nbytes > bio->bi_size)) {
			printk("%s: want %u bytes done, only %u left\n",
			       __FUNCTION__, nbytes, bio->bi_size);
			nbytes = bio->bi_size;
		}

		/* Advance the bio; end it once nothing remains. */
		bio->bi_size -= nbytes;
		bio->bi_sector += (nbytes >> 9);
		if (bio->bi_size == 0)
			bio_endio(bio, error);
	} else {

		/*
		 * Okay, this is the barrier request in progress, just
		 * record the error;
		 */
		if (error && !q->orderr)
			q->orderr = error;
	}
}
564 | 564 | ||
/**
 * blk_queue_bounce_limit - set bounce buffer limit for queue
 * @q:  the request queue for the device
 * @dma_addr:   bus address limit
 *
 * Description:
 *    Different hardware can have different requirements as to what pages
 *    it can do I/O directly to. A low level driver can call
 *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
 *    buffers for doing I/O to pages residing above @dma_addr.  Also
 *    selects GFP_DMA allocation for the bounce pool when the limit is
 *    low enough to require it.
 **/
void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr)
{
	unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
	int dma = 0;

	q->bounce_gfp = GFP_NOIO;
#if BITS_PER_LONG == 64
	/* Assume anything <= 4GB can be handled by IOMMU.
	   Actually some IOMMUs can handle everything, but I don't
	   know of a way to test this here. */
	if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
		dma = 1;
	q->bounce_pfn = max_low_pfn;
#else
	if (bounce_pfn < blk_max_low_pfn)
		dma = 1;
	q->bounce_pfn = bounce_pfn;
#endif
	if (dma) {
		/* Limit is below reachable memory: bounce via ISA pool. */
		init_emergency_isa_pool();
		q->bounce_gfp = GFP_NOIO | GFP_DMA;
		q->bounce_pfn = bounce_pfn;
	}
}

EXPORT_SYMBOL(blk_queue_bounce_limit);
602 | 602 | ||
603 | /** | 603 | /** |
604 | * blk_queue_max_sectors - set max sectors for a request for this queue | 604 | * blk_queue_max_sectors - set max sectors for a request for this queue |
605 | * @q: the request queue for the device | 605 | * @q: the request queue for the device |
606 | * @max_sectors: max sectors in the usual 512b unit | 606 | * @max_sectors: max sectors in the usual 512b unit |
607 | * | 607 | * |
608 | * Description: | 608 | * Description: |
609 | * Enables a low level driver to set an upper limit on the size of | 609 | * Enables a low level driver to set an upper limit on the size of |
610 | * received requests. | 610 | * received requests. |
611 | **/ | 611 | **/ |
612 | void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors) | 612 | void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors) |
613 | { | 613 | { |
614 | if ((max_sectors << 9) < PAGE_CACHE_SIZE) { | 614 | if ((max_sectors << 9) < PAGE_CACHE_SIZE) { |
615 | max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); | 615 | max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); |
616 | printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); | 616 | printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); |
617 | } | 617 | } |
618 | 618 | ||
619 | if (BLK_DEF_MAX_SECTORS > max_sectors) | 619 | if (BLK_DEF_MAX_SECTORS > max_sectors) |
620 | q->max_hw_sectors = q->max_sectors = max_sectors; | 620 | q->max_hw_sectors = q->max_sectors = max_sectors; |
621 | else { | 621 | else { |
622 | q->max_sectors = BLK_DEF_MAX_SECTORS; | 622 | q->max_sectors = BLK_DEF_MAX_SECTORS; |
623 | q->max_hw_sectors = max_sectors; | 623 | q->max_hw_sectors = max_sectors; |
624 | } | 624 | } |
625 | } | 625 | } |
626 | 626 | ||
627 | EXPORT_SYMBOL(blk_queue_max_sectors); | 627 | EXPORT_SYMBOL(blk_queue_max_sectors); |
628 | 628 | ||
629 | /** | 629 | /** |
630 | * blk_queue_max_phys_segments - set max phys segments for a request for this queue | 630 | * blk_queue_max_phys_segments - set max phys segments for a request for this queue |
631 | * @q: the request queue for the device | 631 | * @q: the request queue for the device |
632 | * @max_segments: max number of segments | 632 | * @max_segments: max number of segments |
633 | * | 633 | * |
634 | * Description: | 634 | * Description: |
635 | * Enables a low level driver to set an upper limit on the number of | 635 | * Enables a low level driver to set an upper limit on the number of |
636 | * physical data segments in a request. This would be the largest sized | 636 | * physical data segments in a request. This would be the largest sized |
637 | * scatter list the driver could handle. | 637 | * scatter list the driver could handle. |
638 | **/ | 638 | **/ |
639 | void blk_queue_max_phys_segments(struct request_queue *q, | 639 | void blk_queue_max_phys_segments(struct request_queue *q, |
640 | unsigned short max_segments) | 640 | unsigned short max_segments) |
641 | { | 641 | { |
642 | if (!max_segments) { | 642 | if (!max_segments) { |
643 | max_segments = 1; | 643 | max_segments = 1; |
644 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); | 644 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); |
645 | } | 645 | } |
646 | 646 | ||
647 | q->max_phys_segments = max_segments; | 647 | q->max_phys_segments = max_segments; |
648 | } | 648 | } |
649 | 649 | ||
650 | EXPORT_SYMBOL(blk_queue_max_phys_segments); | 650 | EXPORT_SYMBOL(blk_queue_max_phys_segments); |
651 | 651 | ||
652 | /** | 652 | /** |
653 | * blk_queue_max_hw_segments - set max hw segments for a request for this queue | 653 | * blk_queue_max_hw_segments - set max hw segments for a request for this queue |
654 | * @q: the request queue for the device | 654 | * @q: the request queue for the device |
655 | * @max_segments: max number of segments | 655 | * @max_segments: max number of segments |
656 | * | 656 | * |
657 | * Description: | 657 | * Description: |
658 | * Enables a low level driver to set an upper limit on the number of | 658 | * Enables a low level driver to set an upper limit on the number of |
659 | * hw data segments in a request. This would be the largest number of | 659 | * hw data segments in a request. This would be the largest number of |
660 | * address/length pairs the host adapter can actually give as once | 660 | * address/length pairs the host adapter can actually give as once |
661 | * to the device. | 661 | * to the device. |
662 | **/ | 662 | **/ |
663 | void blk_queue_max_hw_segments(struct request_queue *q, | 663 | void blk_queue_max_hw_segments(struct request_queue *q, |
664 | unsigned short max_segments) | 664 | unsigned short max_segments) |
665 | { | 665 | { |
666 | if (!max_segments) { | 666 | if (!max_segments) { |
667 | max_segments = 1; | 667 | max_segments = 1; |
668 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); | 668 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); |
669 | } | 669 | } |
670 | 670 | ||
671 | q->max_hw_segments = max_segments; | 671 | q->max_hw_segments = max_segments; |
672 | } | 672 | } |
673 | 673 | ||
674 | EXPORT_SYMBOL(blk_queue_max_hw_segments); | 674 | EXPORT_SYMBOL(blk_queue_max_hw_segments); |
675 | 675 | ||
676 | /** | 676 | /** |
677 | * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg | 677 | * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg |
678 | * @q: the request queue for the device | 678 | * @q: the request queue for the device |
679 | * @max_size: max size of segment in bytes | 679 | * @max_size: max size of segment in bytes |
680 | * | 680 | * |
681 | * Description: | 681 | * Description: |
682 | * Enables a low level driver to set an upper limit on the size of a | 682 | * Enables a low level driver to set an upper limit on the size of a |
683 | * coalesced segment | 683 | * coalesced segment |
684 | **/ | 684 | **/ |
685 | void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) | 685 | void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) |
686 | { | 686 | { |
687 | if (max_size < PAGE_CACHE_SIZE) { | 687 | if (max_size < PAGE_CACHE_SIZE) { |
688 | max_size = PAGE_CACHE_SIZE; | 688 | max_size = PAGE_CACHE_SIZE; |
689 | printk("%s: set to minimum %d\n", __FUNCTION__, max_size); | 689 | printk("%s: set to minimum %d\n", __FUNCTION__, max_size); |
690 | } | 690 | } |
691 | 691 | ||
692 | q->max_segment_size = max_size; | 692 | q->max_segment_size = max_size; |
693 | } | 693 | } |
694 | 694 | ||
695 | EXPORT_SYMBOL(blk_queue_max_segment_size); | 695 | EXPORT_SYMBOL(blk_queue_max_segment_size); |
696 | 696 | ||
/**
 * blk_queue_hardsect_size - set hardware sector size for the queue
 * @q:  the request queue for the device
 * @size:  the hardware sector size, in bytes
 *
 * Description:
 *   This should typically be set to the lowest possible sector size
 *   that the hardware can operate on (possible without reverting to
 *   even internal read-modify-write operations). Usually the default
 *   of 512 covers most hardware.  No validation is performed on @size.
 **/
void blk_queue_hardsect_size(struct request_queue *q, unsigned short size)
{
	q->hardsect_size = size;
}

EXPORT_SYMBOL(blk_queue_hardsect_size);
714 | 714 | ||
/*
 * Returns the minimum that is _not_ zero, unless both are zero.
 *
 * Arguments and the whole expansion are parenthesized so the macro
 * stays correct inside larger expressions and with compound argument
 * expressions (e.g. min_not_zero(a | b, c)).  Note the arguments may
 * be evaluated more than once, so avoid side effects at call sites.
 */
#define min_not_zero(l, r) ((l) == 0 ? (r) : ((r) == 0 ? (l) : min((l), (r))))
719 | 719 | ||
/**
 * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers
 * @t:	the stacking driver (top)
 * @b:  the underlying device (bottom)
 *
 * Description:
 *    Combine the limits of the bottom queue into the top queue so
 *    requests built against @t remain acceptable to @b.
 **/
void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
{
	/* zero is "infinity" */
	t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);
	t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors);

	t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
	t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
	t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
	/* Sector size grows to the coarser of the two granularities. */
	t->hardsect_size = max(t->hardsect_size,b->hardsect_size);
	/* Clustering is only kept if the bottom device supports it too. */
	if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
		clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags);
}

EXPORT_SYMBOL(blk_queue_stack_limits);
740 | 740 | ||
741 | /** | 741 | /** |
742 | * blk_queue_segment_boundary - set boundary rules for segment merging | 742 | * blk_queue_segment_boundary - set boundary rules for segment merging |
743 | * @q: the request queue for the device | 743 | * @q: the request queue for the device |
744 | * @mask: the memory boundary mask | 744 | * @mask: the memory boundary mask |
745 | **/ | 745 | **/ |
746 | void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) | 746 | void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) |
747 | { | 747 | { |
748 | if (mask < PAGE_CACHE_SIZE - 1) { | 748 | if (mask < PAGE_CACHE_SIZE - 1) { |
749 | mask = PAGE_CACHE_SIZE - 1; | 749 | mask = PAGE_CACHE_SIZE - 1; |
750 | printk("%s: set to minimum %lx\n", __FUNCTION__, mask); | 750 | printk("%s: set to minimum %lx\n", __FUNCTION__, mask); |
751 | } | 751 | } |
752 | 752 | ||
753 | q->seg_boundary_mask = mask; | 753 | q->seg_boundary_mask = mask; |
754 | } | 754 | } |
755 | 755 | ||
756 | EXPORT_SYMBOL(blk_queue_segment_boundary); | 756 | EXPORT_SYMBOL(blk_queue_segment_boundary); |
757 | 757 | ||
/**
 * blk_queue_dma_alignment - set dma length and memory alignment
 * @q:     the request queue for the device
 * @mask:  alignment mask
 *
 * description:
 *    Set required memory and length alignment for direct dma
 *    transactions.  This is used when building direct io requests for
 *    the queue.
 *
 **/
void blk_queue_dma_alignment(struct request_queue *q, int mask)
{
	q->dma_alignment = mask;
}

EXPORT_SYMBOL(blk_queue_dma_alignment);
774 | 774 | ||
/**
 * blk_queue_find_tag - find a request by its tag and queue
 * @q:	 The request queue for the device
 * @tag: The tag of the request
 *
 * Notes:
 *    Should be used when a device returns a tag and you want to match
 *    it with a request.  Thin wrapper around blk_map_queue_find_tag()
 *    using the queue's own tag map.
 *
 *    no locks need be held.
 **/
struct request *blk_queue_find_tag(struct request_queue *q, int tag)
{
	return blk_map_queue_find_tag(q->queue_tags, tag);
}

EXPORT_SYMBOL(blk_queue_find_tag);
792 | 792 | ||
793 | /** | 793 | /** |
794 | * __blk_free_tags - release a given set of tag maintenance info | 794 | * __blk_free_tags - release a given set of tag maintenance info |
795 | * @bqt: the tag map to free | 795 | * @bqt: the tag map to free |
796 | * | 796 | * |
797 | * Tries to free the specified @bqt@. Returns true if it was | 797 | * Tries to free the specified @bqt@. Returns true if it was |
798 | * actually freed and false if there are still references using it | 798 | * actually freed and false if there are still references using it |
799 | */ | 799 | */ |
800 | static int __blk_free_tags(struct blk_queue_tag *bqt) | 800 | static int __blk_free_tags(struct blk_queue_tag *bqt) |
801 | { | 801 | { |
802 | int retval; | 802 | int retval; |
803 | 803 | ||
804 | retval = atomic_dec_and_test(&bqt->refcnt); | 804 | retval = atomic_dec_and_test(&bqt->refcnt); |
805 | if (retval) { | 805 | if (retval) { |
806 | BUG_ON(bqt->busy); | 806 | BUG_ON(bqt->busy); |
807 | BUG_ON(!list_empty(&bqt->busy_list)); | 807 | BUG_ON(!list_empty(&bqt->busy_list)); |
808 | 808 | ||
809 | kfree(bqt->tag_index); | 809 | kfree(bqt->tag_index); |
810 | bqt->tag_index = NULL; | 810 | bqt->tag_index = NULL; |
811 | 811 | ||
812 | kfree(bqt->tag_map); | 812 | kfree(bqt->tag_map); |
813 | bqt->tag_map = NULL; | 813 | bqt->tag_map = NULL; |
814 | 814 | ||
815 | kfree(bqt); | 815 | kfree(bqt); |
816 | 816 | ||
817 | } | 817 | } |
818 | 818 | ||
819 | return retval; | 819 | return retval; |
820 | } | 820 | } |
821 | 821 | ||
822 | /** | 822 | /** |
823 | * __blk_queue_free_tags - release tag maintenance info | 823 | * __blk_queue_free_tags - release tag maintenance info |
824 | * @q: the request queue for the device | 824 | * @q: the request queue for the device |
825 | * | 825 | * |
826 | * Notes: | 826 | * Notes: |
827 | * blk_cleanup_queue() will take care of calling this function, if tagging | 827 | * blk_cleanup_queue() will take care of calling this function, if tagging |
828 | * has been used. So there's no need to call this directly. | 828 | * has been used. So there's no need to call this directly. |
829 | **/ | 829 | **/ |
830 | static void __blk_queue_free_tags(struct request_queue *q) | 830 | static void __blk_queue_free_tags(struct request_queue *q) |
831 | { | 831 | { |
832 | struct blk_queue_tag *bqt = q->queue_tags; | 832 | struct blk_queue_tag *bqt = q->queue_tags; |
833 | 833 | ||
834 | if (!bqt) | 834 | if (!bqt) |
835 | return; | 835 | return; |
836 | 836 | ||
837 | __blk_free_tags(bqt); | 837 | __blk_free_tags(bqt); |
838 | 838 | ||
839 | q->queue_tags = NULL; | 839 | q->queue_tags = NULL; |
840 | q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); | 840 | q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); |
841 | } | 841 | } |
842 | 842 | ||
843 | 843 | ||
/**
 * blk_free_tags - release a given set of tag maintenance info
 * @bqt:	the tag map to free
 *
 * For an externally managed @bqt this drops the final reference and
 * frees the map.  Callers must guarantee that all queues which might
 * have been using this tag map have been released; it is a bug for
 * references to remain at this point.
 */
void blk_free_tags(struct blk_queue_tag *bqt)
{
	int freed = __blk_free_tags(bqt);

	if (unlikely(!freed))
		BUG();
}
EXPORT_SYMBOL(blk_free_tags);
858 | 858 | ||
/**
 * blk_queue_free_tags - release tag maintenance info
 * @q:  the request queue for the device
 *
 * Notes:
 *	This is used to disable tagged queuing to a device, yet leave
 *	queue in function.  Only clears the flag; the tag map itself is
 *	released via __blk_queue_free_tags() at queue teardown.
 **/
void blk_queue_free_tags(struct request_queue *q)
{
	clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
}

EXPORT_SYMBOL(blk_queue_free_tags);
873 | 873 | ||
/*
 * Allocate and install the request index and tag bitmap for @tags,
 * sized for @depth tags (capped at twice the queue's request count
 * when a queue is given).  Returns 0 on success, -ENOMEM on failure.
 */
static int
init_tag_map(struct request_queue *q, struct blk_queue_tag *tags, int depth)
{
	struct request **tag_index;
	unsigned long *tag_map;
	int nr_ulongs;

	/* Deeper than 2 * nr_requests could never be used anyway. */
	if (q && depth > q->nr_requests * 2) {
		depth = q->nr_requests * 2;
		printk(KERN_ERR "%s: adjusted depth to %d\n",
		       __FUNCTION__, depth);
	}

	tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC);
	if (!tag_index)
		goto fail;

	nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;
	tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC);
	if (!tag_map)
		goto fail;

	tags->real_max_depth = depth;
	tags->max_depth = depth;
	tags->tag_index = tag_index;
	tags->tag_map = tag_map;

	return 0;
fail:
	/* kfree(NULL) is a no-op, so this covers both failure points. */
	kfree(tag_index);
	return -ENOMEM;
}
906 | 906 | ||
907 | static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, | 907 | static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, |
908 | int depth) | 908 | int depth) |
909 | { | 909 | { |
910 | struct blk_queue_tag *tags; | 910 | struct blk_queue_tag *tags; |
911 | 911 | ||
912 | tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); | 912 | tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); |
913 | if (!tags) | 913 | if (!tags) |
914 | goto fail; | 914 | goto fail; |
915 | 915 | ||
916 | if (init_tag_map(q, tags, depth)) | 916 | if (init_tag_map(q, tags, depth)) |
917 | goto fail; | 917 | goto fail; |
918 | 918 | ||
919 | INIT_LIST_HEAD(&tags->busy_list); | 919 | INIT_LIST_HEAD(&tags->busy_list); |
920 | tags->busy = 0; | 920 | tags->busy = 0; |
921 | atomic_set(&tags->refcnt, 1); | 921 | atomic_set(&tags->refcnt, 1); |
922 | return tags; | 922 | return tags; |
923 | fail: | 923 | fail: |
924 | kfree(tags); | 924 | kfree(tags); |
925 | return NULL; | 925 | return NULL; |
926 | } | 926 | } |
927 | 927 | ||
928 | /** | 928 | /** |
929 | * blk_init_tags - initialize the tag info for an external tag map | 929 | * blk_init_tags - initialize the tag info for an external tag map |
930 | * @depth: the maximum queue depth supported | 930 | * @depth: the maximum queue depth supported |
931 | * @tags: the tag to use | 931 | * @tags: the tag to use |
932 | **/ | 932 | **/ |
933 | struct blk_queue_tag *blk_init_tags(int depth) | 933 | struct blk_queue_tag *blk_init_tags(int depth) |
934 | { | 934 | { |
935 | return __blk_queue_init_tags(NULL, depth); | 935 | return __blk_queue_init_tags(NULL, depth); |
936 | } | 936 | } |
937 | EXPORT_SYMBOL(blk_init_tags); | 937 | EXPORT_SYMBOL(blk_init_tags); |
938 | 938 | ||
939 | /** | 939 | /** |
940 | * blk_queue_init_tags - initialize the queue tag info | 940 | * blk_queue_init_tags - initialize the queue tag info |
941 | * @q: the request queue for the device | 941 | * @q: the request queue for the device |
942 | * @depth: the maximum queue depth supported | 942 | * @depth: the maximum queue depth supported |
943 | * @tags: the tag to use | 943 | * @tags: the tag to use |
944 | **/ | 944 | **/ |
945 | int blk_queue_init_tags(struct request_queue *q, int depth, | 945 | int blk_queue_init_tags(struct request_queue *q, int depth, |
946 | struct blk_queue_tag *tags) | 946 | struct blk_queue_tag *tags) |
947 | { | 947 | { |
948 | int rc; | 948 | int rc; |
949 | 949 | ||
950 | BUG_ON(tags && q->queue_tags && tags != q->queue_tags); | 950 | BUG_ON(tags && q->queue_tags && tags != q->queue_tags); |
951 | 951 | ||
952 | if (!tags && !q->queue_tags) { | 952 | if (!tags && !q->queue_tags) { |
953 | tags = __blk_queue_init_tags(q, depth); | 953 | tags = __blk_queue_init_tags(q, depth); |
954 | 954 | ||
955 | if (!tags) | 955 | if (!tags) |
956 | goto fail; | 956 | goto fail; |
957 | } else if (q->queue_tags) { | 957 | } else if (q->queue_tags) { |
958 | if ((rc = blk_queue_resize_tags(q, depth))) | 958 | if ((rc = blk_queue_resize_tags(q, depth))) |
959 | return rc; | 959 | return rc; |
960 | set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); | 960 | set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); |
961 | return 0; | 961 | return 0; |
962 | } else | 962 | } else |
963 | atomic_inc(&tags->refcnt); | 963 | atomic_inc(&tags->refcnt); |
964 | 964 | ||
965 | /* | 965 | /* |
966 | * assign it, all done | 966 | * assign it, all done |
967 | */ | 967 | */ |
968 | q->queue_tags = tags; | 968 | q->queue_tags = tags; |
969 | q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); | 969 | q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); |
970 | return 0; | 970 | return 0; |
971 | fail: | 971 | fail: |
972 | kfree(tags); | 972 | kfree(tags); |
973 | return -ENOMEM; | 973 | return -ENOMEM; |
974 | } | 974 | } |
975 | 975 | ||
976 | EXPORT_SYMBOL(blk_queue_init_tags); | 976 | EXPORT_SYMBOL(blk_queue_init_tags); |
977 | 977 | ||
978 | /** | 978 | /** |
979 | * blk_queue_resize_tags - change the queueing depth | 979 | * blk_queue_resize_tags - change the queueing depth |
980 | * @q: the request queue for the device | 980 | * @q: the request queue for the device |
981 | * @new_depth: the new max command queueing depth | 981 | * @new_depth: the new max command queueing depth |
982 | * | 982 | * |
983 | * Notes: | 983 | * Notes: |
984 | * Must be called with the queue lock held. | 984 | * Must be called with the queue lock held. |
985 | **/ | 985 | **/ |
986 | int blk_queue_resize_tags(struct request_queue *q, int new_depth) | 986 | int blk_queue_resize_tags(struct request_queue *q, int new_depth) |
987 | { | 987 | { |
988 | struct blk_queue_tag *bqt = q->queue_tags; | 988 | struct blk_queue_tag *bqt = q->queue_tags; |
989 | struct request **tag_index; | 989 | struct request **tag_index; |
990 | unsigned long *tag_map; | 990 | unsigned long *tag_map; |
991 | int max_depth, nr_ulongs; | 991 | int max_depth, nr_ulongs; |
992 | 992 | ||
993 | if (!bqt) | 993 | if (!bqt) |
994 | return -ENXIO; | 994 | return -ENXIO; |
995 | 995 | ||
996 | /* | 996 | /* |
997 | * if we already have large enough real_max_depth. just | 997 | * if we already have large enough real_max_depth. just |
998 | * adjust max_depth. *NOTE* as requests with tag value | 998 | * adjust max_depth. *NOTE* as requests with tag value |
999 | * between new_depth and real_max_depth can be in-flight, tag | 999 | * between new_depth and real_max_depth can be in-flight, tag |
1000 | * map can not be shrunk blindly here. | 1000 | * map can not be shrunk blindly here. |
1001 | */ | 1001 | */ |
1002 | if (new_depth <= bqt->real_max_depth) { | 1002 | if (new_depth <= bqt->real_max_depth) { |
1003 | bqt->max_depth = new_depth; | 1003 | bqt->max_depth = new_depth; |
1004 | return 0; | 1004 | return 0; |
1005 | } | 1005 | } |
1006 | 1006 | ||
1007 | /* | 1007 | /* |
1008 | * Currently cannot replace a shared tag map with a new | 1008 | * Currently cannot replace a shared tag map with a new |
1009 | * one, so error out if this is the case | 1009 | * one, so error out if this is the case |
1010 | */ | 1010 | */ |
1011 | if (atomic_read(&bqt->refcnt) != 1) | 1011 | if (atomic_read(&bqt->refcnt) != 1) |
1012 | return -EBUSY; | 1012 | return -EBUSY; |
1013 | 1013 | ||
1014 | /* | 1014 | /* |
1015 | * save the old state info, so we can copy it back | 1015 | * save the old state info, so we can copy it back |
1016 | */ | 1016 | */ |
1017 | tag_index = bqt->tag_index; | 1017 | tag_index = bqt->tag_index; |
1018 | tag_map = bqt->tag_map; | 1018 | tag_map = bqt->tag_map; |
1019 | max_depth = bqt->real_max_depth; | 1019 | max_depth = bqt->real_max_depth; |
1020 | 1020 | ||
1021 | if (init_tag_map(q, bqt, new_depth)) | 1021 | if (init_tag_map(q, bqt, new_depth)) |
1022 | return -ENOMEM; | 1022 | return -ENOMEM; |
1023 | 1023 | ||
1024 | memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); | 1024 | memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); |
1025 | nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; | 1025 | nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; |
1026 | memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); | 1026 | memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); |
1027 | 1027 | ||
1028 | kfree(tag_index); | 1028 | kfree(tag_index); |
1029 | kfree(tag_map); | 1029 | kfree(tag_map); |
1030 | return 0; | 1030 | return 0; |
1031 | } | 1031 | } |
1032 | 1032 | ||
1033 | EXPORT_SYMBOL(blk_queue_resize_tags); | 1033 | EXPORT_SYMBOL(blk_queue_resize_tags); |
1034 | 1034 | ||
1035 | /** | 1035 | /** |
1036 | * blk_queue_end_tag - end tag operations for a request | 1036 | * blk_queue_end_tag - end tag operations for a request |
1037 | * @q: the request queue for the device | 1037 | * @q: the request queue for the device |
1038 | * @rq: the request that has completed | 1038 | * @rq: the request that has completed |
1039 | * | 1039 | * |
1040 | * Description: | 1040 | * Description: |
1041 | * Typically called when end_that_request_first() returns 0, meaning | 1041 | * Typically called when end_that_request_first() returns 0, meaning |
1042 | * all transfers have been done for a request. It's important to call | 1042 | * all transfers have been done for a request. It's important to call |
1043 | * this function before end_that_request_last(), as that will put the | 1043 | * this function before end_that_request_last(), as that will put the |
1044 | * request back on the free list thus corrupting the internal tag list. | 1044 | * request back on the free list thus corrupting the internal tag list. |
1045 | * | 1045 | * |
1046 | * Notes: | 1046 | * Notes: |
1047 | * queue lock must be held. | 1047 | * queue lock must be held. |
1048 | **/ | 1048 | **/ |
1049 | void blk_queue_end_tag(struct request_queue *q, struct request *rq) | 1049 | void blk_queue_end_tag(struct request_queue *q, struct request *rq) |
1050 | { | 1050 | { |
1051 | struct blk_queue_tag *bqt = q->queue_tags; | 1051 | struct blk_queue_tag *bqt = q->queue_tags; |
1052 | int tag = rq->tag; | 1052 | int tag = rq->tag; |
1053 | 1053 | ||
1054 | BUG_ON(tag == -1); | 1054 | BUG_ON(tag == -1); |
1055 | 1055 | ||
1056 | if (unlikely(tag >= bqt->real_max_depth)) | 1056 | if (unlikely(tag >= bqt->real_max_depth)) |
1057 | /* | 1057 | /* |
1058 | * This can happen after tag depth has been reduced. | 1058 | * This can happen after tag depth has been reduced. |
1059 | * FIXME: how about a warning or info message here? | 1059 | * FIXME: how about a warning or info message here? |
1060 | */ | 1060 | */ |
1061 | return; | 1061 | return; |
1062 | 1062 | ||
1063 | list_del_init(&rq->queuelist); | 1063 | list_del_init(&rq->queuelist); |
1064 | rq->cmd_flags &= ~REQ_QUEUED; | 1064 | rq->cmd_flags &= ~REQ_QUEUED; |
1065 | rq->tag = -1; | 1065 | rq->tag = -1; |
1066 | 1066 | ||
1067 | if (unlikely(bqt->tag_index[tag] == NULL)) | 1067 | if (unlikely(bqt->tag_index[tag] == NULL)) |
1068 | printk(KERN_ERR "%s: tag %d is missing\n", | 1068 | printk(KERN_ERR "%s: tag %d is missing\n", |
1069 | __FUNCTION__, tag); | 1069 | __FUNCTION__, tag); |
1070 | 1070 | ||
1071 | bqt->tag_index[tag] = NULL; | 1071 | bqt->tag_index[tag] = NULL; |
1072 | 1072 | ||
1073 | /* | 1073 | /* |
1074 | * We use test_and_clear_bit's memory ordering properties here. | 1074 | * We use test_and_clear_bit's memory ordering properties here. |
1075 | * The tag_map bit acts as a lock for tag_index[bit], so we need | 1075 | * The tag_map bit acts as a lock for tag_index[bit], so we need |
1076 | * a barrer before clearing the bit (precisely: release semantics). | 1076 | * a barrer before clearing the bit (precisely: release semantics). |
1077 | * Could use clear_bit_unlock when it is merged. | 1077 | * Could use clear_bit_unlock when it is merged. |
1078 | */ | 1078 | */ |
1079 | if (unlikely(!test_and_clear_bit(tag, bqt->tag_map))) { | 1079 | if (unlikely(!test_and_clear_bit(tag, bqt->tag_map))) { |
1080 | printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", | 1080 | printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", |
1081 | __FUNCTION__, tag); | 1081 | __FUNCTION__, tag); |
1082 | return; | 1082 | return; |
1083 | } | 1083 | } |
1084 | 1084 | ||
1085 | bqt->busy--; | 1085 | bqt->busy--; |
1086 | } | 1086 | } |
1087 | 1087 | ||
1088 | EXPORT_SYMBOL(blk_queue_end_tag); | 1088 | EXPORT_SYMBOL(blk_queue_end_tag); |
1089 | 1089 | ||
1090 | /** | 1090 | /** |
1091 | * blk_queue_start_tag - find a free tag and assign it | 1091 | * blk_queue_start_tag - find a free tag and assign it |
1092 | * @q: the request queue for the device | 1092 | * @q: the request queue for the device |
1093 | * @rq: the block request that needs tagging | 1093 | * @rq: the block request that needs tagging |
1094 | * | 1094 | * |
1095 | * Description: | 1095 | * Description: |
1096 | * This can either be used as a stand-alone helper, or possibly be | 1096 | * This can either be used as a stand-alone helper, or possibly be |
1097 | * assigned as the queue &prep_rq_fn (in which case &struct request | 1097 | * assigned as the queue &prep_rq_fn (in which case &struct request |
1098 | * automagically gets a tag assigned). Note that this function | 1098 | * automagically gets a tag assigned). Note that this function |
1099 | * assumes that any type of request can be queued! if this is not | 1099 | * assumes that any type of request can be queued! if this is not |
1100 | * true for your device, you must check the request type before | 1100 | * true for your device, you must check the request type before |
1101 | * calling this function. The request will also be removed from | 1101 | * calling this function. The request will also be removed from |
1102 | * the request queue, so it's the drivers responsibility to readd | 1102 | * the request queue, so it's the drivers responsibility to readd |
1103 | * it if it should need to be restarted for some reason. | 1103 | * it if it should need to be restarted for some reason. |
1104 | * | 1104 | * |
1105 | * Notes: | 1105 | * Notes: |
1106 | * queue lock must be held. | 1106 | * queue lock must be held. |
1107 | **/ | 1107 | **/ |
1108 | int blk_queue_start_tag(struct request_queue *q, struct request *rq) | 1108 | int blk_queue_start_tag(struct request_queue *q, struct request *rq) |
1109 | { | 1109 | { |
1110 | struct blk_queue_tag *bqt = q->queue_tags; | 1110 | struct blk_queue_tag *bqt = q->queue_tags; |
1111 | int tag; | 1111 | int tag; |
1112 | 1112 | ||
1113 | if (unlikely((rq->cmd_flags & REQ_QUEUED))) { | 1113 | if (unlikely((rq->cmd_flags & REQ_QUEUED))) { |
1114 | printk(KERN_ERR | 1114 | printk(KERN_ERR |
1115 | "%s: request %p for device [%s] already tagged %d", | 1115 | "%s: request %p for device [%s] already tagged %d", |
1116 | __FUNCTION__, rq, | 1116 | __FUNCTION__, rq, |
1117 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); | 1117 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); |
1118 | BUG(); | 1118 | BUG(); |
1119 | } | 1119 | } |
1120 | 1120 | ||
1121 | /* | 1121 | /* |
1122 | * Protect against shared tag maps, as we may not have exclusive | 1122 | * Protect against shared tag maps, as we may not have exclusive |
1123 | * access to the tag map. | 1123 | * access to the tag map. |
1124 | */ | 1124 | */ |
1125 | do { | 1125 | do { |
1126 | tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); | 1126 | tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); |
1127 | if (tag >= bqt->max_depth) | 1127 | if (tag >= bqt->max_depth) |
1128 | return 1; | 1128 | return 1; |
1129 | 1129 | ||
1130 | } while (test_and_set_bit(tag, bqt->tag_map)); | 1130 | } while (test_and_set_bit(tag, bqt->tag_map)); |
1131 | /* | 1131 | /* |
1132 | * We rely on test_and_set_bit providing lock memory ordering semantics | 1132 | * We rely on test_and_set_bit providing lock memory ordering semantics |
1133 | * (could use test_and_set_bit_lock when it is merged). | 1133 | * (could use test_and_set_bit_lock when it is merged). |
1134 | */ | 1134 | */ |
1135 | 1135 | ||
1136 | rq->cmd_flags |= REQ_QUEUED; | 1136 | rq->cmd_flags |= REQ_QUEUED; |
1137 | rq->tag = tag; | 1137 | rq->tag = tag; |
1138 | bqt->tag_index[tag] = rq; | 1138 | bqt->tag_index[tag] = rq; |
1139 | blkdev_dequeue_request(rq); | 1139 | blkdev_dequeue_request(rq); |
1140 | list_add(&rq->queuelist, &bqt->busy_list); | 1140 | list_add(&rq->queuelist, &bqt->busy_list); |
1141 | bqt->busy++; | 1141 | bqt->busy++; |
1142 | return 0; | 1142 | return 0; |
1143 | } | 1143 | } |
1144 | 1144 | ||
1145 | EXPORT_SYMBOL(blk_queue_start_tag); | 1145 | EXPORT_SYMBOL(blk_queue_start_tag); |
1146 | 1146 | ||
1147 | /** | 1147 | /** |
1148 | * blk_queue_invalidate_tags - invalidate all pending tags | 1148 | * blk_queue_invalidate_tags - invalidate all pending tags |
1149 | * @q: the request queue for the device | 1149 | * @q: the request queue for the device |
1150 | * | 1150 | * |
1151 | * Description: | 1151 | * Description: |
1152 | * Hardware conditions may dictate a need to stop all pending requests. | 1152 | * Hardware conditions may dictate a need to stop all pending requests. |
1153 | * In this case, we will safely clear the block side of the tag queue and | 1153 | * In this case, we will safely clear the block side of the tag queue and |
1154 | * readd all requests to the request queue in the right order. | 1154 | * readd all requests to the request queue in the right order. |
1155 | * | 1155 | * |
1156 | * Notes: | 1156 | * Notes: |
1157 | * queue lock must be held. | 1157 | * queue lock must be held. |
1158 | **/ | 1158 | **/ |
1159 | void blk_queue_invalidate_tags(struct request_queue *q) | 1159 | void blk_queue_invalidate_tags(struct request_queue *q) |
1160 | { | 1160 | { |
1161 | struct blk_queue_tag *bqt = q->queue_tags; | 1161 | struct blk_queue_tag *bqt = q->queue_tags; |
1162 | struct list_head *tmp, *n; | 1162 | struct list_head *tmp, *n; |
1163 | struct request *rq; | 1163 | struct request *rq; |
1164 | 1164 | ||
1165 | list_for_each_safe(tmp, n, &bqt->busy_list) { | 1165 | list_for_each_safe(tmp, n, &bqt->busy_list) { |
1166 | rq = list_entry_rq(tmp); | 1166 | rq = list_entry_rq(tmp); |
1167 | 1167 | ||
1168 | if (rq->tag == -1) { | 1168 | if (rq->tag == -1) { |
1169 | printk(KERN_ERR | 1169 | printk(KERN_ERR |
1170 | "%s: bad tag found on list\n", __FUNCTION__); | 1170 | "%s: bad tag found on list\n", __FUNCTION__); |
1171 | list_del_init(&rq->queuelist); | 1171 | list_del_init(&rq->queuelist); |
1172 | rq->cmd_flags &= ~REQ_QUEUED; | 1172 | rq->cmd_flags &= ~REQ_QUEUED; |
1173 | } else | 1173 | } else |
1174 | blk_queue_end_tag(q, rq); | 1174 | blk_queue_end_tag(q, rq); |
1175 | 1175 | ||
1176 | rq->cmd_flags &= ~REQ_STARTED; | 1176 | rq->cmd_flags &= ~REQ_STARTED; |
1177 | __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); | 1177 | __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); |
1178 | } | 1178 | } |
1179 | } | 1179 | } |
1180 | 1180 | ||
1181 | EXPORT_SYMBOL(blk_queue_invalidate_tags); | 1181 | EXPORT_SYMBOL(blk_queue_invalidate_tags); |
1182 | 1182 | ||
1183 | void blk_dump_rq_flags(struct request *rq, char *msg) | 1183 | void blk_dump_rq_flags(struct request *rq, char *msg) |
1184 | { | 1184 | { |
1185 | int bit; | 1185 | int bit; |
1186 | 1186 | ||
1187 | printk("%s: dev %s: type=%x, flags=%x\n", msg, | 1187 | printk("%s: dev %s: type=%x, flags=%x\n", msg, |
1188 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, | 1188 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, |
1189 | rq->cmd_flags); | 1189 | rq->cmd_flags); |
1190 | 1190 | ||
1191 | printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, | 1191 | printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, |
1192 | rq->nr_sectors, | 1192 | rq->nr_sectors, |
1193 | rq->current_nr_sectors); | 1193 | rq->current_nr_sectors); |
1194 | printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); | 1194 | printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); |
1195 | 1195 | ||
1196 | if (blk_pc_request(rq)) { | 1196 | if (blk_pc_request(rq)) { |
1197 | printk("cdb: "); | 1197 | printk("cdb: "); |
1198 | for (bit = 0; bit < sizeof(rq->cmd); bit++) | 1198 | for (bit = 0; bit < sizeof(rq->cmd); bit++) |
1199 | printk("%02x ", rq->cmd[bit]); | 1199 | printk("%02x ", rq->cmd[bit]); |
1200 | printk("\n"); | 1200 | printk("\n"); |
1201 | } | 1201 | } |
1202 | } | 1202 | } |
1203 | 1203 | ||
1204 | EXPORT_SYMBOL(blk_dump_rq_flags); | 1204 | EXPORT_SYMBOL(blk_dump_rq_flags); |
1205 | 1205 | ||
1206 | void blk_recount_segments(struct request_queue *q, struct bio *bio) | 1206 | void blk_recount_segments(struct request_queue *q, struct bio *bio) |
1207 | { | 1207 | { |
1208 | struct request rq; | 1208 | struct request rq; |
1209 | struct bio *nxt = bio->bi_next; | 1209 | struct bio *nxt = bio->bi_next; |
1210 | rq.q = q; | 1210 | rq.q = q; |
1211 | rq.bio = rq.biotail = bio; | 1211 | rq.bio = rq.biotail = bio; |
1212 | bio->bi_next = NULL; | 1212 | bio->bi_next = NULL; |
1213 | blk_recalc_rq_segments(&rq); | 1213 | blk_recalc_rq_segments(&rq); |
1214 | bio->bi_next = nxt; | 1214 | bio->bi_next = nxt; |
1215 | bio->bi_phys_segments = rq.nr_phys_segments; | 1215 | bio->bi_phys_segments = rq.nr_phys_segments; |
1216 | bio->bi_hw_segments = rq.nr_hw_segments; | 1216 | bio->bi_hw_segments = rq.nr_hw_segments; |
1217 | bio->bi_flags |= (1 << BIO_SEG_VALID); | 1217 | bio->bi_flags |= (1 << BIO_SEG_VALID); |
1218 | } | 1218 | } |
1219 | EXPORT_SYMBOL(blk_recount_segments); | 1219 | EXPORT_SYMBOL(blk_recount_segments); |
1220 | 1220 | ||
/*
 * Walk every bio_vec of @rq and recompute rq->nr_phys_segments and
 * rq->nr_hw_segments, honouring the queue's clustering flag and
 * segment-size/boundary limits.  Also updates the front/back hw
 * segment sizes on the first/last bio for merge decisions.
 * NOTE(review): the goto into the else branch (new_hw_segment) is
 * deliberate — both new-segment paths share the tail bookkeeping.
 */
static void blk_recalc_rq_segments(struct request *rq)
{
	int nr_phys_segs;
	int nr_hw_segs;
	unsigned int phys_size;
	unsigned int hw_size;
	struct bio_vec *bv, *bvprv = NULL;
	int seg_size;
	int hw_seg_size;
	int cluster;
	struct req_iterator iter;
	int high, highprv = 1;	/* highprv starts 1 to force a first segment */
	struct request_queue *q = rq->q;

	if (!rq->bio)
		return;

	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
	hw_seg_size = seg_size = 0;
	phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
	rq_for_each_segment(bv, rq, iter) {
		/*
		 * the trick here is making sure that a high page is never
		 * considered part of another segment, since that might
		 * change with the bounce page.
		 */
		high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
		if (high || highprv)
			goto new_hw_segment;
		if (cluster) {
			/* try to extend the current physical segment */
			if (seg_size + bv->bv_len > q->max_segment_size)
				goto new_segment;
			if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
				goto new_segment;
			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
				goto new_segment;
			if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
				goto new_hw_segment;

			seg_size += bv->bv_len;
			hw_seg_size += bv->bv_len;
			bvprv = bv;
			continue;
		}
new_segment:
		/* phys segment breaks; the hw segment may still grow */
		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
			hw_seg_size += bv->bv_len;
		else {
new_hw_segment:
			/* record front size before starting a new hw seg */
			if (nr_hw_segs == 1 &&
			    hw_seg_size > rq->bio->bi_hw_front_size)
				rq->bio->bi_hw_front_size = hw_seg_size;
			hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
			nr_hw_segs++;
		}

		nr_phys_segs++;
		bvprv = bv;
		seg_size = bv->bv_len;
		highprv = high;
	}

	if (nr_hw_segs == 1 &&
	    hw_seg_size > rq->bio->bi_hw_front_size)
		rq->bio->bi_hw_front_size = hw_seg_size;
	if (hw_seg_size > rq->biotail->bi_hw_back_size)
		rq->biotail->bi_hw_back_size = hw_seg_size;
	rq->nr_phys_segments = nr_phys_segs;
	rq->nr_hw_segments = nr_hw_segs;
}
1292 | 1292 | ||
1293 | static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, | 1293 | static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, |
1294 | struct bio *nxt) | 1294 | struct bio *nxt) |
1295 | { | 1295 | { |
1296 | if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) | 1296 | if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) |
1297 | return 0; | 1297 | return 0; |
1298 | 1298 | ||
1299 | if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) | 1299 | if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) |
1300 | return 0; | 1300 | return 0; |
1301 | if (bio->bi_size + nxt->bi_size > q->max_segment_size) | 1301 | if (bio->bi_size + nxt->bi_size > q->max_segment_size) |
1302 | return 0; | 1302 | return 0; |
1303 | 1303 | ||
1304 | /* | 1304 | /* |
1305 | * bio and nxt are contigous in memory, check if the queue allows | 1305 | * bio and nxt are contigous in memory, check if the queue allows |
1306 | * these two to be merged into one | 1306 | * these two to be merged into one |
1307 | */ | 1307 | */ |
1308 | if (BIO_SEG_BOUNDARY(q, bio, nxt)) | 1308 | if (BIO_SEG_BOUNDARY(q, bio, nxt)) |
1309 | return 1; | 1309 | return 1; |
1310 | 1310 | ||
1311 | return 0; | 1311 | return 0; |
1312 | } | 1312 | } |
1313 | 1313 | ||
1314 | static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio, | 1314 | static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio, |
1315 | struct bio *nxt) | 1315 | struct bio *nxt) |
1316 | { | 1316 | { |
1317 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | 1317 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) |
1318 | blk_recount_segments(q, bio); | 1318 | blk_recount_segments(q, bio); |
1319 | if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID))) | 1319 | if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID))) |
1320 | blk_recount_segments(q, nxt); | 1320 | blk_recount_segments(q, nxt); |
1321 | if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || | 1321 | if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || |
1322 | BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)) | 1322 | BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)) |
1323 | return 0; | 1323 | return 0; |
1324 | if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size) | 1324 | if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size) |
1325 | return 0; | 1325 | return 0; |
1326 | 1326 | ||
1327 | return 1; | 1327 | return 1; |
1328 | } | 1328 | } |
1329 | 1329 | ||
1330 | /* | 1330 | /* |
1331 | * map a request to scatterlist, return number of sg entries setup. Caller | 1331 | * map a request to scatterlist, return number of sg entries setup. Caller |
1332 | * must make sure sg can hold rq->nr_phys_segments entries | 1332 | * must make sure sg can hold rq->nr_phys_segments entries |
1333 | */ | 1333 | */ |
1334 | int blk_rq_map_sg(struct request_queue *q, struct request *rq, | 1334 | int blk_rq_map_sg(struct request_queue *q, struct request *rq, |
1335 | struct scatterlist *sg) | 1335 | struct scatterlist *sg) |
1336 | { | 1336 | { |
1337 | struct bio_vec *bvec, *bvprv; | 1337 | struct bio_vec *bvec, *bvprv; |
1338 | struct req_iterator iter; | 1338 | struct req_iterator iter; |
1339 | int nsegs, cluster; | 1339 | int nsegs, cluster; |
1340 | 1340 | ||
1341 | nsegs = 0; | 1341 | nsegs = 0; |
1342 | cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); | 1342 | cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); |
1343 | 1343 | ||
1344 | /* | 1344 | /* |
1345 | * for each bio in rq | 1345 | * for each bio in rq |
1346 | */ | 1346 | */ |
1347 | bvprv = NULL; | 1347 | bvprv = NULL; |
1348 | rq_for_each_segment(bvec, rq, iter) { | 1348 | rq_for_each_segment(bvec, rq, iter) { |
1349 | int nbytes = bvec->bv_len; | 1349 | int nbytes = bvec->bv_len; |
1350 | 1350 | ||
1351 | if (bvprv && cluster) { | 1351 | if (bvprv && cluster) { |
1352 | if (sg[nsegs - 1].length + nbytes > q->max_segment_size) | 1352 | if (sg[nsegs - 1].length + nbytes > q->max_segment_size) |
1353 | goto new_segment; | 1353 | goto new_segment; |
1354 | 1354 | ||
1355 | if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) | 1355 | if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) |
1356 | goto new_segment; | 1356 | goto new_segment; |
1357 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) | 1357 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) |
1358 | goto new_segment; | 1358 | goto new_segment; |
1359 | 1359 | ||
1360 | sg[nsegs - 1].length += nbytes; | 1360 | sg[nsegs - 1].length += nbytes; |
1361 | } else { | 1361 | } else { |
1362 | new_segment: | 1362 | new_segment: |
1363 | memset(&sg[nsegs],0,sizeof(struct scatterlist)); | 1363 | memset(&sg[nsegs],0,sizeof(struct scatterlist)); |
1364 | sg[nsegs].page = bvec->bv_page; | 1364 | sg[nsegs].page = bvec->bv_page; |
1365 | sg[nsegs].length = nbytes; | 1365 | sg[nsegs].length = nbytes; |
1366 | sg[nsegs].offset = bvec->bv_offset; | 1366 | sg[nsegs].offset = bvec->bv_offset; |
1367 | 1367 | ||
1368 | nsegs++; | 1368 | nsegs++; |
1369 | } | 1369 | } |
1370 | bvprv = bvec; | 1370 | bvprv = bvec; |
1371 | } /* segments in rq */ | 1371 | } /* segments in rq */ |
1372 | 1372 | ||
1373 | return nsegs; | 1373 | return nsegs; |
1374 | } | 1374 | } |
1375 | 1375 | ||
1376 | EXPORT_SYMBOL(blk_rq_map_sg); | 1376 | EXPORT_SYMBOL(blk_rq_map_sg); |
1377 | 1377 | ||
1378 | /* | 1378 | /* |
1379 | * the standard queue merge functions, can be overridden with device | 1379 | * the standard queue merge functions, can be overridden with device |
1380 | * specific ones if so desired | 1380 | * specific ones if so desired |
1381 | */ | 1381 | */ |
1382 | 1382 | ||
/*
 * Account for merging @bio into @req when the bio joins an existing hw
 * segment: only the physical segment count changes.  Returns 1 if the
 * merge fits within the queue's phys segment limit, 0 otherwise.
 */
static inline int ll_new_mergeable(struct request_queue *q,
				   struct request *req,
				   struct bio *bio)
{
	int nr_phys_segs = bio_phys_segments(q, bio);

	if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
		/*
		 * Over the limit: mark the request non-mergeable and
		 * drop it from the one-entry merge cache.
		 */
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}

	/*
	 * A hw segment is just getting larger, bump just the phys
	 * counter.
	 */
	req->nr_phys_segments += nr_phys_segs;
	return 1;
}
1403 | 1403 | ||
/*
 * Account for merging @bio into @req when the bio starts a new hw
 * segment: both the hw and phys segment counters grow.  Returns 1 if
 * the merge stays within both queue segment limits, 0 otherwise.
 */
static inline int ll_new_hw_segment(struct request_queue *q,
				    struct request *req,
				    struct bio *bio)
{
	int nr_hw_segs = bio_hw_segments(q, bio);
	int nr_phys_segs = bio_phys_segments(q, bio);

	if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
	    || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
		/*
		 * Would exceed a segment limit: flag the request so no
		 * further merges are attempted on it.
		 */
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}

	/*
	 * This will form the start of a new hw segment. Bump both
	 * counters.
	 */
	req->nr_hw_segments += nr_hw_segs;
	req->nr_phys_segments += nr_phys_segs;
	return 1;
}
1427 | 1427 | ||
/*
 * Decide whether @bio may be appended to the back of @req, updating the
 * request's segment accounting if so.  Returns 1 on a successful merge
 * decision, 0 if the merge is rejected.
 */
static int ll_back_merge_fn(struct request_queue *q, struct request *req,
			    struct bio *bio)
{
	unsigned short max_sectors;
	int len;

	/*
	 * BLOCK_PC (passthrough) requests may go up to the hardware
	 * limit; fs requests honour the (possibly smaller) soft limit.
	 */
	if (unlikely(blk_pc_request(req)))
		max_sectors = q->max_hw_sectors;
	else
		max_sectors = q->max_sectors;

	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}
	/*
	 * Segment counts and bi_hw_{front,back}_size are only valid
	 * once BIO_SEG_VALID is set -- recount first if needed.
	 */
	if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
		blk_recount_segments(q, req->biotail);
	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
		blk_recount_segments(q, bio);
	len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
	/*
	 * If the tail of @req and the head of @bio are virtually
	 * contiguous and the combined hw segment stays within bounds,
	 * the bio just extends an existing hw segment.
	 */
	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) &&
	    !BIOVEC_VIRT_OVERSIZE(len)) {
		int mergeable =  ll_new_mergeable(q, req, bio);

		if (mergeable) {
			/*
			 * Propagate the combined hw segment length to
			 * the outer edges of the merged request.
			 */
			if (req->nr_hw_segments == 1)
				req->bio->bi_hw_front_size = len;
			if (bio->bi_hw_segments == 1)
				bio->bi_hw_back_size = len;
		}
		return mergeable;
	}

	return ll_new_hw_segment(q, req, bio);
}
1465 | 1465 | ||
1466 | static int ll_front_merge_fn(struct request_queue *q, struct request *req, | 1466 | static int ll_front_merge_fn(struct request_queue *q, struct request *req, |
1467 | struct bio *bio) | 1467 | struct bio *bio) |
1468 | { | 1468 | { |
1469 | unsigned short max_sectors; | 1469 | unsigned short max_sectors; |
1470 | int len; | 1470 | int len; |
1471 | 1471 | ||
1472 | if (unlikely(blk_pc_request(req))) | 1472 | if (unlikely(blk_pc_request(req))) |
1473 | max_sectors = q->max_hw_sectors; | 1473 | max_sectors = q->max_hw_sectors; |
1474 | else | 1474 | else |
1475 | max_sectors = q->max_sectors; | 1475 | max_sectors = q->max_sectors; |
1476 | 1476 | ||
1477 | 1477 | ||
1478 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { | 1478 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { |
1479 | req->cmd_flags |= REQ_NOMERGE; | 1479 | req->cmd_flags |= REQ_NOMERGE; |
1480 | if (req == q->last_merge) | 1480 | if (req == q->last_merge) |
1481 | q->last_merge = NULL; | 1481 | q->last_merge = NULL; |
1482 | return 0; | 1482 | return 0; |
1483 | } | 1483 | } |
1484 | len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; | 1484 | len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; |
1485 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | 1485 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) |
1486 | blk_recount_segments(q, bio); | 1486 | blk_recount_segments(q, bio); |
1487 | if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID))) | 1487 | if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID))) |
1488 | blk_recount_segments(q, req->bio); | 1488 | blk_recount_segments(q, req->bio); |
1489 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && | 1489 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && |
1490 | !BIOVEC_VIRT_OVERSIZE(len)) { | 1490 | !BIOVEC_VIRT_OVERSIZE(len)) { |
1491 | int mergeable = ll_new_mergeable(q, req, bio); | 1491 | int mergeable = ll_new_mergeable(q, req, bio); |
1492 | 1492 | ||
1493 | if (mergeable) { | 1493 | if (mergeable) { |
1494 | if (bio->bi_hw_segments == 1) | 1494 | if (bio->bi_hw_segments == 1) |
1495 | bio->bi_hw_front_size = len; | 1495 | bio->bi_hw_front_size = len; |
1496 | if (req->nr_hw_segments == 1) | 1496 | if (req->nr_hw_segments == 1) |
1497 | req->biotail->bi_hw_back_size = len; | 1497 | req->biotail->bi_hw_back_size = len; |
1498 | } | 1498 | } |
1499 | return mergeable; | 1499 | return mergeable; |
1500 | } | 1500 | } |
1501 | 1501 | ||
1502 | return ll_new_hw_segment(q, req, bio); | 1502 | return ll_new_hw_segment(q, req, bio); |
1503 | } | 1503 | } |
1504 | 1504 | ||
/*
 * Decide whether request @next can be merged onto the back of @req,
 * updating @req's segment counters if so.  Returns 1 if the merge is
 * allowed, 0 otherwise.  @next's counters are left untouched; the
 * caller is responsible for actually splicing the requests.
 */
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;
	int total_hw_segments;

	/*
	 * First check if the either of the requests are re-queued
	 * requests. Can't merge them if they are.
	 */
	if (req->special || next->special)
		return 0;

	/*
	 * Will it become too large?
	 */
	if ((req->nr_sectors + next->nr_sectors) > q->max_sectors)
		return 0;

	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
	/* adjacent phys segments collapse into one */
	if (blk_phys_contig_segment(q, req->biotail, next->bio))
		total_phys_segments--;

	if (total_phys_segments > q->max_phys_segments)
		return 0;

	total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
	if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
		int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size;
		/*
		 * propagate the combined length to the end of the requests
		 */
		if (req->nr_hw_segments == 1)
			req->bio->bi_hw_front_size = len;
		if (next->nr_hw_segments == 1)
			next->biotail->bi_hw_back_size = len;
		total_hw_segments--;
	}

	if (total_hw_segments > q->max_hw_segments)
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
	req->nr_hw_segments = total_hw_segments;
	return 1;
}
1552 | 1552 | ||
1553 | /* | 1553 | /* |
1554 | * "plug" the device if there are no outstanding requests: this will | 1554 | * "plug" the device if there are no outstanding requests: this will |
1555 | * force the transfer to start only after we have put all the requests | 1555 | * force the transfer to start only after we have put all the requests |
1556 | * on the list. | 1556 | * on the list. |
1557 | * | 1557 | * |
1558 | * This is called with interrupts off and no requests on the queue and | 1558 | * This is called with interrupts off and no requests on the queue and |
1559 | * with the queue lock held. | 1559 | * with the queue lock held. |
1560 | */ | 1560 | */ |
void blk_plug_device(struct request_queue *q)
{
	/* caller must hold the queue lock with interrupts off */
	WARN_ON(!irqs_disabled());

	/*
	 * don't plug a stopped queue, it must be paired with blk_start_queue()
	 * which will restart the queueing
	 */
	if (blk_queue_stopped(q))
		return;

	/* only arm the unplug timer on the plugged 0->1 transition */
	if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
		blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
	}
}

EXPORT_SYMBOL(blk_plug_device);
1579 | 1579 | ||
1580 | /* | 1580 | /* |
1581 | * remove the queue from the plugged list, if present. called with | 1581 | * remove the queue from the plugged list, if present. called with |
1582 | * queue lock held and interrupts disabled. | 1582 | * queue lock held and interrupts disabled. |
1583 | */ | 1583 | */ |
1584 | int blk_remove_plug(struct request_queue *q) | 1584 | int blk_remove_plug(struct request_queue *q) |
1585 | { | 1585 | { |
1586 | WARN_ON(!irqs_disabled()); | 1586 | WARN_ON(!irqs_disabled()); |
1587 | 1587 | ||
1588 | if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) | 1588 | if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) |
1589 | return 0; | 1589 | return 0; |
1590 | 1590 | ||
1591 | del_timer(&q->unplug_timer); | 1591 | del_timer(&q->unplug_timer); |
1592 | return 1; | 1592 | return 1; |
1593 | } | 1593 | } |
1594 | 1594 | ||
1595 | EXPORT_SYMBOL(blk_remove_plug); | 1595 | EXPORT_SYMBOL(blk_remove_plug); |
1596 | 1596 | ||
1597 | /* | 1597 | /* |
1598 | * remove the plug and let it rip.. | 1598 | * remove the plug and let it rip.. |
1599 | */ | 1599 | */ |
1600 | void __generic_unplug_device(struct request_queue *q) | 1600 | void __generic_unplug_device(struct request_queue *q) |
1601 | { | 1601 | { |
1602 | if (unlikely(blk_queue_stopped(q))) | 1602 | if (unlikely(blk_queue_stopped(q))) |
1603 | return; | 1603 | return; |
1604 | 1604 | ||
1605 | if (!blk_remove_plug(q)) | 1605 | if (!blk_remove_plug(q)) |
1606 | return; | 1606 | return; |
1607 | 1607 | ||
1608 | q->request_fn(q); | 1608 | q->request_fn(q); |
1609 | } | 1609 | } |
1610 | EXPORT_SYMBOL(__generic_unplug_device); | 1610 | EXPORT_SYMBOL(__generic_unplug_device); |
1611 | 1611 | ||
1612 | /** | 1612 | /** |
1613 | * generic_unplug_device - fire a request queue | 1613 | * generic_unplug_device - fire a request queue |
1614 | * @q: The &struct request_queue in question | 1614 | * @q: The &struct request_queue in question |
1615 | * | 1615 | * |
1616 | * Description: | 1616 | * Description: |
1617 | * Linux uses plugging to build bigger requests queues before letting | 1617 | * Linux uses plugging to build bigger requests queues before letting |
1618 | * the device have at them. If a queue is plugged, the I/O scheduler | 1618 | * the device have at them. If a queue is plugged, the I/O scheduler |
1619 | * is still adding and merging requests on the queue. Once the queue | 1619 | * is still adding and merging requests on the queue. Once the queue |
1620 | * gets unplugged, the request_fn defined for the queue is invoked and | 1620 | * gets unplugged, the request_fn defined for the queue is invoked and |
1621 | * transfers started. | 1621 | * transfers started. |
1622 | **/ | 1622 | **/ |
void generic_unplug_device(struct request_queue *q)
{
	/* take the queue lock and do the real work in __generic_unplug_device() */
	spin_lock_irq(q->queue_lock);
	__generic_unplug_device(q);
	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(generic_unplug_device);
1630 | 1630 | ||
/*
 * backing_dev_info unplug callback: recover the queue from the bdi and
 * invoke its unplug_fn.  @page is part of the bdi callback signature
 * and is unused here.
 */
static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
				   struct page *page)
{
	struct request_queue *q = bdi->unplug_io_data;

	/*
	 * devices don't necessarily have an ->unplug_fn defined
	 */
	if (q->unplug_fn) {
		/* trace the unplug with the current queued request count */
		blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
					q->rq.count[READ] + q->rq.count[WRITE]);

		q->unplug_fn(q);
	}
}
1646 | 1646 | ||
/*
 * kblockd work handler: unplug the queue from process context.
 * Scheduled from blk_unplug_timeout() and the REENTER paths in
 * blk_start_queue()/blk_run_queue().
 */
static void blk_unplug_work(struct work_struct *work)
{
	struct request_queue *q =
		container_of(work, struct request_queue, unplug_work);

	/* trace the I/O-driven unplug with the current queued request count */
	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
				q->rq.count[READ] + q->rq.count[WRITE]);

	q->unplug_fn(q);
}
1657 | 1657 | ||
/*
 * Unplug timer expiry: punt the actual unplug to kblockd so it runs
 * from process context rather than directly in timer context.
 */
static void blk_unplug_timeout(unsigned long data)
{
	struct request_queue *q = (struct request_queue *)data;

	/* trace the timer-driven unplug with the current queued request count */
	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
				q->rq.count[READ] + q->rq.count[WRITE]);

	kblockd_schedule_work(&q->unplug_work);
}
1667 | 1667 | ||
1668 | /** | 1668 | /** |
1669 | * blk_start_queue - restart a previously stopped queue | 1669 | * blk_start_queue - restart a previously stopped queue |
1670 | * @q: The &struct request_queue in question | 1670 | * @q: The &struct request_queue in question |
1671 | * | 1671 | * |
1672 | * Description: | 1672 | * Description: |
1673 | * blk_start_queue() will clear the stop flag on the queue, and call | 1673 | * blk_start_queue() will clear the stop flag on the queue, and call |
1674 | * the request_fn for the queue if it was in a stopped state when | 1674 | * the request_fn for the queue if it was in a stopped state when |
1675 | * entered. Also see blk_stop_queue(). Queue lock must be held. | 1675 | * entered. Also see blk_stop_queue(). Queue lock must be held. |
1676 | **/ | 1676 | **/ |
void blk_start_queue(struct request_queue *q)
{
	/* caller must hold the queue lock with interrupts off */
	WARN_ON(!irqs_disabled());

	clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);

	/*
	 * one level of recursion is ok and is much faster than kicking
	 * the unplug handling
	 */
	if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
		q->request_fn(q);
		clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
	} else {
		/*
		 * already inside request_fn: plug and defer to kblockd
		 * instead of recursing deeper
		 */
		blk_plug_device(q);
		kblockd_schedule_work(&q->unplug_work);
	}
}

EXPORT_SYMBOL(blk_start_queue);
1697 | 1697 | ||
1698 | /** | 1698 | /** |
1699 | * blk_stop_queue - stop a queue | 1699 | * blk_stop_queue - stop a queue |
1700 | * @q: The &struct request_queue in question | 1700 | * @q: The &struct request_queue in question |
1701 | * | 1701 | * |
1702 | * Description: | 1702 | * Description: |
1703 | * The Linux block layer assumes that a block driver will consume all | 1703 | * The Linux block layer assumes that a block driver will consume all |
1704 | * entries on the request queue when the request_fn strategy is called. | 1704 | * entries on the request queue when the request_fn strategy is called. |
1705 | * Often this will not happen, because of hardware limitations (queue | 1705 | * Often this will not happen, because of hardware limitations (queue |
1706 | * depth settings). If a device driver gets a 'queue full' response, | 1706 | * depth settings). If a device driver gets a 'queue full' response, |
1707 | * or if it simply chooses not to queue more I/O at one point, it can | 1707 | * or if it simply chooses not to queue more I/O at one point, it can |
1708 | * call this function to prevent the request_fn from being called until | 1708 | * call this function to prevent the request_fn from being called until |
1709 | * the driver has signalled it's ready to go again. This happens by calling | 1709 | * the driver has signalled it's ready to go again. This happens by calling |
1710 | * blk_start_queue() to restart queue operations. Queue lock must be held. | 1710 | * blk_start_queue() to restart queue operations. Queue lock must be held. |
1711 | **/ | 1711 | **/ |
void blk_stop_queue(struct request_queue *q)
{
	/* cancel any pending unplug, then flag the queue stopped */
	blk_remove_plug(q);
	set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
}
EXPORT_SYMBOL(blk_stop_queue);
1718 | 1718 | ||
1719 | /** | 1719 | /** |
1720 | * blk_sync_queue - cancel any pending callbacks on a queue | 1720 | * blk_sync_queue - cancel any pending callbacks on a queue |
1721 | * @q: the queue | 1721 | * @q: the queue |
1722 | * | 1722 | * |
1723 | * Description: | 1723 | * Description: |
1724 | * The block layer may perform asynchronous callback activity | 1724 | * The block layer may perform asynchronous callback activity |
1725 | * on a queue, such as calling the unplug function after a timeout. | 1725 | * on a queue, such as calling the unplug function after a timeout. |
1726 | * A block device may call blk_sync_queue to ensure that any | 1726 | * A block device may call blk_sync_queue to ensure that any |
1727 | * such activity is cancelled, thus allowing it to release resources | 1727 | * such activity is cancelled, thus allowing it to release resources |
1728 | * that the callbacks might use. The caller must already have made sure | 1728 | * that the callbacks might use. The caller must already have made sure |
1729 | * that its ->make_request_fn will not re-add plugging prior to calling | 1729 | * that its ->make_request_fn will not re-add plugging prior to calling |
1730 | * this function. | 1730 | * this function. |
1731 | * | 1731 | * |
1732 | */ | 1732 | */ |
void blk_sync_queue(struct request_queue *q)
{
	/*
	 * cancel the unplug timer and wait for a running handler to finish.
	 * NOTE(review): unplug_work already queued on kblockd is not
	 * flushed here -- confirm callers can tolerate that.
	 */
	del_timer_sync(&q->unplug_timer);
}
EXPORT_SYMBOL(blk_sync_queue);
1738 | 1738 | ||
1739 | /** | 1739 | /** |
1740 | * blk_run_queue - run a single device queue | 1740 | * blk_run_queue - run a single device queue |
1741 | * @q: The queue to run | 1741 | * @q: The queue to run |
1742 | */ | 1742 | */ |
void blk_run_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_remove_plug(q);

	/*
	 * Only recurse once to avoid overrunning the stack, let the unplug
	 * handling reinvoke the handler shortly if we already got there.
	 */
	if (!elv_queue_empty(q)) {
		if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
			q->request_fn(q);
			clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
		} else {
			/* already in request_fn: plug and punt to kblockd */
			blk_plug_device(q);
			kblockd_schedule_work(&q->unplug_work);
		}
	}

	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_run_queue);
1767 | 1767 | ||
1768 | /** | 1768 | /** |
1769 | * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed | 1769 | * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed |
1770 | * @kobj: the kobj belonging of the request queue to be released | 1770 | * @kobj: the kobj belonging of the request queue to be released |
1771 | * | 1771 | * |
1772 | * Description: | 1772 | * Description: |
1773 | * blk_cleanup_queue is the pair to blk_init_queue() or | 1773 | * blk_cleanup_queue is the pair to blk_init_queue() or |
1774 | * blk_queue_make_request(). It should be called when a request queue is | 1774 | * blk_queue_make_request(). It should be called when a request queue is |
1775 | * being released; typically when a block device is being de-registered. | 1775 | * being released; typically when a block device is being de-registered. |
1776 | * Currently, its primary task it to free all the &struct request | 1776 | * Currently, its primary task it to free all the &struct request |
1777 | * structures that were allocated to the queue and the queue itself. | 1777 | * structures that were allocated to the queue and the queue itself. |
1778 | * | 1778 | * |
1779 | * Caveat: | 1779 | * Caveat: |
1780 | * Hopefully the low level driver will have finished any | 1780 | * Hopefully the low level driver will have finished any |
1781 | * outstanding requests first... | 1781 | * outstanding requests first... |
1782 | **/ | 1782 | **/ |
/*
 * Final teardown of the queue, invoked by the kobject layer once the
 * last reference is dropped (see blk_put_queue()).  Frees the request
 * mempool, any tag map, the blktrace state, and the queue itself.
 */
static void blk_release_queue(struct kobject *kobj)
{
	struct request_queue *q =
		container_of(kobj, struct request_queue, kobj);
	struct request_list *rl = &q->rq;

	/* make sure no unplug timer callback is still in flight */
	blk_sync_queue(q);

	if (rl->rq_pool)
		mempool_destroy(rl->rq_pool);

	if (q->queue_tags)
		__blk_queue_free_tags(q);

	blk_trace_shutdown(q);

	kmem_cache_free(requestq_cachep, q);
}
1801 | 1801 | ||
void blk_put_queue(struct request_queue *q)
{
	/* drop a queue reference; blk_release_queue() runs on the last put */
	kobject_put(&q->kobj);
}
EXPORT_SYMBOL(blk_put_queue);
1807 | 1807 | ||
void blk_cleanup_queue(struct request_queue * q)
{
	/* mark the queue dead under sysfs_lock so sysfs users see it */
	mutex_lock(&q->sysfs_lock);
	set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
	mutex_unlock(&q->sysfs_lock);

	if (q->elevator)
		elevator_exit(q->elevator);

	/* drop the caller's reference; final free is in blk_release_queue() */
	blk_put_queue(q);
}

EXPORT_SYMBOL(blk_cleanup_queue);
1821 | 1821 | ||
/*
 * Set up the per-queue request free list: zeroed counters, wait queues
 * for tasks sleeping on request allocation, and the request mempool.
 * Returns 0 on success, -ENOMEM if the mempool cannot be created.
 */
static int blk_init_free_list(struct request_queue *q)
{
	struct request_list *rl = &q->rq;

	rl->count[READ] = rl->count[WRITE] = 0;
	rl->starved[READ] = rl->starved[WRITE] = 0;
	rl->elvpriv = 0;
	init_waitqueue_head(&rl->wait[READ]);
	init_waitqueue_head(&rl->wait[WRITE]);

	/* allocate the request mempool on the queue's home NUMA node */
	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
				mempool_free_slab, request_cachep, q->node);

	if (!rl->rq_pool)
		return -ENOMEM;

	return 0;
}
1840 | 1840 | ||
/* allocate a request queue with no NUMA node preference */
struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
	return blk_alloc_queue_node(gfp_mask, -1);
}
EXPORT_SYMBOL(blk_alloc_queue);
1846 | 1846 | ||
1847 | static struct kobj_type queue_ktype; | 1847 | static struct kobj_type queue_ktype; |
1848 | 1848 | ||
/*
 * Allocate and minimally initialize a request queue on @node_id
 * (-1 for no preference).  Returns NULL on allocation failure.  The
 * caller still has to set up the request handling (blk_init_queue()
 * or blk_queue_make_request()).
 */
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
	struct request_queue *q;

	/* __GFP_ZERO: the queue starts out fully zeroed */
	q = kmem_cache_alloc_node(requestq_cachep,
				gfp_mask | __GFP_ZERO, node_id);
	if (!q)
		return NULL;

	init_timer(&q->unplug_timer);

	/* sysfs "queue" directory object, released via blk_release_queue() */
	kobject_set_name(&q->kobj, "%s", "queue");
	q->kobj.ktype = &queue_ktype;
	kobject_init(&q->kobj);

	/* hook the backing_dev_info unplug callback back to this queue */
	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
	q->backing_dev_info.unplug_io_data = q;

	mutex_init(&q->sysfs_lock);

	return q;
}
EXPORT_SYMBOL(blk_alloc_queue_node);
1872 | 1872 | ||
/**
 * blk_init_queue - prepare a request queue for use with a block device
 * @rfn: The function to be called to process requests that have been
 * placed on the queue.
 * @lock: Request queue spin lock
 *
 * Description:
 *    If a block device wishes to use the standard request handling procedures,
 *    which sorts requests and coalesces adjacent requests, then it must
 *    call blk_init_queue().  The function @rfn will be called when there
 *    are requests on the queue that need to be processed.  If the device
 *    supports plugging, then @rfn may not be called immediately when requests
 *    are available on the queue, but may be called at some time later instead.
 *    Plugged queues are generally unplugged when a buffer belonging to one
 *    of the requests on the queue is needed, or due to memory pressure.
 *
 *    @rfn is not required, or even expected, to remove all requests off the
 *    queue, but only as many as it can handle at a time.  If it does leave
 *    requests on the queue, it is responsible for arranging that the requests
 *    get dealt with eventually.
 *
 *    The queue spin lock must be held while manipulating the requests on the
 *    request queue; this lock will be taken also from interrupt context, so irq
 *    disabling is needed for it.
 *
 *    Function returns a pointer to the initialized request queue, or NULL if
 *    it didn't succeed.
 *
 * Note:
 *    blk_init_queue() must be paired with a blk_cleanup_queue() call
 *    when the block device is deactivated (such as at module unload).
 **/

struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
{
	/* -1 == no NUMA node preference for the queue allocation */
	return blk_init_queue_node(rfn, lock, -1);
}
EXPORT_SYMBOL(blk_init_queue);
1911 | 1911 | ||
/*
 * NUMA-aware variant of blk_init_queue(): allocates the queue on
 * @node_id, sets up the request free lists, installs default queue
 * callbacks and limits, and attaches the default elevator.
 *
 * Returns the ready-to-use queue, or NULL on any failure (with all
 * partially-constructed state torn down).
 */
struct request_queue *
blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{
	struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id);

	if (!q)
		return NULL;

	q->node = node_id;
	/* free lists failed: no kobject user yet, free the slab object directly */
	if (blk_init_free_list(q)) {
		kmem_cache_free(requestq_cachep, q);
		return NULL;
	}

	/*
	 * if caller didn't supply a lock, they get per-queue locking with
	 * our embedded lock
	 */
	if (!lock) {
		spin_lock_init(&q->__queue_lock);
		lock = &q->__queue_lock;
	}

	q->request_fn = rfn;
	q->prep_rq_fn = NULL;
	q->unplug_fn = generic_unplug_device;
	q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
	q->queue_lock = lock;

	/* default limits; drivers may tighten these afterwards */
	blk_queue_segment_boundary(q, 0xffffffff);

	blk_queue_make_request(q, __make_request);
	blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);

	blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);

	q->sg_reserved_size = INT_MAX;

	/*
	 * all done
	 */
	if (!elevator_init(q, NULL)) {
		blk_queue_congestion_threshold(q);
		return q;
	}

	/* elevator setup failed: drop our reference to release the queue */
	blk_put_queue(q);
	return NULL;
}
EXPORT_SYMBOL(blk_init_queue_node);
1963 | 1963 | ||
1964 | int blk_get_queue(struct request_queue *q) | 1964 | int blk_get_queue(struct request_queue *q) |
1965 | { | 1965 | { |
1966 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | 1966 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { |
1967 | kobject_get(&q->kobj); | 1967 | kobject_get(&q->kobj); |
1968 | return 0; | 1968 | return 0; |
1969 | } | 1969 | } |
1970 | 1970 | ||
1971 | return 1; | 1971 | return 1; |
1972 | } | 1972 | } |
1973 | 1973 | ||
1974 | EXPORT_SYMBOL(blk_get_queue); | 1974 | EXPORT_SYMBOL(blk_get_queue); |
1975 | 1975 | ||
1976 | static inline void blk_free_request(struct request_queue *q, struct request *rq) | 1976 | static inline void blk_free_request(struct request_queue *q, struct request *rq) |
1977 | { | 1977 | { |
1978 | if (rq->cmd_flags & REQ_ELVPRIV) | 1978 | if (rq->cmd_flags & REQ_ELVPRIV) |
1979 | elv_put_request(q, rq); | 1979 | elv_put_request(q, rq); |
1980 | mempool_free(rq, q->rq.rq_pool); | 1980 | mempool_free(rq, q->rq.rq_pool); |
1981 | } | 1981 | } |
1982 | 1982 | ||
/*
 * Allocate a bare request from the queue's mempool and, when @priv is
 * set, let the I/O scheduler attach its per-request data.  Returns NULL
 * if either the mempool allocation or elv_set_request() fails.
 */
static struct request *
blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
{
	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);

	if (!rq)
		return NULL;

	/*
	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
	 * see bio.h and blkdev.h
	 */
	rq->cmd_flags = rw | REQ_ALLOCED;

	if (priv) {
		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
			/* elevator could not set up its data - undo alloc */
			mempool_free(rq, q->rq.rq_pool);
			return NULL;
		}
		rq->cmd_flags |= REQ_ELVPRIV;
	}

	return rq;
}
2007 | 2007 | ||
/*
 * ioc_batching returns true if the ioc is a valid batching request and
 * should be given priority access to a request.
 *
 * A process with a full, untouched allowance always qualifies; once it
 * has consumed part of its allowance it only stays a batcher within
 * BLK_BATCH_TIME of its last wait.
 */
static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
{
	if (!ioc)
		return 0;

	/*
	 * Make sure the process is able to allocate at least 1 request
	 * even if the batch times out, otherwise we could theoretically
	 * lose wakeups.
	 */
	return ioc->nr_batch_requests == q->nr_batching ||
		(ioc->nr_batch_requests > 0
		&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
}
2026 | 2026 | ||
2027 | /* | 2027 | /* |
2028 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This | 2028 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This |
2029 | * will cause the process to be a "batcher" on all queues in the system. This | 2029 | * will cause the process to be a "batcher" on all queues in the system. This |
2030 | * is the behaviour we want though - once it gets a wakeup it should be given | 2030 | * is the behaviour we want though - once it gets a wakeup it should be given |
2031 | * a nice run. | 2031 | * a nice run. |
2032 | */ | 2032 | */ |
2033 | static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) | 2033 | static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) |
2034 | { | 2034 | { |
2035 | if (!ioc || ioc_batching(q, ioc)) | 2035 | if (!ioc || ioc_batching(q, ioc)) |
2036 | return; | 2036 | return; |
2037 | 2037 | ||
2038 | ioc->nr_batch_requests = q->nr_batching; | 2038 | ioc->nr_batch_requests = q->nr_batching; |
2039 | ioc->last_waited = jiffies; | 2039 | ioc->last_waited = jiffies; |
2040 | } | 2040 | } |
2041 | 2041 | ||
2042 | static void __freed_request(struct request_queue *q, int rw) | 2042 | static void __freed_request(struct request_queue *q, int rw) |
2043 | { | 2043 | { |
2044 | struct request_list *rl = &q->rq; | 2044 | struct request_list *rl = &q->rq; |
2045 | 2045 | ||
2046 | if (rl->count[rw] < queue_congestion_off_threshold(q)) | 2046 | if (rl->count[rw] < queue_congestion_off_threshold(q)) |
2047 | blk_clear_queue_congested(q, rw); | 2047 | blk_clear_queue_congested(q, rw); |
2048 | 2048 | ||
2049 | if (rl->count[rw] + 1 <= q->nr_requests) { | 2049 | if (rl->count[rw] + 1 <= q->nr_requests) { |
2050 | if (waitqueue_active(&rl->wait[rw])) | 2050 | if (waitqueue_active(&rl->wait[rw])) |
2051 | wake_up(&rl->wait[rw]); | 2051 | wake_up(&rl->wait[rw]); |
2052 | 2052 | ||
2053 | blk_clear_queue_full(q, rw); | 2053 | blk_clear_queue_full(q, rw); |
2054 | } | 2054 | } |
2055 | } | 2055 | } |
2056 | 2056 | ||
2057 | /* | 2057 | /* |
2058 | * A request has just been released. Account for it, update the full and | 2058 | * A request has just been released. Account for it, update the full and |
2059 | * congestion status, wake up any waiters. Called under q->queue_lock. | 2059 | * congestion status, wake up any waiters. Called under q->queue_lock. |
2060 | */ | 2060 | */ |
2061 | static void freed_request(struct request_queue *q, int rw, int priv) | 2061 | static void freed_request(struct request_queue *q, int rw, int priv) |
2062 | { | 2062 | { |
2063 | struct request_list *rl = &q->rq; | 2063 | struct request_list *rl = &q->rq; |
2064 | 2064 | ||
2065 | rl->count[rw]--; | 2065 | rl->count[rw]--; |
2066 | if (priv) | 2066 | if (priv) |
2067 | rl->elvpriv--; | 2067 | rl->elvpriv--; |
2068 | 2068 | ||
2069 | __freed_request(q, rw); | 2069 | __freed_request(q, rw); |
2070 | 2070 | ||
2071 | if (unlikely(rl->starved[rw ^ 1])) | 2071 | if (unlikely(rl->starved[rw ^ 1])) |
2072 | __freed_request(q, rw ^ 1); | 2072 | __freed_request(q, rw ^ 1); |
2073 | } | 2073 | } |
2074 | 2074 | ||
#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
/*
 * Get a free request, queue_lock must be held.
 * Returns NULL on failure, with queue_lock held.
 * Returns !NULL on success, with queue_lock *not held*.
 */
static struct request *get_request(struct request_queue *q, int rw_flags,
				   struct bio *bio, gfp_t gfp_mask)
{
	struct request *rq = NULL;
	struct request_list *rl = &q->rq;
	struct io_context *ioc = NULL;
	const int rw = rw_flags & 0x01;	/* data direction bit: READ/WRITE */
	int may_queue, priv;

	/* let the elevator veto (or force) this allocation */
	may_queue = elv_may_queue(q, rw_flags);
	if (may_queue == ELV_MQUEUE_NO)
		goto rq_starved;

	if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
		if (rl->count[rw]+1 >= q->nr_requests) {
			ioc = current_io_context(GFP_ATOMIC, q->node);
			/*
			 * The queue will fill after this allocation, so set
			 * it as full, and mark this process as "batching".
			 * This process will be allowed to complete a batch of
			 * requests, others will be blocked.
			 */
			if (!blk_queue_full(q, rw)) {
				ioc_set_batching(q, ioc);
				blk_set_queue_full(q, rw);
			} else {
				if (may_queue != ELV_MQUEUE_MUST
						&& !ioc_batching(q, ioc)) {
					/*
					 * The queue is full and the allocating
					 * process is not a "batcher", and not
					 * exempted by the IO scheduler
					 */
					goto out;
				}
			}
		}
		blk_set_queue_congested(q, rw);
	}

	/*
	 * Only allow batching queuers to allocate up to 50% over the defined
	 * limit of requests, otherwise we could have thousands of requests
	 * allocated with any setting of ->nr_requests
	 */
	if (rl->count[rw] >= (3 * q->nr_requests / 2))
		goto out;

	rl->count[rw]++;
	rl->starved[rw] = 0;

	/* elevator-private data only while no scheduler switch is in flight */
	priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	if (priv)
		rl->elvpriv++;

	/* drop the lock around the (possibly sleeping) allocation */
	spin_unlock_irq(q->queue_lock);

	rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
	if (unlikely(!rq)) {
		/*
		 * Allocation failed presumably due to memory. Undo anything
		 * we might have messed up.
		 *
		 * Allocating task should really be put onto the front of the
		 * wait queue, but this is pretty rare.
		 */
		spin_lock_irq(q->queue_lock);
		freed_request(q, rw, priv);

		/*
		 * in the very unlikely event that allocation failed and no
		 * requests for this direction was pending, mark us starved
		 * so that freeing of a request in the other direction will
		 * notice us. another possible fix would be to split the
		 * rq mempool into READ and WRITE
		 */
rq_starved:
		if (unlikely(rl->count[rw] == 0))
			rl->starved[rw] = 1;

		goto out;
	}

	/*
	 * ioc may be NULL here, and ioc_batching will be false. That's
	 * OK, if the queue is under the request limit then requests need
	 * not count toward the nr_batch_requests limit. There will always
	 * be some limit enforced by BLK_BATCH_TIME.
	 */
	if (ioc_batching(q, ioc))
		ioc->nr_batch_requests--;

	rq_init(q, rq);

	blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
out:
	return rq;
}
2179 | 2179 | ||
/*
 * No available requests for this queue, unplug the device and wait for some
 * requests to become available.
 *
 * Called with q->queue_lock held, and returns with it unlocked (the
 * successful get_request() drops it for us).
 */
static struct request *get_request_wait(struct request_queue *q, int rw_flags,
					struct bio *bio)
{
	const int rw = rw_flags & 0x01;
	struct request *rq;

	rq = get_request(q, rw_flags, bio, GFP_NOIO);
	while (!rq) {
		DEFINE_WAIT(wait);
		struct request_list *rl = &q->rq;

		/* queue ourselves before retrying so a wakeup can't be lost */
		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
				TASK_UNINTERRUPTIBLE);

		rq = get_request(q, rw_flags, bio, GFP_NOIO);

		if (!rq) {
			struct io_context *ioc;

			blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);

			/* push pending work to the device before sleeping */
			__generic_unplug_device(q);
			spin_unlock_irq(q->queue_lock);
			io_schedule();

			/*
			 * After sleeping, we become a "batching" process and
			 * will be able to allocate at least one request, and
			 * up to a big batch of them for a small period time.
			 * See ioc_batching, ioc_set_batching
			 */
			ioc = current_io_context(GFP_NOIO, q->node);
			ioc_set_batching(q, ioc);

			spin_lock_irq(q->queue_lock);
		}
		finish_wait(&rl->wait[rw], &wait);
	}

	return rq;
}
2227 | 2227 | ||
2228 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | 2228 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) |
2229 | { | 2229 | { |
2230 | struct request *rq; | 2230 | struct request *rq; |
2231 | 2231 | ||
2232 | BUG_ON(rw != READ && rw != WRITE); | 2232 | BUG_ON(rw != READ && rw != WRITE); |
2233 | 2233 | ||
2234 | spin_lock_irq(q->queue_lock); | 2234 | spin_lock_irq(q->queue_lock); |
2235 | if (gfp_mask & __GFP_WAIT) { | 2235 | if (gfp_mask & __GFP_WAIT) { |
2236 | rq = get_request_wait(q, rw, NULL); | 2236 | rq = get_request_wait(q, rw, NULL); |
2237 | } else { | 2237 | } else { |
2238 | rq = get_request(q, rw, NULL, gfp_mask); | 2238 | rq = get_request(q, rw, NULL, gfp_mask); |
2239 | if (!rq) | 2239 | if (!rq) |
2240 | spin_unlock_irq(q->queue_lock); | 2240 | spin_unlock_irq(q->queue_lock); |
2241 | } | 2241 | } |
2242 | /* q->queue_lock is unlocked at this point */ | 2242 | /* q->queue_lock is unlocked at this point */ |
2243 | 2243 | ||
2244 | return rq; | 2244 | return rq; |
2245 | } | 2245 | } |
2246 | EXPORT_SYMBOL(blk_get_request); | 2246 | EXPORT_SYMBOL(blk_get_request); |
2247 | 2247 | ||
2248 | /** | 2248 | /** |
2249 | * blk_start_queueing - initiate dispatch of requests to device | 2249 | * blk_start_queueing - initiate dispatch of requests to device |
2250 | * @q: request queue to kick into gear | 2250 | * @q: request queue to kick into gear |
2251 | * | 2251 | * |
2252 | * This is basically a helper to remove the need to know whether a queue | 2252 | * This is basically a helper to remove the need to know whether a queue |
2253 | * is plugged or not if someone just wants to initiate dispatch of requests | 2253 | * is plugged or not if someone just wants to initiate dispatch of requests |
2254 | * for this queue. | 2254 | * for this queue. |
2255 | * | 2255 | * |
2256 | * The queue lock must be held with interrupts disabled. | 2256 | * The queue lock must be held with interrupts disabled. |
2257 | */ | 2257 | */ |
2258 | void blk_start_queueing(struct request_queue *q) | 2258 | void blk_start_queueing(struct request_queue *q) |
2259 | { | 2259 | { |
2260 | if (!blk_queue_plugged(q)) | 2260 | if (!blk_queue_plugged(q)) |
2261 | q->request_fn(q); | 2261 | q->request_fn(q); |
2262 | else | 2262 | else |
2263 | __generic_unplug_device(q); | 2263 | __generic_unplug_device(q); |
2264 | } | 2264 | } |
2265 | EXPORT_SYMBOL(blk_start_queueing); | 2265 | EXPORT_SYMBOL(blk_start_queueing); |
2266 | 2266 | ||
/**
 * blk_requeue_request - put a request back on queue
 * @q: request queue where request should be inserted
 * @rq: request to be inserted
 *
 * Description:
 *    Drivers often keep queueing requests until the hardware cannot accept
 *    more, when that condition happens we need to put the request back
 *    on the queue.  Any tag held by the request is released first.
 *    Must be called with queue lock held.
 */
void blk_requeue_request(struct request_queue *q, struct request *rq)
{
	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);

	/* release the driver tag before handing the request back */
	if (blk_rq_tagged(rq))
		blk_queue_end_tag(q, rq);

	elv_requeue_request(q, rq);
}

EXPORT_SYMBOL(blk_requeue_request);
2288 | 2288 | ||
/**
 * blk_insert_request - insert a special request in to a request queue
 * @q: request queue where request should be inserted
 * @rq: request to be inserted
 * @at_head: insert request at head or tail of queue
 * @data: private data
 *
 * Description:
 *    Many block devices need to execute commands asynchronously, so they don't
 *    block the whole kernel from preemption during request execution.  This is
 *    accomplished normally by inserting artificial requests tagged as
 *    REQ_SPECIAL in to the corresponding request queue, and letting them be
 *    scheduled for actual execution by the request queue.
 *
 *    We have the option of inserting the head or the tail of the queue.
 *    Typically we use the tail for new ioctls and so forth.  We use the head
 *    of the queue for things like a QUEUE_FULL message from a device, or a
 *    host that is unable to accept a particular command.
 */
void blk_insert_request(struct request_queue *q, struct request *rq,
			int at_head, void *data)
{
	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
	unsigned long flags;

	/*
	 * tell I/O scheduler that this isn't a regular read/write (ie it
	 * must not attempt merges on this) and that it acts as a soft
	 * barrier
	 */
	rq->cmd_type = REQ_TYPE_SPECIAL;
	rq->cmd_flags |= REQ_SOFTBARRIER;

	rq->special = data;

	spin_lock_irqsave(q->queue_lock, flags);

	/*
	 * If command is tagged, release the tag
	 */
	if (blk_rq_tagged(rq))
		blk_queue_end_tag(q, rq);

	/* account it, queue it, and kick the device into motion */
	drive_stat_acct(rq, rq->nr_sectors, 1);
	__elv_add_request(q, rq, where, 0);
	blk_start_queueing(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

EXPORT_SYMBOL(blk_insert_request);
2339 | 2339 | ||
2340 | static int __blk_rq_unmap_user(struct bio *bio) | 2340 | static int __blk_rq_unmap_user(struct bio *bio) |
2341 | { | 2341 | { |
2342 | int ret = 0; | 2342 | int ret = 0; |
2343 | 2343 | ||
2344 | if (bio) { | 2344 | if (bio) { |
2345 | if (bio_flagged(bio, BIO_USER_MAPPED)) | 2345 | if (bio_flagged(bio, BIO_USER_MAPPED)) |
2346 | bio_unmap_user(bio); | 2346 | bio_unmap_user(bio); |
2347 | else | 2347 | else |
2348 | ret = bio_uncopy_user(bio); | 2348 | ret = bio_uncopy_user(bio); |
2349 | } | 2349 | } |
2350 | 2350 | ||
2351 | return ret; | 2351 | return ret; |
2352 | } | 2352 | } |
2353 | 2353 | ||
/**
 * blk_rq_append_bio - append a bio to an existing request
 * @q:   request queue the request belongs to
 * @rq:  request to append the bio to
 * @bio: bio to append
 *
 * Description:
 *    If @rq is empty, @bio becomes its first bio.  Otherwise the bio is
 *    accepted only if the back-merge check allows it, in which case it
 *    is linked at the tail and the request's data length grows by the
 *    bio's size.  Returns 0 on success, -EINVAL if the merge is refused.
 */
int blk_rq_append_bio(struct request_queue *q, struct request *rq,
		      struct bio *bio)
{
	if (!rq->bio)
		blk_rq_bio_prep(q, rq, bio);
	else if (!ll_back_merge_fn(q, rq, bio))
		return -EINVAL;
	else {
		rq->biotail->bi_next = bio;
		rq->biotail = bio;

		rq->data_len += bio->bi_size;
	}
	return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);
2370 | 2370 | ||
/*
 * Map (or copy) a single user buffer segment into @rq.  Returns the
 * number of bytes added to the request on success, or a negative errno.
 * On failure the bio is ended and unmapped before returning.
 */
static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
			     void __user *ubuf, unsigned int len)
{
	unsigned long uaddr;
	struct bio *bio, *orig_bio;
	int reading, ret;

	reading = rq_data_dir(rq) == READ;

	/*
	 * if alignment requirement is satisfied, map in user pages for
	 * direct dma. else, set up kernel bounce buffers
	 */
	uaddr = (unsigned long) ubuf;
	if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
		bio = bio_map_user(q, NULL, uaddr, len, reading);
	else
		bio = bio_copy_user(q, uaddr, len, reading);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* keep the pre-bounce bio around: unmapping wants the original */
	orig_bio = bio;
	blk_queue_bounce(q, &bio);

	/*
	 * We link the bounce buffer in and could have to traverse it
	 * later so we have to get a ref to prevent it from being freed
	 */
	bio_get(bio);

	ret = blk_rq_append_bio(q, rq, bio);
	if (!ret)
		return bio->bi_size;

	/* if it was bounced we must call the end io function */
	bio_endio(bio, 0);
	__blk_rq_unmap_user(orig_bio);
	bio_put(bio);
	return ret;
}
2412 | 2412 | ||
/**
 * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
 * @q: request queue where request should be inserted
 * @rq: request structure to fill
 * @ubuf: the user buffer
 * @len: length of user data
 *
 * Description:
 *    Data will be mapped directly for zero copy io, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of io, while
 *    still in process context.
 *
 *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
 *    before being submitted to the device, as pages mapped may be out of
 *    reach. It's the caller's responsibility to make sure this happens. The
 *    original bio must be passed back in to blk_rq_unmap_user() for proper
 *    unmapping.
 */
int blk_rq_map_user(struct request_queue *q, struct request *rq,
		    void __user *ubuf, unsigned long len)
{
	unsigned long bytes_read = 0;
	struct bio *bio = NULL;
	int ret;

	/* reject transfers larger than the hardware takes in one request */
	if (len > (q->max_hw_sectors << 9))
		return -EINVAL;
	if (!len || !ubuf)
		return -EINVAL;

	while (bytes_read != len) {
		unsigned long map_len, end, start;

		/* map at most BIO_MAX_SIZE bytes per iteration */
		map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE);
		end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1)
								>> PAGE_SHIFT;
		start = (unsigned long)ubuf >> PAGE_SHIFT;

		/*
		 * A bad offset could cause us to require BIO_MAX_PAGES + 1
		 * pages. If this happens we just lower the requested
		 * mapping len by a page so that we can fit
		 */
		if (end - start > BIO_MAX_PAGES)
			map_len -= PAGE_SIZE;

		ret = __blk_rq_map_user(q, rq, ubuf, map_len);
		if (ret < 0)
			goto unmap_rq;
		/* remember the first bio for error unwinding */
		if (!bio)
			bio = rq->bio;
		bytes_read += ret;
		ubuf += ret;
	}

	rq->buffer = rq->data = NULL;
	return 0;
unmap_rq:
	blk_rq_unmap_user(bio);
	return ret;
}

EXPORT_SYMBOL(blk_rq_map_user);
2478 | 2478 | ||
/**
 * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage
 * @q: request queue where request should be inserted
 * @rq: request to map data to
 * @iov: pointer to the iovec
 * @iov_count: number of elements in the iovec
 * @len: I/O byte count
 *
 * Description:
 *    Data will be mapped directly for zero copy io, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of io, while
 *    still in process context.
 *
 *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
 *    before being submitted to the device, as pages mapped may be out of
 *    reach. It's the caller's responsibility to make sure this happens. The
 *    original bio must be passed back in to blk_rq_unmap_user() for proper
 *    unmapping.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
			struct sg_iovec *iov, int iov_count, unsigned int len)
{
	struct bio *bio;

	if (!iov || iov_count <= 0)
		return -EINVAL;

	/* we don't allow misaligned data like bio_map_user() does. If the
	 * user is using sg, they're expected to know the alignment constraints
	 * and respect them accordingly */
	bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* partial mappings are not accepted: undo and bail */
	if (bio->bi_size != len) {
		bio_endio(bio, 0);
		bio_unmap_user(bio);
		return -EINVAL;
	}

	/* extra ref, dropped by blk_rq_unmap_user() later */
	bio_get(bio);
	blk_rq_bio_prep(q, rq, bio);
	rq->buffer = rq->data = NULL;
	return 0;
}

EXPORT_SYMBOL(blk_rq_map_user_iov);
2528 | 2528 | ||
2529 | /** | 2529 | /** |
2530 | * blk_rq_unmap_user - unmap a request with user data | 2530 | * blk_rq_unmap_user - unmap a request with user data |
2531 | * @bio: start of bio list | 2531 | * @bio: start of bio list |
2532 | * | 2532 | * |
2533 | * Description: | 2533 | * Description: |
2534 | * Unmap a rq previously mapped by blk_rq_map_user(). The caller must | 2534 | * Unmap a rq previously mapped by blk_rq_map_user(). The caller must |
2535 | * supply the original rq->bio from the blk_rq_map_user() return, since | 2535 | * supply the original rq->bio from the blk_rq_map_user() return, since |
2536 | * the io completion may have changed rq->bio. | 2536 | * the io completion may have changed rq->bio. |
2537 | */ | 2537 | */ |
2538 | int blk_rq_unmap_user(struct bio *bio) | 2538 | int blk_rq_unmap_user(struct bio *bio) |
2539 | { | 2539 | { |
2540 | struct bio *mapped_bio; | 2540 | struct bio *mapped_bio; |
2541 | int ret = 0, ret2; | 2541 | int ret = 0, ret2; |
2542 | 2542 | ||
2543 | while (bio) { | 2543 | while (bio) { |
2544 | mapped_bio = bio; | 2544 | mapped_bio = bio; |
2545 | if (unlikely(bio_flagged(bio, BIO_BOUNCED))) | 2545 | if (unlikely(bio_flagged(bio, BIO_BOUNCED))) |
2546 | mapped_bio = bio->bi_private; | 2546 | mapped_bio = bio->bi_private; |
2547 | 2547 | ||
2548 | ret2 = __blk_rq_unmap_user(mapped_bio); | 2548 | ret2 = __blk_rq_unmap_user(mapped_bio); |
2549 | if (ret2 && !ret) | 2549 | if (ret2 && !ret) |
2550 | ret = ret2; | 2550 | ret = ret2; |
2551 | 2551 | ||
2552 | mapped_bio = bio; | 2552 | mapped_bio = bio; |
2553 | bio = bio->bi_next; | 2553 | bio = bio->bi_next; |
2554 | bio_put(mapped_bio); | 2554 | bio_put(mapped_bio); |
2555 | } | 2555 | } |
2556 | 2556 | ||
2557 | return ret; | 2557 | return ret; |
2558 | } | 2558 | } |
2559 | 2559 | ||
2560 | EXPORT_SYMBOL(blk_rq_unmap_user); | 2560 | EXPORT_SYMBOL(blk_rq_unmap_user); |
2561 | 2561 | ||
/**
 * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage
 * @q: request queue where request should be inserted
 * @rq: request to fill
 * @kbuf: the kernel buffer
 * @len: length of user data
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Maps @kbuf into @rq via a bio.  The bio is bounced through
 *    blk_queue_bounce() in case the buffer is outside the device's
 *    addressable range.  Returns 0 on success or a negative errno.
 */
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
		    unsigned int len, gfp_t gfp_mask)
{
	struct bio *bio;

	if (len > (q->max_hw_sectors << 9))
		return -EINVAL;
	if (!len || !kbuf)
		return -EINVAL;

	bio = bio_map_kern(q, kbuf, len, gfp_mask);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* bio_map_kern() maps for reading; flag writes explicitly */
	if (rq_data_dir(rq) == WRITE)
		bio->bi_rw |= (1 << BIO_RW);

	blk_rq_bio_prep(q, rq, bio);
	blk_queue_bounce(q, &rq->bio);
	rq->buffer = rq->data = NULL;
	return 0;
}

EXPORT_SYMBOL(blk_rq_map_kern);
2594 | 2594 | ||
/**
 * blk_execute_rq_nowait - insert a request into queue for execution
 * @q: queue to insert the request in
 * @bd_disk: matching gendisk
 * @rq: request to insert
 * @at_head: insert request at head or tail of queue
 * @done: I/O completion handler
 *
 * Description:
 *    Insert a fully prepared request at the back of the io scheduler queue
 *    for execution. Don't wait for completion.  Must be called with
 *    interrupts enabled (enforced by the WARN_ON below), since the queue
 *    lock is taken with spin_lock_irq().
 */
void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
			   struct request *rq, int at_head,
			   rq_end_io_fn *done)
{
	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;

	rq->rq_disk = bd_disk;
	/* never merge anything into a request being executed directly */
	rq->cmd_flags |= REQ_NOMERGE;
	rq->end_io = done;
	WARN_ON(irqs_disabled());
	spin_lock_irq(q->queue_lock);
	__elv_add_request(q, rq, where, 1);
	__generic_unplug_device(q);
	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
2623 | 2623 | ||
/**
 * blk_execute_rq - insert a request into queue for execution
 * @q: queue to insert the request in
 * @bd_disk: matching gendisk
 * @rq: request to insert
 * @at_head: insert request at head or tail of queue
 *
 * Description:
 *    Insert a fully prepared request at the back of the io scheduler queue
 *    for execution and wait for completion.  Returns 0 on success or -EIO
 *    if the request completed with errors.
 */
int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
		   struct request *rq, int at_head)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	/*
	 * sense lives on this stack frame; valid because we wait for
	 * completion before returning
	 */
	char sense[SCSI_SENSE_BUFFERSIZE];
	int err = 0;

	/*
	 * we need an extra reference to the request, so we can look at
	 * it after io completion
	 */
	rq->ref_count++;

	if (!rq->sense) {
		memset(sense, 0, sizeof(sense));
		rq->sense = sense;
		rq->sense_len = 0;
	}

	rq->end_io_data = &wait;
	blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
	wait_for_completion(&wait);

	if (rq->errors)
		err = -EIO;

	return err;
}

EXPORT_SYMBOL(blk_execute_rq);
2665 | 2665 | ||
/**
 * blkdev_issue_flush - queue a flush
 * @bdev: blockdev to issue flush for
 * @error_sector: error sector
 *
 * Description:
 *    Issue a flush for the block device in question. Caller can supply
 *    room for storing the error offset in case of a flush error, if they
 *    wish to. Caller must run wait_for_completion() on its own.
 *    Returns -ENXIO if the device or queue is gone, -EOPNOTSUPP if the
 *    queue has no flush method, otherwise the issue_flush_fn result.
 */
int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
{
	struct request_queue *q;

	if (bdev->bd_disk == NULL)
		return -ENXIO;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;
	if (!q->issue_flush_fn)
		return -EOPNOTSUPP;

	return q->issue_flush_fn(q, bdev->bd_disk, error_sector);
}

EXPORT_SYMBOL(blkdev_issue_flush);
2693 | 2693 | ||
2694 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) | 2694 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) |
2695 | { | 2695 | { |
2696 | int rw = rq_data_dir(rq); | 2696 | int rw = rq_data_dir(rq); |
2697 | 2697 | ||
2698 | if (!blk_fs_request(rq) || !rq->rq_disk) | 2698 | if (!blk_fs_request(rq) || !rq->rq_disk) |
2699 | return; | 2699 | return; |
2700 | 2700 | ||
2701 | if (!new_io) { | 2701 | if (!new_io) { |
2702 | __disk_stat_inc(rq->rq_disk, merges[rw]); | 2702 | __disk_stat_inc(rq->rq_disk, merges[rw]); |
2703 | } else { | 2703 | } else { |
2704 | disk_round_stats(rq->rq_disk); | 2704 | disk_round_stats(rq->rq_disk); |
2705 | rq->rq_disk->in_flight++; | 2705 | rq->rq_disk->in_flight++; |
2706 | } | 2706 | } |
2707 | } | 2707 | } |
2708 | 2708 | ||
/*
 * add-request adds a request to the linked list.
 * queue lock is held and interrupts disabled, as we muck with the
 * request queue list.
 */
static inline void add_request(struct request_queue * q, struct request * req)
{
	/* account the new I/O before it is queued */
	drive_stat_acct(req, req->nr_sectors, 1);

	/*
	 * elevator indicated where it wants this request to be
	 * inserted at elevator_merge time
	 */
	__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
}
2724 | 2724 | ||
/*
 * disk_round_stats() - Round off the performance stats on a struct
 * disk_stats.
 *
 * The average IO queue length and utilisation statistics are maintained
 * by observing the current state of the queue length and the amount of
 * time it has been in this state for.
 *
 * Normally, that accounting is done on IO completion, but that can result
 * in more than a second's worth of IO being accounted for within any one
 * second, leading to >100% utilisation. To deal with that, we call this
 * function to do a round-off before returning the results when reading
 * /proc/diskstats. This accounts immediately for all queue usage up to
 * the current jiffies and restarts the counters again.
 */
void disk_round_stats(struct gendisk *disk)
{
	unsigned long now = jiffies;

	/* already accounted up to this jiffy */
	if (now == disk->stamp)
		return;

	/* charge queue time and busy ticks for the elapsed interval */
	if (disk->in_flight) {
		__disk_stat_add(disk, time_in_queue,
				disk->in_flight * (now - disk->stamp));
		__disk_stat_add(disk, io_ticks, (now - disk->stamp));
	}
	disk->stamp = now;
}

EXPORT_SYMBOL_GPL(disk_round_stats);
2756 | 2756 | ||
/*
 * Drop a reference to @req and free it once the last reference is gone.
 * queue lock must be held
 */
void __blk_put_request(struct request_queue *q, struct request *req)
{
	if (unlikely(!q))
		return;
	/* still referenced elsewhere? nothing more to do */
	if (unlikely(--req->ref_count))
		return;

	elv_completed_request(q, req);

	/*
	 * Request may not have originated from ll_rw_blk. if not,
	 * it didn't come out of our reserved rq pools
	 */
	if (req->cmd_flags & REQ_ALLOCED) {
		int rw = rq_data_dir(req);
		int priv = req->cmd_flags & REQ_ELVPRIV;

		/* must be off all lists before it can be freed */
		BUG_ON(!list_empty(&req->queuelist));
		BUG_ON(!hlist_unhashed(&req->hash));

		blk_free_request(q, req);
		freed_request(q, rw, priv);
	}
}

EXPORT_SYMBOL_GPL(__blk_put_request);
2786 | 2786 | ||
2787 | void blk_put_request(struct request *req) | 2787 | void blk_put_request(struct request *req) |
2788 | { | 2788 | { |
2789 | unsigned long flags; | 2789 | unsigned long flags; |
2790 | struct request_queue *q = req->q; | 2790 | struct request_queue *q = req->q; |
2791 | 2791 | ||
2792 | /* | 2792 | /* |
2793 | * Gee, IDE calls in w/ NULL q. Fix IDE and remove the | 2793 | * Gee, IDE calls in w/ NULL q. Fix IDE and remove the |
2794 | * following if (q) test. | 2794 | * following if (q) test. |
2795 | */ | 2795 | */ |
2796 | if (q) { | 2796 | if (q) { |
2797 | spin_lock_irqsave(q->queue_lock, flags); | 2797 | spin_lock_irqsave(q->queue_lock, flags); |
2798 | __blk_put_request(q, req); | 2798 | __blk_put_request(q, req); |
2799 | spin_unlock_irqrestore(q->queue_lock, flags); | 2799 | spin_unlock_irqrestore(q->queue_lock, flags); |
2800 | } | 2800 | } |
2801 | } | 2801 | } |
2802 | 2802 | ||
2803 | EXPORT_SYMBOL(blk_put_request); | 2803 | EXPORT_SYMBOL(blk_put_request); |
2804 | 2804 | ||
/**
 * blk_end_sync_rq - executes a completion event on a request
 * @rq: request to complete
 * @error: end io status of the request
 *
 * End-io handler used by blk_execute_rq(): drops the extra request
 * reference and wakes the waiter stored in rq->end_io_data.
 */
void blk_end_sync_rq(struct request *rq, int error)
{
	struct completion *waiting = rq->end_io_data;

	rq->end_io_data = NULL;
	__blk_put_request(rq->q, rq);

	/*
	 * complete last, if this is a stack request the process (and thus
	 * the rq pointer) could be invalid right after this complete()
	 */
	complete(waiting);
}
EXPORT_SYMBOL(blk_end_sync_rq);
2824 | 2824 | ||
/*
 * Attempt to merge request @next into @req (they must be queue
 * neighbours). Has to be called with the request spinlock acquired.
 * Returns 1 if the requests were merged (and @next released),
 * 0 otherwise.
 */
static int attempt_merge(struct request_queue *q, struct request *req,
			  struct request *next)
{
	if (!rq_mergeable(req) || !rq_mergeable(next))
		return 0;

	/*
	 * not contiguous
	 */
	if (req->sector + req->nr_sectors != next->sector)
		return 0;

	/* must be the same direction and disk, and next must carry no
	 * driver-private payload */
	if (rq_data_dir(req) != rq_data_dir(next)
	    || req->rq_disk != next->rq_disk
	    || next->special)
		return 0;

	/*
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next. merge_requests_fn
	 * will have updated segment counts, update sector
	 * counts here.
	 */
	if (!ll_merge_requests_fn(q, req, next))
		return 0;

	/*
	 * At this point we have either done a back merge
	 * or front merge. We need the smaller start_time of
	 * the merged requests to be the current request
	 * for accounting purposes.
	 */
	if (time_after(req->start_time, next->start_time))
		req->start_time = next->start_time;

	/* splice next's bio chain onto the tail of req's */
	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;

	/* let the elevator drop its bookkeeping for next */
	elv_merge_requests(q, req, next);

	/* two in-flight requests just became one; fix up disk stats */
	if (req->rq_disk) {
		disk_round_stats(req->rq_disk);
		req->rq_disk->in_flight--;
	}

	req->ioprio = ioprio_best(req->ioprio, next->ioprio);

	/* next's bios now belong to req; release next itself */
	__blk_put_request(q, next);
	return 1;
}
2880 | 2880 | ||
/*
 * Try to merge @rq with the request the elevator sorts directly after
 * it, if one exists. Returns 1 if a merge took place.
 */
static inline int attempt_back_merge(struct request_queue *q,
				     struct request *rq)
{
	struct request *next = elv_latter_request(q, rq);

	if (!next)
		return 0;

	return attempt_merge(q, rq, next);
}
2891 | 2891 | ||
/*
 * Try to merge @rq with the request the elevator sorts directly before
 * it, if one exists. Returns 1 if a merge took place.
 */
static inline int attempt_front_merge(struct request_queue *q,
				      struct request *rq)
{
	struct request *prev = elv_former_request(q, rq);

	if (!prev)
		return 0;

	return attempt_merge(q, prev, rq);
}
2902 | 2902 | ||
2903 | static void init_request_from_bio(struct request *req, struct bio *bio) | 2903 | static void init_request_from_bio(struct request *req, struct bio *bio) |
2904 | { | 2904 | { |
2905 | req->cmd_type = REQ_TYPE_FS; | 2905 | req->cmd_type = REQ_TYPE_FS; |
2906 | 2906 | ||
2907 | /* | 2907 | /* |
2908 | * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) | 2908 | * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) |
2909 | */ | 2909 | */ |
2910 | if (bio_rw_ahead(bio) || bio_failfast(bio)) | 2910 | if (bio_rw_ahead(bio) || bio_failfast(bio)) |
2911 | req->cmd_flags |= REQ_FAILFAST; | 2911 | req->cmd_flags |= REQ_FAILFAST; |
2912 | 2912 | ||
2913 | /* | 2913 | /* |
2914 | * REQ_BARRIER implies no merging, but lets make it explicit | 2914 | * REQ_BARRIER implies no merging, but lets make it explicit |
2915 | */ | 2915 | */ |
2916 | if (unlikely(bio_barrier(bio))) | 2916 | if (unlikely(bio_barrier(bio))) |
2917 | req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); | 2917 | req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); |
2918 | 2918 | ||
2919 | if (bio_sync(bio)) | 2919 | if (bio_sync(bio)) |
2920 | req->cmd_flags |= REQ_RW_SYNC; | 2920 | req->cmd_flags |= REQ_RW_SYNC; |
2921 | if (bio_rw_meta(bio)) | 2921 | if (bio_rw_meta(bio)) |
2922 | req->cmd_flags |= REQ_RW_META; | 2922 | req->cmd_flags |= REQ_RW_META; |
2923 | 2923 | ||
2924 | req->errors = 0; | 2924 | req->errors = 0; |
2925 | req->hard_sector = req->sector = bio->bi_sector; | 2925 | req->hard_sector = req->sector = bio->bi_sector; |
2926 | req->ioprio = bio_prio(bio); | 2926 | req->ioprio = bio_prio(bio); |
2927 | req->start_time = jiffies; | 2927 | req->start_time = jiffies; |
2928 | blk_rq_bio_prep(req->q, req, bio); | 2928 | blk_rq_bio_prep(req->q, req, bio); |
2929 | } | 2929 | } |
2930 | 2930 | ||
/*
 * Queue @bio on @q: first try to merge it into an existing request via
 * the elevator, otherwise allocate a new request for it. Called with
 * the queue unlocked; takes and releases q->queue_lock internally.
 * Always returns 0 (the bio has been fully consumed).
 */
static int __make_request(struct request_queue *q, struct bio *bio)
{
	struct request *req;
	int el_ret, nr_sectors, barrier, err;
	const unsigned short prio = bio_prio(bio);
	const int sync = bio_sync(bio);
	int rw_flags;

	nr_sectors = bio_sectors(bio);

	/*
	 * low level driver can indicate that it wants pages above a
	 * certain limit bounced to low memory (ie for highmem, or even
	 * ISA dma in theory)
	 */
	blk_queue_bounce(q, &bio);

	/* barriers are unsupported if the queue does no ordering at all */
	barrier = bio_barrier(bio);
	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
		err = -EOPNOTSUPP;
		goto end_io;
	}

	spin_lock_irq(q->queue_lock);

	/* barriers never merge; an empty queue has nothing to merge with */
	if (unlikely(barrier) || elv_queue_empty(q))
		goto get_rq;

	el_ret = elv_merge(q, &req, bio);
	switch (el_ret) {
	case ELEVATOR_BACK_MERGE:
		BUG_ON(!rq_mergeable(req));

		if (!ll_back_merge_fn(q, req, bio))
			break;

		blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);

		/* append bio at the tail of req's bio chain */
		req->biotail->bi_next = bio;
		req->biotail = bio;
		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
		req->ioprio = ioprio_best(req->ioprio, prio);
		drive_stat_acct(req, nr_sectors, 0);
		/* growing req may have made it contiguous with its successor */
		if (!attempt_back_merge(q, req))
			elv_merged_request(q, req, el_ret);
		goto out;

	case ELEVATOR_FRONT_MERGE:
		BUG_ON(!rq_mergeable(req));

		if (!ll_front_merge_fn(q, req, bio))
			break;

		blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);

		/* prepend bio at the head of req's bio chain */
		bio->bi_next = req->bio;
		req->bio = bio;

		/*
		 * may not be valid. if the low level driver said
		 * it didn't need a bounce buffer then it better
		 * not touch req->buffer either...
		 */
		req->buffer = bio_data(bio);
		req->current_nr_sectors = bio_cur_sectors(bio);
		req->hard_cur_sectors = req->current_nr_sectors;
		req->sector = req->hard_sector = bio->bi_sector;
		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
		req->ioprio = ioprio_best(req->ioprio, prio);
		drive_stat_acct(req, nr_sectors, 0);
		/* growing req may have made it contiguous with its predecessor */
		if (!attempt_front_merge(q, req))
			elv_merged_request(q, req, el_ret);
		goto out;

	/* ELV_NO_MERGE: elevator says don't/can't merge. */
	default:
		;
	}

get_rq:
	/*
	 * This sync check and mask will be re-done in init_request_from_bio(),
	 * but we need to set it earlier to expose the sync flag to the
	 * rq allocator and io schedulers.
	 */
	rw_flags = bio_data_dir(bio);
	if (sync)
		rw_flags |= REQ_RW_SYNC;

	/*
	 * Grab a free request. This is might sleep but can not fail.
	 * Returns with the queue unlocked.
	 */
	req = get_request_wait(q, rw_flags, bio);

	/*
	 * After dropping the lock and possibly sleeping here, our request
	 * may now be mergeable after it had proven unmergeable (above).
	 * We don't worry about that case for efficiency. It won't happen
	 * often, and the elevators are able to handle it.
	 */
	init_request_from_bio(req, bio);

	spin_lock_irq(q->queue_lock);
	if (elv_queue_empty(q))
		blk_plug_device(q);
	add_request(q, req);
out:
	/* sync bios want immediate dispatch, so kick the queue now */
	if (sync)
		__generic_unplug_device(q);

	spin_unlock_irq(q->queue_lock);
	return 0;

end_io:
	bio_endio(bio, err);
	return 0;
}
3049 | 3049 | ||
3050 | /* | 3050 | /* |
3051 | * If bio->bi_dev is a partition, remap the location | 3051 | * If bio->bi_dev is a partition, remap the location |
3052 | */ | 3052 | */ |
3053 | static inline void blk_partition_remap(struct bio *bio) | 3053 | static inline void blk_partition_remap(struct bio *bio) |
3054 | { | 3054 | { |
3055 | struct block_device *bdev = bio->bi_bdev; | 3055 | struct block_device *bdev = bio->bi_bdev; |
3056 | 3056 | ||
3057 | if (bdev != bdev->bd_contains) { | 3057 | if (bdev != bdev->bd_contains) { |
3058 | struct hd_struct *p = bdev->bd_part; | 3058 | struct hd_struct *p = bdev->bd_part; |
3059 | const int rw = bio_data_dir(bio); | 3059 | const int rw = bio_data_dir(bio); |
3060 | 3060 | ||
3061 | p->sectors[rw] += bio_sectors(bio); | 3061 | p->sectors[rw] += bio_sectors(bio); |
3062 | p->ios[rw]++; | 3062 | p->ios[rw]++; |
3063 | 3063 | ||
3064 | bio->bi_sector += p->start_sect; | 3064 | bio->bi_sector += p->start_sect; |
3065 | bio->bi_bdev = bdev->bd_contains; | 3065 | bio->bi_bdev = bdev->bd_contains; |
3066 | 3066 | ||
3067 | blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, | 3067 | blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, |
3068 | bdev->bd_dev, bio->bi_sector, | 3068 | bdev->bd_dev, bio->bi_sector, |
3069 | bio->bi_sector - p->start_sect); | 3069 | bio->bi_sector - p->start_sect); |
3070 | } | 3070 | } |
3071 | } | 3071 | } |
3072 | 3072 | ||
3073 | static void handle_bad_sector(struct bio *bio) | 3073 | static void handle_bad_sector(struct bio *bio) |
3074 | { | 3074 | { |
3075 | char b[BDEVNAME_SIZE]; | 3075 | char b[BDEVNAME_SIZE]; |
3076 | 3076 | ||
3077 | printk(KERN_INFO "attempt to access beyond end of device\n"); | 3077 | printk(KERN_INFO "attempt to access beyond end of device\n"); |
3078 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", | 3078 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", |
3079 | bdevname(bio->bi_bdev, b), | 3079 | bdevname(bio->bi_bdev, b), |
3080 | bio->bi_rw, | 3080 | bio->bi_rw, |
3081 | (unsigned long long)bio->bi_sector + bio_sectors(bio), | 3081 | (unsigned long long)bio->bi_sector + bio_sectors(bio), |
3082 | (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); | 3082 | (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); |
3083 | 3083 | ||
3084 | set_bit(BIO_EOF, &bio->bi_flags); | 3084 | set_bit(BIO_EOF, &bio->bi_flags); |
3085 | } | 3085 | } |
3086 | 3086 | ||
#ifdef CONFIG_FAIL_MAKE_REQUEST

static DECLARE_FAULT_ATTR(fail_make_request);

/* parse the "fail_make_request=" kernel boot parameter */
static int __init setup_fail_make_request(char *str)
{
	return setup_fault_attr(&fail_make_request, str);
}
__setup("fail_make_request=", setup_fail_make_request);

/*
 * Fault injection: decide whether this bio should be failed. Only bios
 * aimed at a disk flagged GENHD_FL_FAIL, or at a partition whose
 * make_it_fail is set, are candidates; the fault_attr then decides.
 */
static int should_fail_request(struct bio *bio)
{
	if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) ||
	    (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail))
		return should_fail(&fail_make_request, bio->bi_size);

	return 0;
}

/* expose the fault-injection knobs under debugfs */
static int __init fail_make_request_debugfs(void)
{
	return init_fault_attr_dentries(&fail_make_request,
					"fail_make_request");
}

late_initcall(fail_make_request_debugfs);

#else /* CONFIG_FAIL_MAKE_REQUEST */

/* fault injection disabled: never fail a request */
static inline int should_fail_request(struct bio *bio)
{
	return 0;
}

#endif /* CONFIG_FAIL_MAKE_REQUEST */
3122 | 3122 | ||
/**
 * generic_make_request: hand a buffer to its device driver for I/O
 * @bio: The bio describing the location in memory and on the device.
 *
 * generic_make_request() is used to make I/O requests of block
 * devices. It is passed a &struct bio, which describes the I/O that needs
 * to be done.
 *
 * generic_make_request() does not return any status. The
 * success/failure status of the request, along with notification of
 * completion, is delivered asynchronously through the bio->bi_end_io
 * function described (one day) else where.
 *
 * The caller of generic_make_request must make sure that bi_io_vec
 * are set to describe the memory buffer, and that bi_dev and bi_sector are
 * set to describe the device address, and the
 * bi_end_io and optionally bi_private are set to describe how
 * completion notification should be signaled.
 *
 * generic_make_request and the drivers it calls may use bi_next if this
 * bio happens to be merged with someone else, and may change bi_dev and
 * bi_sector for remaps as it sees fit. So the values of these fields
 * should NOT be depended on after the call to generic_make_request.
 */
static inline void __generic_make_request(struct bio *bio)
{
	struct request_queue *q;
	sector_t maxsector;
	sector_t old_sector;
	int ret, nr_sectors = bio_sectors(bio);
	dev_t old_dev;

	might_sleep();
	/* Test device or partition size, when known. */
	maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
	if (maxsector) {
		sector_t sector = bio->bi_sector;

		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
			/*
			 * This may well happen - the kernel calls bread()
			 * without checking the size of the device, e.g., when
			 * mounting a device.
			 */
			handle_bad_sector(bio);
			goto end_io;
		}
	}

	/*
	 * Resolve the mapping until finished. (drivers are
	 * still free to implement/resolve their own stacking
	 * by explicitly returning 0)
	 *
	 * NOTE: we don't repeat the blk_size check for each new device.
	 * Stacking drivers are expected to know what they are doing.
	 *
	 * The end_io label lives inside this loop so the error paths
	 * above can reach it with a forward goto.
	 */
	old_sector = -1;
	old_dev = 0;
	do {
		char b[BDEVNAME_SIZE];

		q = bdev_get_queue(bio->bi_bdev);
		if (!q) {
			printk(KERN_ERR
			       "generic_make_request: Trying to access "
				"nonexistent block-device %s (%Lu)\n",
				bdevname(bio->bi_bdev, b),
				(long long) bio->bi_sector);
end_io:
			bio_endio(bio, -EIO);
			break;
		}

		if (unlikely(nr_sectors > q->max_hw_sectors)) {
			printk("bio too big device %s (%u > %u)\n",
				bdevname(bio->bi_bdev, b),
				bio_sectors(bio),
				q->max_hw_sectors);
			goto end_io;
		}

		/* queue is being torn down: fail the I/O */
		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
			goto end_io;

		if (should_fail_request(bio))
			goto end_io;

		/*
		 * If this device has partitions, remap block n
		 * of partition p to block n+start(p) of the disk.
		 */
		blk_partition_remap(bio);

		/* trace the remap performed by the previous iteration */
		if (old_sector != -1)
			blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
					    old_sector);

		blk_add_trace_bio(q, bio, BLK_TA_QUEUE);

		old_sector = bio->bi_sector;
		old_dev = bio->bi_bdev->bd_dev;

		/* re-check size against the device the bio now targets */
		maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
		if (maxsector) {
			sector_t sector = bio->bi_sector;

			if (maxsector < nr_sectors ||
			    maxsector - nr_sectors < sector) {
				/*
				 * This may well happen - partitions are not
				 * checked to make sure they are within the size
				 * of the whole device.
				 */
				handle_bad_sector(bio);
				goto end_io;
			}
		}

		/* non-zero return means the bio was remapped: loop again */
		ret = q->make_request_fn(q, bio);
	} while (ret);
}
3245 | 3245 | ||
/*
 * We only want one ->make_request_fn to be active at a time,
 * else stack usage with stacked devices could be a problem.
 * So use current->bio_{list,tail} to keep a list of requests
 * submited by a make_request_fn function.
 * current->bio_tail is also used as a flag to say if
 * generic_make_request is currently active in this task or not.
 * If it is NULL, then no make_request is active. If it is non-NULL,
 * then a make_request is active, and new requests should be added
 * at the tail
 */
void generic_make_request(struct bio *bio)
{
	if (current->bio_tail) {
		/* make_request is active: queue the bio for the outer
		 * invocation instead of recursing */
		*(current->bio_tail) = bio;
		bio->bi_next = NULL;
		current->bio_tail = &bio->bi_next;
		return;
	}
	/* following loop may be a bit non-obvious, and so deserves some
	 * explanation.
	 * Before entering the loop, bio->bi_next is NULL (as all callers
	 * ensure that) so we have a list with a single bio.
	 * We pretend that we have just taken it off a longer list, so
	 * we assign bio_list to the next (which is NULL) and bio_tail
	 * to &bio_list, thus initialising the bio_list of new bios to be
	 * added. __generic_make_request may indeed add some more bios
	 * through a recursive call to generic_make_request. If it
	 * did, we find a non-NULL value in bio_list and re-enter the loop
	 * from the top. In this case we really did just take the bio
	 * of the top of the list (no pretending) and so fixup bio_list and
	 * bio_tail or bi_next, and call into __generic_make_request again.
	 *
	 * The loop was structured like this to make only one call to
	 * __generic_make_request (which is important as it is large and
	 * inlined) and to keep the structure simple.
	 */
	BUG_ON(bio->bi_next);
	do {
		current->bio_list = bio->bi_next;
		if (bio->bi_next == NULL)
			current->bio_tail = &current->bio_list;
		else
			bio->bi_next = NULL;
		__generic_make_request(bio);
		bio = current->bio_list;
	} while (bio);
	current->bio_tail = NULL; /* deactivate */
}

EXPORT_SYMBOL(generic_make_request);
3298 | 3298 | ||
/**
 * submit_bio: submit a bio to the block device layer for I/O
 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
 * @bio: The &struct bio which describes the I/O
 *
 * submit_bio() is very similar in purpose to generic_make_request(), and
 * uses that function to do most of the work. Both are fairly rough
 * interfaces, @bio must be presetup and ready for I/O.
 *
 * In addition to handing the bio off, this function accounts the I/O in
 * the vm event counters (PGPGIN/PGPGOUT) and, for reads, against the
 * submitting task, and optionally logs the submission when block_dump
 * is enabled.
 */
void submit_bio(int rw, struct bio *bio)
{
	int count = bio_sectors(bio);

	/* an empty or vec-less bio must never reach this point */
	BIO_BUG_ON(!bio->bi_size);
	BIO_BUG_ON(!bio->bi_io_vec);
	/* fold the direction (and READA) bits into the bio flags */
	bio->bi_rw |= rw;
	if (rw & WRITE) {
		count_vm_events(PGPGOUT, count);
	} else {
		/* reads are also charged to the submitting task's IO stats */
		task_io_account_read(bio->bi_size);
		count_vm_events(PGPGIN, count);
	}

	/* sysctl vm.block_dump: log every submission for debugging */
	if (unlikely(block_dump)) {
		char b[BDEVNAME_SIZE];
		printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
			current->comm, current->pid,
			(rw & WRITE) ? "WRITE" : "READ",
			(unsigned long long)bio->bi_sector,
			bdevname(bio->bi_bdev,b));
	}

	generic_make_request(bio);
}

EXPORT_SYMBOL(submit_bio);
3336 | 3336 | ||
3337 | static void blk_recalc_rq_sectors(struct request *rq, int nsect) | 3337 | static void blk_recalc_rq_sectors(struct request *rq, int nsect) |
3338 | { | 3338 | { |
3339 | if (blk_fs_request(rq)) { | 3339 | if (blk_fs_request(rq)) { |
3340 | rq->hard_sector += nsect; | 3340 | rq->hard_sector += nsect; |
3341 | rq->hard_nr_sectors -= nsect; | 3341 | rq->hard_nr_sectors -= nsect; |
3342 | 3342 | ||
3343 | /* | 3343 | /* |
3344 | * Move the I/O submission pointers ahead if required. | 3344 | * Move the I/O submission pointers ahead if required. |
3345 | */ | 3345 | */ |
3346 | if ((rq->nr_sectors >= rq->hard_nr_sectors) && | 3346 | if ((rq->nr_sectors >= rq->hard_nr_sectors) && |
3347 | (rq->sector <= rq->hard_sector)) { | 3347 | (rq->sector <= rq->hard_sector)) { |
3348 | rq->sector = rq->hard_sector; | 3348 | rq->sector = rq->hard_sector; |
3349 | rq->nr_sectors = rq->hard_nr_sectors; | 3349 | rq->nr_sectors = rq->hard_nr_sectors; |
3350 | rq->hard_cur_sectors = bio_cur_sectors(rq->bio); | 3350 | rq->hard_cur_sectors = bio_cur_sectors(rq->bio); |
3351 | rq->current_nr_sectors = rq->hard_cur_sectors; | 3351 | rq->current_nr_sectors = rq->hard_cur_sectors; |
3352 | rq->buffer = bio_data(rq->bio); | 3352 | rq->buffer = bio_data(rq->bio); |
3353 | } | 3353 | } |
3354 | 3354 | ||
3355 | /* | 3355 | /* |
3356 | * if total number of sectors is less than the first segment | 3356 | * if total number of sectors is less than the first segment |
3357 | * size, something has gone terribly wrong | 3357 | * size, something has gone terribly wrong |
3358 | */ | 3358 | */ |
3359 | if (rq->nr_sectors < rq->current_nr_sectors) { | 3359 | if (rq->nr_sectors < rq->current_nr_sectors) { |
3360 | printk("blk: request botched\n"); | 3360 | printk("blk: request botched\n"); |
3361 | rq->nr_sectors = rq->current_nr_sectors; | 3361 | rq->nr_sectors = rq->current_nr_sectors; |
3362 | } | 3362 | } |
3363 | } | 3363 | } |
3364 | } | 3364 | } |
3365 | 3365 | ||
/*
 * __end_that_request_first - complete @nr_bytes of I/O on @req.
 *
 * Walks the request's bio chain, ending each bio that is fully covered by
 * the completed byte count and doing partial bvec accounting for the bio
 * where the completion stops.
 *
 * Returns 0 when the whole request is done (caller must then invoke
 * end_that_request_last()), 1 when buffers are still pending.
 */
static int __end_that_request_first(struct request *req, int uptodate,
				    int nr_bytes)
{
	int total_bytes, bio_nbytes, error, next_idx = 0;
	struct bio *bio;

	blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);

	/*
	 * extend uptodate bool to allow < 0 value to be direct io error
	 */
	error = 0;
	if (end_io_error(uptodate))
		error = !uptodate ? -EIO : uptodate;

	/*
	 * for a REQ_BLOCK_PC request, we want to carry any eventual
	 * sense key with us all the way through
	 */
	if (!blk_pc_request(req))
		req->errors = 0;

	if (!uptodate) {
		/* REQ_QUIET suppresses the error log, e.g. for probes */
		if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))
			printk("end_request: I/O error, dev %s, sector %llu\n",
				req->rq_disk ? req->rq_disk->disk_name : "?",
				(unsigned long long)req->sector);
	}

	/* account completed sectors against the disk, per direction */
	if (blk_fs_request(req) && req->rq_disk) {
		const int rw = rq_data_dir(req);

		disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
	}

	total_bytes = bio_nbytes = 0;
	while ((bio = req->bio) != NULL) {
		int nbytes;

		if (nr_bytes >= bio->bi_size) {
			/* this bio is completely finished: unlink and end it */
			req->bio = bio->bi_next;
			nbytes = bio->bi_size;
			req_bio_endio(req, bio, nbytes, error);
			next_idx = 0;
			bio_nbytes = 0;
		} else {
			/* completion stops inside this bio: walk its bvecs */
			int idx = bio->bi_idx + next_idx;

			if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
				blk_dump_rq_flags(req, "__end_that");
				printk("%s: bio idx %d >= vcnt %d\n",
						__FUNCTION__,
						bio->bi_idx, bio->bi_vcnt);
				break;
			}

			nbytes = bio_iovec_idx(bio, idx)->bv_len;
			BIO_BUG_ON(nbytes > bio->bi_size);

			/*
			 * not a complete bvec done
			 */
			if (unlikely(nbytes > nr_bytes)) {
				bio_nbytes += nr_bytes;
				total_bytes += nr_bytes;
				break;
			}

			/*
			 * advance to the next vector
			 */
			next_idx++;
			bio_nbytes += nbytes;
		}

		total_bytes += nbytes;
		nr_bytes -= nbytes;

		if ((bio = req->bio)) {
			/*
			 * end more in this run, or just return 'not-done'
			 */
			if (unlikely(nr_bytes <= 0))
				break;
		}
	}

	/*
	 * completely done
	 */
	if (!req->bio)
		return 0;

	/*
	 * if the request wasn't completed, update state: end the partial
	 * bytes on the current bio and shift its first bvec forward
	 */
	if (bio_nbytes) {
		req_bio_endio(req, bio, bio_nbytes, error);
		bio->bi_idx += next_idx;
		bio_iovec(bio)->bv_offset += nr_bytes;
		bio_iovec(bio)->bv_len -= nr_bytes;
	}

	blk_recalc_rq_sectors(req, total_bytes >> 9);
	blk_recalc_rq_segments(req);
	return 1;
}
3473 | 3473 | ||
/**
 * end_that_request_first - end I/O on a request
 * @req: the request being processed
 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
 * @nr_sectors: number of sectors to end I/O on
 *
 * Description:
 *     Ends I/O on a number of sectors attached to @req, and sets it up
 *     for the next range of segments (if any) in the cluster.
 *
 * Return:
 *     0 - we are done with this request, call end_that_request_last()
 *     1 - still buffers pending for this request
 **/
int end_that_request_first(struct request *req, int uptodate, int nr_sectors)
{
	/* sector-based convenience wrapper: convert to bytes */
	int nr_bytes = nr_sectors << 9;

	return __end_that_request_first(req, uptodate, nr_bytes);
}

EXPORT_SYMBOL(end_that_request_first);
3494 | 3494 | ||
/**
 * end_that_request_chunk - end I/O on a request
 * @req: the request being processed
 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
 * @nr_bytes: number of bytes to complete
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @req, and sets it up
 *     for the next range of segments (if any). Like end_that_request_first(),
 *     but deals with bytes instead of sectors.
 *
 * Return:
 *     0 - we are done with this request, call end_that_request_last()
 *     1 - still buffers pending for this request
 **/
int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes)
{
	/* byte-granular variant; __end_that_request_first() does the work */
	int pending = __end_that_request_first(req, uptodate, nr_bytes);

	return pending;
}

EXPORT_SYMBOL(end_that_request_chunk);
3516 | 3516 | ||
3517 | /* | 3517 | /* |
3518 | * splice the completion data to a local structure and hand off to | 3518 | * splice the completion data to a local structure and hand off to |
3519 | * process_completion_queue() to complete the requests | 3519 | * process_completion_queue() to complete the requests |
3520 | */ | 3520 | */ |
3521 | static void blk_done_softirq(struct softirq_action *h) | 3521 | static void blk_done_softirq(struct softirq_action *h) |
3522 | { | 3522 | { |
3523 | struct list_head *cpu_list, local_list; | 3523 | struct list_head *cpu_list, local_list; |
3524 | 3524 | ||
3525 | local_irq_disable(); | 3525 | local_irq_disable(); |
3526 | cpu_list = &__get_cpu_var(blk_cpu_done); | 3526 | cpu_list = &__get_cpu_var(blk_cpu_done); |
3527 | list_replace_init(cpu_list, &local_list); | 3527 | list_replace_init(cpu_list, &local_list); |
3528 | local_irq_enable(); | 3528 | local_irq_enable(); |
3529 | 3529 | ||
3530 | while (!list_empty(&local_list)) { | 3530 | while (!list_empty(&local_list)) { |
3531 | struct request *rq = list_entry(local_list.next, struct request, donelist); | 3531 | struct request *rq = list_entry(local_list.next, struct request, donelist); |
3532 | 3532 | ||
3533 | list_del_init(&rq->donelist); | 3533 | list_del_init(&rq->donelist); |
3534 | rq->q->softirq_done_fn(rq); | 3534 | rq->q->softirq_done_fn(rq); |
3535 | } | 3535 | } |
3536 | } | 3536 | } |
3537 | 3537 | ||
3538 | static int __cpuinit blk_cpu_notify(struct notifier_block *self, unsigned long action, | 3538 | static int __cpuinit blk_cpu_notify(struct notifier_block *self, unsigned long action, |
3539 | void *hcpu) | 3539 | void *hcpu) |
3540 | { | 3540 | { |
3541 | /* | 3541 | /* |
3542 | * If a CPU goes away, splice its entries to the current CPU | 3542 | * If a CPU goes away, splice its entries to the current CPU |
3543 | * and trigger a run of the softirq | 3543 | * and trigger a run of the softirq |
3544 | */ | 3544 | */ |
3545 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | 3545 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { |
3546 | int cpu = (unsigned long) hcpu; | 3546 | int cpu = (unsigned long) hcpu; |
3547 | 3547 | ||
3548 | local_irq_disable(); | 3548 | local_irq_disable(); |
3549 | list_splice_init(&per_cpu(blk_cpu_done, cpu), | 3549 | list_splice_init(&per_cpu(blk_cpu_done, cpu), |
3550 | &__get_cpu_var(blk_cpu_done)); | 3550 | &__get_cpu_var(blk_cpu_done)); |
3551 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | 3551 | raise_softirq_irqoff(BLOCK_SOFTIRQ); |
3552 | local_irq_enable(); | 3552 | local_irq_enable(); |
3553 | } | 3553 | } |
3554 | 3554 | ||
3555 | return NOTIFY_OK; | 3555 | return NOTIFY_OK; |
3556 | } | 3556 | } |
3557 | 3557 | ||
3558 | 3558 | ||
/* hotplug notifier: rescues pending completions from a dying CPU */
static struct notifier_block blk_cpu_notifier __cpuinitdata = {
	.notifier_call	= blk_cpu_notify,
};
3562 | 3562 | ||
3563 | /** | 3563 | /** |
3564 | * blk_complete_request - end I/O on a request | 3564 | * blk_complete_request - end I/O on a request |
3565 | * @req: the request being processed | 3565 | * @req: the request being processed |
3566 | * | 3566 | * |
3567 | * Description: | 3567 | * Description: |
3568 | * Ends all I/O on a request. It does not handle partial completions, | 3568 | * Ends all I/O on a request. It does not handle partial completions, |
3569 | * unless the driver actually implements this in its completion callback | 3569 | * unless the driver actually implements this in its completion callback |
3570 | * through requeueing. The actual completion happens out-of-order, | 3570 | * through requeueing. The actual completion happens out-of-order, |
3571 | * through a softirq handler. The user must have registered a completion | 3571 | * through a softirq handler. The user must have registered a completion |
3572 | * callback through blk_queue_softirq_done(). | 3572 | * callback through blk_queue_softirq_done(). |
3573 | **/ | 3573 | **/ |
3574 | 3574 | ||
3575 | void blk_complete_request(struct request *req) | 3575 | void blk_complete_request(struct request *req) |
3576 | { | 3576 | { |
3577 | struct list_head *cpu_list; | 3577 | struct list_head *cpu_list; |
3578 | unsigned long flags; | 3578 | unsigned long flags; |
3579 | 3579 | ||
3580 | BUG_ON(!req->q->softirq_done_fn); | 3580 | BUG_ON(!req->q->softirq_done_fn); |
3581 | 3581 | ||
3582 | local_irq_save(flags); | 3582 | local_irq_save(flags); |
3583 | 3583 | ||
3584 | cpu_list = &__get_cpu_var(blk_cpu_done); | 3584 | cpu_list = &__get_cpu_var(blk_cpu_done); |
3585 | list_add_tail(&req->donelist, cpu_list); | 3585 | list_add_tail(&req->donelist, cpu_list); |
3586 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | 3586 | raise_softirq_irqoff(BLOCK_SOFTIRQ); |
3587 | 3587 | ||
3588 | local_irq_restore(flags); | 3588 | local_irq_restore(flags); |
3589 | } | 3589 | } |
3590 | 3590 | ||
3591 | EXPORT_SYMBOL(blk_complete_request); | 3591 | EXPORT_SYMBOL(blk_complete_request); |
3592 | 3592 | ||
/*
 * end_that_request_last - finish off a fully-completed request.
 *
 * Performs final accounting and either invokes the request's private
 * ->end_io callback or releases the request back to the queue.
 *
 * queue lock must be held
 */
void end_that_request_last(struct request *req, int uptodate)
{
	struct gendisk *disk = req->rq_disk;
	int error;

	/*
	 * extend uptodate bool to allow < 0 value to be direct io error
	 */
	error = 0;
	if (end_io_error(uptodate))
		error = !uptodate ? -EIO : uptodate;

	if (unlikely(laptop_mode) && blk_fs_request(req))
		laptop_io_completion();

	/*
	 * Account IO completion. bar_rq isn't accounted as a normal
	 * IO on queueing nor completion. Accounting the containing
	 * request is enough.
	 */
	if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
		unsigned long duration = jiffies - req->start_time;
		const int rw = rq_data_dir(req);

		__disk_stat_inc(disk, ios[rw]);
		__disk_stat_add(disk, ticks[rw], duration);
		disk_round_stats(disk);
		disk->in_flight--;
	}
	/* ->end_io owns the request if set; otherwise drop our reference */
	if (req->end_io)
		req->end_io(req, error);
	else
		__blk_put_request(req->q, req);
}

EXPORT_SYMBOL(end_that_request_last);
3632 | 3632 | ||
3633 | void end_request(struct request *req, int uptodate) | 3633 | static inline void __end_request(struct request *rq, int uptodate, |
3634 | unsigned int nr_bytes, int dequeue) | ||
3634 | { | 3635 | { |
3635 | if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { | 3636 | if (!end_that_request_chunk(rq, uptodate, nr_bytes)) { |
3636 | add_disk_randomness(req->rq_disk); | 3637 | if (dequeue) |
3637 | blkdev_dequeue_request(req); | 3638 | blkdev_dequeue_request(rq); |
3638 | end_that_request_last(req, uptodate); | 3639 | add_disk_randomness(rq->rq_disk); |
3640 | end_that_request_last(rq, uptodate); | ||
3639 | } | 3641 | } |
3640 | } | 3642 | } |
3641 | 3643 | ||
3644 | static unsigned int rq_byte_size(struct request *rq) | ||
3645 | { | ||
3646 | if (blk_fs_request(rq)) | ||
3647 | return rq->hard_nr_sectors << 9; | ||
3648 | |||
3649 | return rq->data_len; | ||
3650 | } | ||
3651 | |||
/**
 * end_queued_request - end all I/O on a queued request
 * @rq: the request being processed
 * @uptodate: error value or 0/1 uptodate flag
 *
 * Description:
 *     Ends all I/O on a request, and removes it from the block layer queues.
 *     Not suitable for normal IO completion, unless the driver still has
 *     the request attached to the block layer.
 *
 **/
void end_queued_request(struct request *rq, int uptodate)
{
	unsigned int nr_bytes = rq_byte_size(rq);

	/* dequeue=1: the request is still on the queue and must come off */
	__end_request(rq, uptodate, nr_bytes, 1);
}
EXPORT_SYMBOL(end_queued_request);
3668 | |||
/**
 * end_dequeued_request - end all I/O on a dequeued request
 * @rq: the request being processed
 * @uptodate: error value or 0/1 uptodate flag
 *
 * Description:
 *     Ends all I/O on a request. The request must already have been
 *     dequeued using blkdev_dequeue_request(), as is normally the case
 *     for most drivers.
 *
 **/
void end_dequeued_request(struct request *rq, int uptodate)
{
	unsigned int nr_bytes = rq_byte_size(rq);

	/* dequeue=0: the driver already pulled this request off the queue */
	__end_request(rq, uptodate, nr_bytes, 0);
}
EXPORT_SYMBOL(end_dequeued_request);
3685 | |||
3686 | |||
/**
 * end_request - end I/O on the current segment of the request
 * @rq: the request being processed
 * @uptodate: error value or 0/1 uptodate flag
 *
 * Description:
 *     Ends I/O on the current segment of a request. If that is the only
 *     remaining segment, the request is also completed and freed.
 *
 *     This is a remnant of how older block drivers handled IO completions.
 *     Modern drivers typically end IO on the full request in one go, unless
 *     they have a residual value to account for. For that case this function
 *     isn't really useful, unless the residual just happens to be the
 *     full current segment. In other words, don't use this function in new
 *     code. Either use end_queued_request()/end_dequeued_request(), or the
 *     end_that_request_chunk() (along with end_that_request_last()) for
 *     partial completions.
 *
 **/
void end_request(struct request *req, int uptodate)
{
	/* complete just the current segment, dequeueing if it was the last */
	__end_request(req, uptodate, req->hard_cur_sectors << 9, 1);
}
EXPORT_SYMBOL(end_request);
3643 | 3711 | ||
3644 | static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 3712 | static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
3645 | struct bio *bio) | 3713 | struct bio *bio) |
3646 | { | 3714 | { |
3647 | /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ | 3715 | /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ |
3648 | rq->cmd_flags |= (bio->bi_rw & 3); | 3716 | rq->cmd_flags |= (bio->bi_rw & 3); |
3649 | 3717 | ||
3650 | rq->nr_phys_segments = bio_phys_segments(q, bio); | 3718 | rq->nr_phys_segments = bio_phys_segments(q, bio); |
3651 | rq->nr_hw_segments = bio_hw_segments(q, bio); | 3719 | rq->nr_hw_segments = bio_hw_segments(q, bio); |
3652 | rq->current_nr_sectors = bio_cur_sectors(bio); | 3720 | rq->current_nr_sectors = bio_cur_sectors(bio); |
3653 | rq->hard_cur_sectors = rq->current_nr_sectors; | 3721 | rq->hard_cur_sectors = rq->current_nr_sectors; |
3654 | rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); | 3722 | rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); |
3655 | rq->buffer = bio_data(bio); | 3723 | rq->buffer = bio_data(bio); |
3656 | rq->data_len = bio->bi_size; | 3724 | rq->data_len = bio->bi_size; |
3657 | 3725 | ||
3658 | rq->bio = rq->biotail = bio; | 3726 | rq->bio = rq->biotail = bio; |
3659 | 3727 | ||
3660 | if (bio->bi_bdev) | 3728 | if (bio->bi_bdev) |
3661 | rq->rq_disk = bio->bi_bdev->bd_disk; | 3729 | rq->rq_disk = bio->bi_bdev->bd_disk; |
3662 | } | 3730 | } |
3663 | 3731 | ||
/*
 * Queue @work on the kblockd workqueue. Returns nonzero if the work was
 * newly queued, 0 if it was already pending (queue_work() semantics).
 */
int kblockd_schedule_work(struct work_struct *work)
{
	return queue_work(kblockd_workqueue, work);
}

EXPORT_SYMBOL(kblockd_schedule_work);
3670 | 3738 | ||
/*
 * Cancel @work and wait for any in-flight execution of it to finish.
 * Despite the name, this only synchronizes against this one work item,
 * not the whole kblockd workqueue.
 */
void kblockd_flush_work(struct work_struct *work)
{
	cancel_work_sync(work);
}
EXPORT_SYMBOL(kblockd_flush_work);
3676 | 3744 | ||
/*
 * One-time block layer initialisation: create the kblockd workqueue and
 * the request/queue/io_context slab caches, set up the per-cpu completion
 * lists and BLOCK_SOFTIRQ, and record the DMA pfn limits.
 *
 * All slab creations use SLAB_PANIC, so there is no error path here.
 */
int __init blk_dev_init(void)
{
	int i;

	kblockd_workqueue = create_workqueue("kblockd");
	if (!kblockd_workqueue)
		panic("Failed to create kblockd\n");

	request_cachep = kmem_cache_create("blkdev_requests",
			sizeof(struct request), 0, SLAB_PANIC, NULL);

	requestq_cachep = kmem_cache_create("blkdev_queue",
			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);

	iocontext_cachep = kmem_cache_create("blkdev_ioc",
			sizeof(struct io_context), 0, SLAB_PANIC, NULL);

	/* per-cpu lists feeding blk_done_softirq() */
	for_each_possible_cpu(i)
		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));

	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
	register_hotcpu_notifier(&blk_cpu_notifier);

	blk_max_low_pfn = max_low_pfn - 1;
	blk_max_pfn = max_pfn - 1;

	return 0;
}
3705 | 3773 | ||
/*
 * IO Context helper functions
 */

/*
 * Drop a reference on @ioc; on the last put, run the AS and CFQ
 * destructors (under rcu_read_lock, as the cic structures are
 * RCU-managed) and free the context. NULL is a no-op.
 */
void put_io_context(struct io_context *ioc)
{
	if (ioc == NULL)
		return;

	BUG_ON(atomic_read(&ioc->refcount) == 0);

	if (atomic_dec_and_test(&ioc->refcount)) {
		struct cfq_io_context *cic;

		rcu_read_lock();
		if (ioc->aic && ioc->aic->dtor)
			ioc->aic->dtor(ioc->aic);
		if (ioc->cic_root.rb_node != NULL) {
			struct rb_node *n = rb_first(&ioc->cic_root);

			cic = rb_entry(n, struct cfq_io_context, rb_node);
			/* cic->dtor tears down the whole tree, not just this node */
			cic->dtor(ioc);
		}
		rcu_read_unlock();

		kmem_cache_free(iocontext_cachep, ioc);
	}
}
EXPORT_SYMBOL(put_io_context);
3734 | 3802 | ||
/*
 * Called by the exiting task: detach its io_context, run the scheduler
 * exit hooks, and drop the task's reference on the context.
 */
void exit_io_context(void)
{
	struct io_context *ioc;
	struct cfq_io_context *cic;

	/* detach under task_lock so nobody else sees a half-torn-down ioc */
	task_lock(current);
	ioc = current->io_context;
	current->io_context = NULL;
	task_unlock(current);

	ioc->task = NULL;
	if (ioc->aic && ioc->aic->exit)
		ioc->aic->exit(ioc->aic);
	if (ioc->cic_root.rb_node != NULL) {
		cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
		/* cic->exit handles every cic in the tree, not just the first */
		cic->exit(ioc);
	}

	put_io_context(ioc);
}
3756 | 3824 | ||
/*
 * If the current task has no IO context then create one and initialise it.
 * Otherwise, return its existing IO context.
 *
 * This returned IO context doesn't have a specifically elevated refcount,
 * but since the current task itself holds a reference, the context can be
 * used in general code, so long as it stays within `current` context.
 *
 * Returns NULL if no context exists yet and allocation fails.
 */
static struct io_context *current_io_context(gfp_t gfp_flags, int node)
{
	struct task_struct *tsk = current;
	struct io_context *ret;

	ret = tsk->io_context;
	if (likely(ret))
		return ret;

	ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
	if (ret) {
		atomic_set(&ret->refcount, 1);	/* the task's own reference */
		ret->task = current;
		ret->ioprio_changed = 0;
		ret->last_waited = jiffies; /* doesn't matter... */
		ret->nr_batch_requests = 0; /* because this is 0 */
		ret->aic = NULL;
		ret->cic_root.rb_node = NULL;
		ret->ioc_data = NULL;
		/* make sure set_task_ioprio() sees the settings above */
		smp_wmb();
		tsk->io_context = ret;
	}

	return ret;
}
3791 | 3859 | ||
3792 | /* | 3860 | /* |
3793 | * If the current task has no IO context then create one and initialise it. | 3861 | * If the current task has no IO context then create one and initialise it. |
3794 | * If it does have a context, take a ref on it. | 3862 | * If it does have a context, take a ref on it. |
3795 | * | 3863 | * |
3796 | * This is always called in the context of the task which submitted the I/O. | 3864 | * This is always called in the context of the task which submitted the I/O. |
3797 | */ | 3865 | */ |
3798 | struct io_context *get_io_context(gfp_t gfp_flags, int node) | 3866 | struct io_context *get_io_context(gfp_t gfp_flags, int node) |
3799 | { | 3867 | { |
3800 | struct io_context *ret; | 3868 | struct io_context *ret; |
3801 | ret = current_io_context(gfp_flags, node); | 3869 | ret = current_io_context(gfp_flags, node); |
3802 | if (likely(ret)) | 3870 | if (likely(ret)) |
3803 | atomic_inc(&ret->refcount); | 3871 | atomic_inc(&ret->refcount); |
3804 | return ret; | 3872 | return ret; |
3805 | } | 3873 | } |
3806 | EXPORT_SYMBOL(get_io_context); | 3874 | EXPORT_SYMBOL(get_io_context); |
3807 | 3875 | ||
3808 | void copy_io_context(struct io_context **pdst, struct io_context **psrc) | 3876 | void copy_io_context(struct io_context **pdst, struct io_context **psrc) |
3809 | { | 3877 | { |
3810 | struct io_context *src = *psrc; | 3878 | struct io_context *src = *psrc; |
3811 | struct io_context *dst = *pdst; | 3879 | struct io_context *dst = *pdst; |
3812 | 3880 | ||
3813 | if (src) { | 3881 | if (src) { |
3814 | BUG_ON(atomic_read(&src->refcount) == 0); | 3882 | BUG_ON(atomic_read(&src->refcount) == 0); |
3815 | atomic_inc(&src->refcount); | 3883 | atomic_inc(&src->refcount); |
3816 | put_io_context(dst); | 3884 | put_io_context(dst); |
3817 | *pdst = src; | 3885 | *pdst = src; |
3818 | } | 3886 | } |
3819 | } | 3887 | } |
3820 | EXPORT_SYMBOL(copy_io_context); | 3888 | EXPORT_SYMBOL(copy_io_context); |
3821 | 3889 | ||
/* Exchange the io_context pointers held in *ioc1 and *ioc2. */
void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
{
	struct io_context *tmp = *ioc1;

	*ioc1 = *ioc2;
	*ioc2 = tmp;
}
EXPORT_SYMBOL(swap_io_context);
3830 | 3898 | ||
/*
 * sysfs parts below
 */

/*
 * One sysfs attribute of a request queue.  The show/store callbacks take
 * the queue directly; queue_attr_show()/queue_attr_store() below resolve
 * the kobject and serialise on q->sysfs_lock before dispatching here.
 */
struct queue_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct request_queue *, char *);
	ssize_t (*store)(struct request_queue *, const char *, size_t);
};
3839 | 3907 | ||
/*
 * Format an unsigned queue variable into a sysfs page buffer.
 * Returns the number of bytes written (sprintf semantics).
 */
static ssize_t
queue_var_show(unsigned int var, char *page)
{
	/* %u, not %d: 'var' is unsigned and %d misprints values > INT_MAX */
	return sprintf(page, "%u\n", var);
}
3845 | 3913 | ||
3846 | static ssize_t | 3914 | static ssize_t |
3847 | queue_var_store(unsigned long *var, const char *page, size_t count) | 3915 | queue_var_store(unsigned long *var, const char *page, size_t count) |
3848 | { | 3916 | { |
3849 | char *p = (char *) page; | 3917 | char *p = (char *) page; |
3850 | 3918 | ||
3851 | *var = simple_strtoul(p, &p, 10); | 3919 | *var = simple_strtoul(p, &p, 10); |
3852 | return count; | 3920 | return count; |
3853 | } | 3921 | } |
3854 | 3922 | ||
3855 | static ssize_t queue_requests_show(struct request_queue *q, char *page) | 3923 | static ssize_t queue_requests_show(struct request_queue *q, char *page) |
3856 | { | 3924 | { |
3857 | return queue_var_show(q->nr_requests, (page)); | 3925 | return queue_var_show(q->nr_requests, (page)); |
3858 | } | 3926 | } |
3859 | 3927 | ||
3860 | static ssize_t | 3928 | static ssize_t |
3861 | queue_requests_store(struct request_queue *q, const char *page, size_t count) | 3929 | queue_requests_store(struct request_queue *q, const char *page, size_t count) |
3862 | { | 3930 | { |
3863 | struct request_list *rl = &q->rq; | 3931 | struct request_list *rl = &q->rq; |
3864 | unsigned long nr; | 3932 | unsigned long nr; |
3865 | int ret = queue_var_store(&nr, page, count); | 3933 | int ret = queue_var_store(&nr, page, count); |
3866 | if (nr < BLKDEV_MIN_RQ) | 3934 | if (nr < BLKDEV_MIN_RQ) |
3867 | nr = BLKDEV_MIN_RQ; | 3935 | nr = BLKDEV_MIN_RQ; |
3868 | 3936 | ||
3869 | spin_lock_irq(q->queue_lock); | 3937 | spin_lock_irq(q->queue_lock); |
3870 | q->nr_requests = nr; | 3938 | q->nr_requests = nr; |
3871 | blk_queue_congestion_threshold(q); | 3939 | blk_queue_congestion_threshold(q); |
3872 | 3940 | ||
3873 | if (rl->count[READ] >= queue_congestion_on_threshold(q)) | 3941 | if (rl->count[READ] >= queue_congestion_on_threshold(q)) |
3874 | blk_set_queue_congested(q, READ); | 3942 | blk_set_queue_congested(q, READ); |
3875 | else if (rl->count[READ] < queue_congestion_off_threshold(q)) | 3943 | else if (rl->count[READ] < queue_congestion_off_threshold(q)) |
3876 | blk_clear_queue_congested(q, READ); | 3944 | blk_clear_queue_congested(q, READ); |
3877 | 3945 | ||
3878 | if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) | 3946 | if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) |
3879 | blk_set_queue_congested(q, WRITE); | 3947 | blk_set_queue_congested(q, WRITE); |
3880 | else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) | 3948 | else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) |
3881 | blk_clear_queue_congested(q, WRITE); | 3949 | blk_clear_queue_congested(q, WRITE); |
3882 | 3950 | ||
3883 | if (rl->count[READ] >= q->nr_requests) { | 3951 | if (rl->count[READ] >= q->nr_requests) { |
3884 | blk_set_queue_full(q, READ); | 3952 | blk_set_queue_full(q, READ); |
3885 | } else if (rl->count[READ]+1 <= q->nr_requests) { | 3953 | } else if (rl->count[READ]+1 <= q->nr_requests) { |
3886 | blk_clear_queue_full(q, READ); | 3954 | blk_clear_queue_full(q, READ); |
3887 | wake_up(&rl->wait[READ]); | 3955 | wake_up(&rl->wait[READ]); |
3888 | } | 3956 | } |
3889 | 3957 | ||
3890 | if (rl->count[WRITE] >= q->nr_requests) { | 3958 | if (rl->count[WRITE] >= q->nr_requests) { |
3891 | blk_set_queue_full(q, WRITE); | 3959 | blk_set_queue_full(q, WRITE); |
3892 | } else if (rl->count[WRITE]+1 <= q->nr_requests) { | 3960 | } else if (rl->count[WRITE]+1 <= q->nr_requests) { |
3893 | blk_clear_queue_full(q, WRITE); | 3961 | blk_clear_queue_full(q, WRITE); |
3894 | wake_up(&rl->wait[WRITE]); | 3962 | wake_up(&rl->wait[WRITE]); |
3895 | } | 3963 | } |
3896 | spin_unlock_irq(q->queue_lock); | 3964 | spin_unlock_irq(q->queue_lock); |
3897 | return ret; | 3965 | return ret; |
3898 | } | 3966 | } |
3899 | 3967 | ||
3900 | static ssize_t queue_ra_show(struct request_queue *q, char *page) | 3968 | static ssize_t queue_ra_show(struct request_queue *q, char *page) |
3901 | { | 3969 | { |
3902 | int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); | 3970 | int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); |
3903 | 3971 | ||
3904 | return queue_var_show(ra_kb, (page)); | 3972 | return queue_var_show(ra_kb, (page)); |
3905 | } | 3973 | } |
3906 | 3974 | ||
3907 | static ssize_t | 3975 | static ssize_t |
3908 | queue_ra_store(struct request_queue *q, const char *page, size_t count) | 3976 | queue_ra_store(struct request_queue *q, const char *page, size_t count) |
3909 | { | 3977 | { |
3910 | unsigned long ra_kb; | 3978 | unsigned long ra_kb; |
3911 | ssize_t ret = queue_var_store(&ra_kb, page, count); | 3979 | ssize_t ret = queue_var_store(&ra_kb, page, count); |
3912 | 3980 | ||
3913 | spin_lock_irq(q->queue_lock); | 3981 | spin_lock_irq(q->queue_lock); |
3914 | q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); | 3982 | q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); |
3915 | spin_unlock_irq(q->queue_lock); | 3983 | spin_unlock_irq(q->queue_lock); |
3916 | 3984 | ||
3917 | return ret; | 3985 | return ret; |
3918 | } | 3986 | } |
3919 | 3987 | ||
3920 | static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) | 3988 | static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) |
3921 | { | 3989 | { |
3922 | int max_sectors_kb = q->max_sectors >> 1; | 3990 | int max_sectors_kb = q->max_sectors >> 1; |
3923 | 3991 | ||
3924 | return queue_var_show(max_sectors_kb, (page)); | 3992 | return queue_var_show(max_sectors_kb, (page)); |
3925 | } | 3993 | } |
3926 | 3994 | ||
3927 | static ssize_t | 3995 | static ssize_t |
3928 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) | 3996 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) |
3929 | { | 3997 | { |
3930 | unsigned long max_sectors_kb, | 3998 | unsigned long max_sectors_kb, |
3931 | max_hw_sectors_kb = q->max_hw_sectors >> 1, | 3999 | max_hw_sectors_kb = q->max_hw_sectors >> 1, |
3932 | page_kb = 1 << (PAGE_CACHE_SHIFT - 10); | 4000 | page_kb = 1 << (PAGE_CACHE_SHIFT - 10); |
3933 | ssize_t ret = queue_var_store(&max_sectors_kb, page, count); | 4001 | ssize_t ret = queue_var_store(&max_sectors_kb, page, count); |
3934 | int ra_kb; | 4002 | int ra_kb; |
3935 | 4003 | ||
3936 | if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) | 4004 | if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) |
3937 | return -EINVAL; | 4005 | return -EINVAL; |
3938 | /* | 4006 | /* |
3939 | * Take the queue lock to update the readahead and max_sectors | 4007 | * Take the queue lock to update the readahead and max_sectors |
3940 | * values synchronously: | 4008 | * values synchronously: |
3941 | */ | 4009 | */ |
3942 | spin_lock_irq(q->queue_lock); | 4010 | spin_lock_irq(q->queue_lock); |
3943 | /* | 4011 | /* |
3944 | * Trim readahead window as well, if necessary: | 4012 | * Trim readahead window as well, if necessary: |
3945 | */ | 4013 | */ |
3946 | ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); | 4014 | ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); |
3947 | if (ra_kb > max_sectors_kb) | 4015 | if (ra_kb > max_sectors_kb) |
3948 | q->backing_dev_info.ra_pages = | 4016 | q->backing_dev_info.ra_pages = |
3949 | max_sectors_kb >> (PAGE_CACHE_SHIFT - 10); | 4017 | max_sectors_kb >> (PAGE_CACHE_SHIFT - 10); |
3950 | 4018 | ||
3951 | q->max_sectors = max_sectors_kb << 1; | 4019 | q->max_sectors = max_sectors_kb << 1; |
3952 | spin_unlock_irq(q->queue_lock); | 4020 | spin_unlock_irq(q->queue_lock); |
3953 | 4021 | ||
3954 | return ret; | 4022 | return ret; |
3955 | } | 4023 | } |
3956 | 4024 | ||
3957 | static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) | 4025 | static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) |
3958 | { | 4026 | { |
3959 | int max_hw_sectors_kb = q->max_hw_sectors >> 1; | 4027 | int max_hw_sectors_kb = q->max_hw_sectors >> 1; |
3960 | 4028 | ||
3961 | return queue_var_show(max_hw_sectors_kb, (page)); | 4029 | return queue_var_show(max_hw_sectors_kb, (page)); |
3962 | } | 4030 | } |
3963 | 4031 | ||
3964 | 4032 | ||
/* /sys/block/<dev>/queue/nr_requests: soft request limit (rw) */
static struct queue_sysfs_entry queue_requests_entry = {
	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
	.show = queue_requests_show,
	.store = queue_requests_store,
};

/* read_ahead_kb: read-ahead window size (rw) */
static struct queue_sysfs_entry queue_ra_entry = {
	.attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
	.show = queue_ra_show,
	.store = queue_ra_store,
};

/* max_sectors_kb: soft per-request size limit (rw) */
static struct queue_sysfs_entry queue_max_sectors_entry = {
	.attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
	.show = queue_max_sectors_show,
	.store = queue_max_sectors_store,
};

/* max_hw_sectors_kb: hardware per-request size limit (read-only) */
static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
	.attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
	.show = queue_max_hw_sectors_show,
};

/* scheduler: active elevator, handled by the elevator core (rw) */
static struct queue_sysfs_entry queue_iosched_entry = {
	.attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
	.show = elv_iosched_show,
	.store = elv_iosched_store,
};

/* attributes registered for every request queue kobject */
static struct attribute *default_attrs[] = {
	&queue_requests_entry.attr,
	&queue_ra_entry.attr,
	&queue_max_hw_sectors_entry.attr,
	&queue_max_sectors_entry.attr,
	&queue_iosched_entry.attr,
	NULL,
};
4002 | 4070 | ||
4003 | #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) | 4071 | #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) |
4004 | 4072 | ||
4005 | static ssize_t | 4073 | static ssize_t |
4006 | queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | 4074 | queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) |
4007 | { | 4075 | { |
4008 | struct queue_sysfs_entry *entry = to_queue(attr); | 4076 | struct queue_sysfs_entry *entry = to_queue(attr); |
4009 | struct request_queue *q = | 4077 | struct request_queue *q = |
4010 | container_of(kobj, struct request_queue, kobj); | 4078 | container_of(kobj, struct request_queue, kobj); |
4011 | ssize_t res; | 4079 | ssize_t res; |
4012 | 4080 | ||
4013 | if (!entry->show) | 4081 | if (!entry->show) |
4014 | return -EIO; | 4082 | return -EIO; |
4015 | mutex_lock(&q->sysfs_lock); | 4083 | mutex_lock(&q->sysfs_lock); |
4016 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | 4084 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { |
4017 | mutex_unlock(&q->sysfs_lock); | 4085 | mutex_unlock(&q->sysfs_lock); |
4018 | return -ENOENT; | 4086 | return -ENOENT; |
4019 | } | 4087 | } |
4020 | res = entry->show(q, page); | 4088 | res = entry->show(q, page); |
4021 | mutex_unlock(&q->sysfs_lock); | 4089 | mutex_unlock(&q->sysfs_lock); |
4022 | return res; | 4090 | return res; |
4023 | } | 4091 | } |
4024 | 4092 | ||
4025 | static ssize_t | 4093 | static ssize_t |
4026 | queue_attr_store(struct kobject *kobj, struct attribute *attr, | 4094 | queue_attr_store(struct kobject *kobj, struct attribute *attr, |
4027 | const char *page, size_t length) | 4095 | const char *page, size_t length) |
4028 | { | 4096 | { |
4029 | struct queue_sysfs_entry *entry = to_queue(attr); | 4097 | struct queue_sysfs_entry *entry = to_queue(attr); |
4030 | struct request_queue *q = container_of(kobj, struct request_queue, kobj); | 4098 | struct request_queue *q = container_of(kobj, struct request_queue, kobj); |
4031 | 4099 | ||
4032 | ssize_t res; | 4100 | ssize_t res; |
4033 | 4101 | ||
4034 | if (!entry->store) | 4102 | if (!entry->store) |
4035 | return -EIO; | 4103 | return -EIO; |
4036 | mutex_lock(&q->sysfs_lock); | 4104 | mutex_lock(&q->sysfs_lock); |
4037 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | 4105 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { |
4038 | mutex_unlock(&q->sysfs_lock); | 4106 | mutex_unlock(&q->sysfs_lock); |
4039 | return -ENOENT; | 4107 | return -ENOENT; |
4040 | } | 4108 | } |
4041 | res = entry->store(q, page, length); | 4109 | res = entry->store(q, page, length); |
4042 | mutex_unlock(&q->sysfs_lock); | 4110 | mutex_unlock(&q->sysfs_lock); |
4043 | return res; | 4111 | return res; |
4044 | } | 4112 | } |
4045 | 4113 | ||
/* route all queue attribute accesses through the dispatchers above */
static struct sysfs_ops queue_sysfs_ops = {
	.show	= queue_attr_show,
	.store	= queue_attr_store,
};

/* kobject type for request queues; release frees the queue */
static struct kobj_type queue_ktype = {
	.sysfs_ops	= &queue_sysfs_ops,
	.default_attrs	= default_attrs,
	.release	= blk_release_queue,
};
4056 | 4124 | ||
4057 | int blk_register_queue(struct gendisk *disk) | 4125 | int blk_register_queue(struct gendisk *disk) |
4058 | { | 4126 | { |
4059 | int ret; | 4127 | int ret; |
4060 | 4128 | ||
4061 | struct request_queue *q = disk->queue; | 4129 | struct request_queue *q = disk->queue; |
4062 | 4130 | ||
4063 | if (!q || !q->request_fn) | 4131 | if (!q || !q->request_fn) |
4064 | return -ENXIO; | 4132 | return -ENXIO; |
4065 | 4133 | ||
4066 | q->kobj.parent = kobject_get(&disk->kobj); | 4134 | q->kobj.parent = kobject_get(&disk->kobj); |
4067 | 4135 | ||
4068 | ret = kobject_add(&q->kobj); | 4136 | ret = kobject_add(&q->kobj); |
4069 | if (ret < 0) | 4137 | if (ret < 0) |
4070 | return ret; | 4138 | return ret; |
4071 | 4139 | ||
4072 | kobject_uevent(&q->kobj, KOBJ_ADD); | 4140 | kobject_uevent(&q->kobj, KOBJ_ADD); |
4073 | 4141 | ||
4074 | ret = elv_register_queue(q); | 4142 | ret = elv_register_queue(q); |
4075 | if (ret) { | 4143 | if (ret) { |
4076 | kobject_uevent(&q->kobj, KOBJ_REMOVE); | 4144 | kobject_uevent(&q->kobj, KOBJ_REMOVE); |
4077 | kobject_del(&q->kobj); | 4145 | kobject_del(&q->kobj); |
4078 | return ret; | 4146 | return ret; |
4079 | } | 4147 | } |
4080 | 4148 | ||
4081 | return 0; | 4149 | return 0; |
4082 | } | 4150 | } |
4083 | 4151 | ||
4084 | void blk_unregister_queue(struct gendisk *disk) | 4152 | void blk_unregister_queue(struct gendisk *disk) |
4085 | { | 4153 | { |
4086 | struct request_queue *q = disk->queue; | 4154 | struct request_queue *q = disk->queue; |
4087 | 4155 | ||
4088 | if (q && q->request_fn) { | 4156 | if (q && q->request_fn) { |
4089 | elv_unregister_queue(q); | 4157 | elv_unregister_queue(q); |
4090 | 4158 | ||
4091 | kobject_uevent(&q->kobj, KOBJ_REMOVE); | 4159 | kobject_uevent(&q->kobj, KOBJ_REMOVE); |
4092 | kobject_del(&q->kobj); | 4160 | kobject_del(&q->kobj); |
4093 | kobject_put(&disk->kobj); | 4161 | kobject_put(&disk->kobj); |
4094 | } | 4162 | } |
4095 | } | 4163 | } |
4096 | 4164 |
include/linux/blkdev.h
1 | #ifndef _LINUX_BLKDEV_H | 1 | #ifndef _LINUX_BLKDEV_H |
2 | #define _LINUX_BLKDEV_H | 2 | #define _LINUX_BLKDEV_H |
3 | 3 | ||
4 | #ifdef CONFIG_BLOCK | 4 | #ifdef CONFIG_BLOCK |
5 | 5 | ||
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/major.h> | 7 | #include <linux/major.h> |
8 | #include <linux/genhd.h> | 8 | #include <linux/genhd.h> |
9 | #include <linux/list.h> | 9 | #include <linux/list.h> |
10 | #include <linux/timer.h> | 10 | #include <linux/timer.h> |
11 | #include <linux/workqueue.h> | 11 | #include <linux/workqueue.h> |
12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
13 | #include <linux/backing-dev.h> | 13 | #include <linux/backing-dev.h> |
14 | #include <linux/wait.h> | 14 | #include <linux/wait.h> |
15 | #include <linux/mempool.h> | 15 | #include <linux/mempool.h> |
16 | #include <linux/bio.h> | 16 | #include <linux/bio.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/stringify.h> | 18 | #include <linux/stringify.h> |
19 | #include <linux/bsg.h> | 19 | #include <linux/bsg.h> |
20 | 20 | ||
21 | #include <asm/scatterlist.h> | 21 | #include <asm/scatterlist.h> |
22 | 22 | ||
23 | struct scsi_ioctl_command; | 23 | struct scsi_ioctl_command; |
24 | 24 | ||
25 | struct request_queue; | 25 | struct request_queue; |
26 | typedef struct request_queue request_queue_t __deprecated; | 26 | typedef struct request_queue request_queue_t __deprecated; |
27 | struct elevator_queue; | 27 | struct elevator_queue; |
28 | typedef struct elevator_queue elevator_t; | 28 | typedef struct elevator_queue elevator_t; |
29 | struct request_pm_state; | 29 | struct request_pm_state; |
30 | struct blk_trace; | 30 | struct blk_trace; |
31 | struct request; | 31 | struct request; |
32 | struct sg_io_hdr; | 32 | struct sg_io_hdr; |
33 | 33 | ||
34 | #define BLKDEV_MIN_RQ 4 | 34 | #define BLKDEV_MIN_RQ 4 |
35 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ | 35 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ |
36 | 36 | ||
/*
 * This is the per-process anticipatory I/O scheduler state.
 */
struct as_io_context {
	spinlock_t lock;

	void (*dtor)(struct as_io_context *aic); /* destructor */
	void (*exit)(struct as_io_context *aic); /* called on task exit */

	unsigned long state;
	atomic_t nr_queued; /* queued reads & sync writes */
	atomic_t nr_dispatched; /* number of requests gone to the drivers */

	/* IO History tracking */
	/* Thinktime */
	unsigned long last_end_request;
	unsigned long ttime_total;
	unsigned long ttime_samples;
	unsigned long ttime_mean;
	/* Layout pattern */
	unsigned int seek_samples;
	sector_t last_request_pos;
	u64 seek_total;
	sector_t seek_mean;
};
62 | 62 | ||
struct cfq_queue;

/*
 * Per-process CFQ scheduler state, linked into the owning
 * io_context->cic_root rb tree via rb_node.
 */
struct cfq_io_context {
	struct rb_node rb_node;
	void *key;

	struct cfq_queue *cfqq[2];

	struct io_context *ioc;	/* owning io_context */

	/* think-time and seek statistics */
	unsigned long last_end_request;
	sector_t last_request_pos;

	unsigned long ttime_total;
	unsigned long ttime_samples;
	unsigned long ttime_mean;

	unsigned int seek_samples;
	u64 seek_total;
	sector_t seek_mean;

	struct list_head queue_list;

	void (*dtor)(struct io_context *); /* destructor */
	void (*exit)(struct io_context *); /* called on task exit */
};
88 | 88 | ||
/*
 * This is the per-process I/O subsystem state.  It is refcounted and
 * kmalloc'ed.  Currently all fields are modified in process io context
 * (apart from the atomic refcount), so require no locking.
 */
struct io_context {
	atomic_t refcount;
	struct task_struct *task;	/* owning task, NULL once it exits */

	unsigned int ioprio_changed;

	/*
	 * For request batching
	 */
	unsigned long last_waited; /* Time last woken after wait for request */
	int nr_batch_requests;     /* Number of requests left in the batch */

	struct as_io_context *aic;	/* AS scheduler state, if any */
	struct rb_root cic_root;	/* tree of cfq_io_context */
	void *ioc_data;
};
110 | 110 | ||
void put_io_context(struct io_context *ioc);	/* drop a ref; frees at zero */
void exit_io_context(void);	/* detach and release current task's context */
struct io_context *get_io_context(gfp_t gfp_flags, int node);
void copy_io_context(struct io_context **pdst, struct io_context **psrc);
void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
116 | 116 | ||
struct request;
/* completion callback invoked when a request is ended */
typedef void (rq_end_io_fn)(struct request *, int);

/*
 * Per-queue free-request accounting; the per-direction arrays are
 * indexed by READ/WRITE.
 */
struct request_list {
	int count[2];
	int starved[2];
	int elvpriv;
	mempool_t *rq_pool;
	wait_queue_head_t wait[2];
};
127 | 127 | ||
/*
 * request command types (rq->cmd_type)
 */
enum rq_cmd_type_bits {
	REQ_TYPE_FS = 1,	/* fs request */
	REQ_TYPE_BLOCK_PC,	/* scsi command */
	REQ_TYPE_SENSE,		/* sense request */
	REQ_TYPE_PM_SUSPEND,	/* suspend request */
	REQ_TYPE_PM_RESUME,	/* resume request */
	REQ_TYPE_PM_SHUTDOWN,	/* shutdown request */
	REQ_TYPE_FLUSH,		/* flush request */
	REQ_TYPE_SPECIAL,	/* driver defined type */
	REQ_TYPE_LINUX_BLOCK,	/* generic block layer message */
	/*
	 * for ATA/ATAPI devices. this really doesn't belong here, ide should
	 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
	 * private REQ_LB opcodes to differentiate what type of request this is
	 */
	REQ_TYPE_ATA_CMD,
	REQ_TYPE_ATA_TASK,
	REQ_TYPE_ATA_TASKFILE,
	REQ_TYPE_ATA_PC,
};
151 | 151 | ||
/*
 * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
 * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
 * SCSI cdb).
 *
 * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need,
 * typically to differentiate REQ_TYPE_SPECIAL requests.
 *
 */
enum {
	/*
	 * just examples for now
	 */
	REQ_LB_OP_EJECT = 0x40,		/* eject request */
	REQ_LB_OP_FLUSH = 0x41,		/* flush device */
};
168 | 168 | ||
/*
 * request type modified bits. first three bits match BIO_RW* bits, important.
 * These are bit *numbers*; the REQ_* mask macros below turn them into flags
 * for rq->cmd_flags.
 */
enum rq_flag_bits {
	__REQ_RW,		/* not set, read. set, write */
	__REQ_FAILFAST,		/* no low level driver retries */
	__REQ_SORTED,		/* elevator knows about this request */
	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
	__REQ_HARDBARRIER,	/* may not be passed by drive either */
	__REQ_FUA,		/* forced unit access */
	__REQ_NOMERGE,		/* don't touch this for merging */
	__REQ_STARTED,		/* drive already may have started this one */
	__REQ_DONTPREP,		/* don't call prep for this one */
	__REQ_QUEUED,		/* uses queueing */
	__REQ_ELVPRIV,		/* elevator private data attached */
	__REQ_FAILED,		/* set if the request failed */
	__REQ_QUIET,		/* don't worry about errors */
	__REQ_PREEMPT,		/* set for "ide_preempt" requests */
	__REQ_ORDERED_COLOR,	/* is before or after barrier */
	__REQ_RW_SYNC,		/* request is sync (O_DIRECT) */
	__REQ_ALLOCED,		/* request came from our alloc pool */
	__REQ_RW_META,		/* metadata io request */
	__REQ_NR_BITS,		/* stops here */
};
193 | 193 | ||
/* mask versions of the rq_flag_bits above, for use with rq->cmd_flags */
#define REQ_RW		(1 << __REQ_RW)
#define REQ_FAILFAST	(1 << __REQ_FAILFAST)
#define REQ_SORTED	(1 << __REQ_SORTED)
#define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
#define REQ_HARDBARRIER	(1 << __REQ_HARDBARRIER)
#define REQ_FUA		(1 << __REQ_FUA)
#define REQ_NOMERGE	(1 << __REQ_NOMERGE)
#define REQ_STARTED	(1 << __REQ_STARTED)
#define REQ_DONTPREP	(1 << __REQ_DONTPREP)
#define REQ_QUEUED	(1 << __REQ_QUEUED)
#define REQ_ELVPRIV	(1 << __REQ_ELVPRIV)
#define REQ_FAILED	(1 << __REQ_FAILED)
#define REQ_QUIET	(1 << __REQ_QUIET)
#define REQ_PREEMPT	(1 << __REQ_PREEMPT)
#define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
#define REQ_RW_SYNC	(1 << __REQ_RW_SYNC)
#define REQ_ALLOCED	(1 << __REQ_ALLOCED)
#define REQ_RW_META	(1 << __REQ_RW_META)

/* maximum size of the command descriptor block held in rq->cmd[] */
#define BLK_MAX_CDB	16
214 | 214 | ||
/*
 * The main unit of I/O in the block layer.
 * try to put the fields that are referenced together in the same cacheline
 */
struct request {
	struct list_head queuelist;	/* link on queue/io-scheduler lists */
	struct list_head donelist;	/* link used for request completion */

	struct request_queue *q;	/* owning queue */

	unsigned int cmd_flags;		/* REQ_* mask, see rq_flag_bits */
	enum rq_cmd_type_bits cmd_type;

	/* Maintain bio traversal state for part by part I/O submission.
	 * hard_* are block layer internals, no driver should touch them!
	 */

	sector_t sector;		/* next sector to submit */
	sector_t hard_sector;		/* next sector to complete */
	unsigned long nr_sectors;	/* no. of sectors left to submit */
	unsigned long hard_nr_sectors;	/* no. of sectors left to complete */
	/* no. of sectors left to submit in the current segment */
	unsigned int current_nr_sectors;

	/* no. of sectors left to complete in the current segment */
	unsigned int hard_cur_sectors;

	struct bio *bio;		/* head of the request's bio chain */
	struct bio *biotail;		/* tail of the chain, for appends */

	struct hlist_node hash;	/* merge hash */
	/*
	 * The rb_node is only used inside the io scheduler, requests
	 * are pruned when moved to the dispatch queue. So let the
	 * completion_data share space with the rb_node.
	 */
	union {
		struct rb_node rb_node;	/* sort/lookup */
		void *completion_data;
	};

	/*
	 * two pointers are available for the IO schedulers, if they need
	 * more they have to dynamically allocate it.
	 */
	void *elevator_private;
	void *elevator_private2;

	struct gendisk *rq_disk;
	unsigned long start_time;	/* in jiffies — NOTE(review): confirm */

	/* Number of scatter-gather DMA addr+len pairs after
	 * physical address coalescing is performed.
	 */
	unsigned short nr_phys_segments;

	/* Number of scatter-gather addr+len pairs after
	 * physical and DMA remapping hardware coalescing is performed.
	 * This is the number of scatter-gather entries the driver
	 * will actually have to deal with after DMA mapping is done.
	 */
	unsigned short nr_hw_segments;

	unsigned short ioprio;

	void *special;			/* opaque, driver defined */
	char *buffer;

	int tag;			/* tag number when tagged queueing */
	int errors;

	int ref_count;			/* reference count, see blkdev get/put */

	/*
	 * when request is used as a packet command carrier
	 */
	unsigned int cmd_len;
	unsigned char cmd[BLK_MAX_CDB];

	unsigned int data_len;
	unsigned int sense_len;
	void *data;
	void *sense;

	unsigned int timeout;
	int retries;

	/*
	 * completion callback.
	 */
	rq_end_io_fn *end_io;
	void *end_io_data;

	/* for bidi */
	struct request *next_rq;
};
310 | 310 | ||
/*
 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
 * requests. Some step values could eventually be made generic.
 */
struct request_pm_state
{
	/* PM state machine step value, currently driver specific */
	int	pm_step;
	/* requested PM state value (S1, S2, S3, S4, ...) */
	u32	pm_state;
	void*	data;		/* for driver use */
};
323 | 323 | ||
324 | #include <linux/elevator.h> | 324 | #include <linux/elevator.h> |
325 | 325 | ||
/*
 * Function types a driver (or stacking layer) plugs into a request queue;
 * see the corresponding hook fields in struct request_queue below.
 */
typedef void (request_fn_proc) (struct request_queue *q);
typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
typedef int (prep_rq_fn) (struct request_queue *, struct request *);
typedef void (unplug_fn) (struct request_queue *);

struct bio_vec;
typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *);
typedef int (issue_flush_fn) (struct request_queue *, struct gendisk *, sector_t *);
typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
typedef void (softirq_done_fn)(struct request *);
336 | 336 | ||
/* tagged queueing up/down state */
enum blk_queue_state {
	Queue_down,
	Queue_up,
};
341 | 341 | ||
/* per-queue tagged command queueing state */
struct blk_queue_tag {
	struct request **tag_index;	/* map of busy tags */
	unsigned long *tag_map;		/* bit map of free/busy tags */
	struct list_head busy_list;	/* fifo list of busy tags */
	int busy;			/* current depth */
	int max_depth;			/* what we will send to device */
	int real_max_depth;		/* what the array can hold */
	atomic_t refcnt;		/* map can be shared */
};
351 | 351 | ||
/*
 * One request queue per block device, holding the pending requests, the
 * driver/elevator hooks that operate on them, and the queue's limits.
 */
struct request_queue
{
	/*
	 * Together with queue_head for cacheline sharing
	 */
	struct list_head	queue_head;
	struct request		*last_merge;
	elevator_t		*elevator;

	/*
	 * the queue request freelist, one for reads and one for writes
	 */
	struct request_list	rq;

	request_fn_proc		*request_fn;
	make_request_fn		*make_request_fn;
	prep_rq_fn		*prep_rq_fn;
	unplug_fn		*unplug_fn;
	merge_bvec_fn		*merge_bvec_fn;
	issue_flush_fn		*issue_flush_fn;
	prepare_flush_fn	*prepare_flush_fn;
	softirq_done_fn		*softirq_done_fn;

	/*
	 * Dispatch queue sorting
	 */
	sector_t		end_sector;
	struct request		*boundary_rq;

	/*
	 * Auto-unplugging state
	 */
	struct timer_list	unplug_timer;
	int			unplug_thresh;	/* After this many requests */
	unsigned long		unplug_delay;	/* After this many jiffies */
	struct work_struct	unplug_work;

	struct backing_dev_info	backing_dev_info;

	/*
	 * The queue owner gets to use this for whatever they like.
	 * ll_rw_blk doesn't touch it.
	 */
	void			*queuedata;

	/*
	 * queue needs bounce pages for pages above this limit
	 */
	unsigned long		bounce_pfn;
	gfp_t			bounce_gfp;

	/*
	 * various queue flags, see QUEUE_* below
	 */
	unsigned long		queue_flags;

	/*
	 * protects queue structures from reentrancy. ->__queue_lock should
	 * _never_ be used directly, it is queue private. always use
	 * ->queue_lock.
	 */
	spinlock_t		__queue_lock;
	spinlock_t		*queue_lock;

	/*
	 * queue kobject
	 */
	struct kobject kobj;

	/*
	 * queue settings
	 */
	unsigned long		nr_requests;	/* Max # of requests */
	unsigned int		nr_congestion_on;
	unsigned int		nr_congestion_off;
	unsigned int		nr_batching;

	unsigned int		max_sectors;
	unsigned int		max_hw_sectors;
	unsigned short		max_phys_segments;
	unsigned short		max_hw_segments;
	unsigned short		hardsect_size;
	unsigned int		max_segment_size;

	unsigned long		seg_boundary_mask;
	unsigned int		dma_alignment;

	struct blk_queue_tag	*queue_tags;

	unsigned int		nr_sorted;
	unsigned int		in_flight;

	/*
	 * sg stuff
	 */
	unsigned int		sg_timeout;
	unsigned int		sg_reserved_size;
	int			node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
	struct blk_trace	*blk_trace;
#endif
	/*
	 * reserved for flush operations
	 */
	unsigned int		ordered, next_ordered, ordseq;
	int			orderr, ordcolor;
	struct request		pre_flush_rq, bar_rq, post_flush_rq;
	struct request		*orig_bar_rq;

	struct mutex		sysfs_lock;

#if defined(CONFIG_BLK_DEV_BSG)
	struct bsg_class_device bsg_dev;
#endif
};
467 | 467 | ||
/* queue_flags bit numbers, manipulated with {set,clear,test}_bit */
#define QUEUE_FLAG_CLUSTER	0	/* cluster several segments into 1 */
#define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
#define QUEUE_FLAG_STOPPED	2	/* queue is stopped */
#define	QUEUE_FLAG_READFULL	3	/* read queue has been filled */
#define QUEUE_FLAG_WRITEFULL	4	/* write queue has been filled */
#define QUEUE_FLAG_DEAD		5	/* queue being torn down */
#define QUEUE_FLAG_REENTER	6	/* Re-entrancy avoidance */
#define QUEUE_FLAG_PLUGGED	7	/* queue is plugged */
#define QUEUE_FLAG_ELVSWITCH	8	/* don't use elevator, just do FIFO */
#define QUEUE_FLAG_BIDI		9	/* queue supports bidi requests */
478 | 478 | ||
enum {
	/*
	 * Hardbarrier is supported with one of the following methods.
	 *
	 * NONE		: hardbarrier unsupported
	 * DRAIN	: ordering by draining is enough
	 * DRAIN_FLUSH	: ordering by draining w/ pre and post flushes
	 * DRAIN_FUA	: ordering by draining w/ pre flush and FUA write
	 * TAG		: ordering by tag is enough
	 * TAG_FLUSH	: ordering by tag w/ pre and post flushes
	 * TAG_FUA	: ordering by tag w/ pre flush and FUA write
	 */
	QUEUE_ORDERED_NONE	= 0x00,
	QUEUE_ORDERED_DRAIN	= 0x01,
	QUEUE_ORDERED_TAG	= 0x02,

	QUEUE_ORDERED_PREFLUSH	= 0x10,
	QUEUE_ORDERED_POSTFLUSH	= 0x20,
	QUEUE_ORDERED_FUA	= 0x40,

	QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
	QUEUE_ORDERED_DRAIN_FUA	= QUEUE_ORDERED_DRAIN |
			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
	QUEUE_ORDERED_TAG_FLUSH	= QUEUE_ORDERED_TAG |
			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
	QUEUE_ORDERED_TAG_FUA	= QUEUE_ORDERED_TAG |
			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,

	/*
	 * Ordered operation sequence
	 */
	QUEUE_ORDSEQ_STARTED	= 0x01,	/* flushing in progress */
	QUEUE_ORDSEQ_DRAIN	= 0x02,	/* waiting for the queue to be drained */
	QUEUE_ORDSEQ_PREFLUSH	= 0x04,	/* pre-flushing in progress */
	QUEUE_ORDSEQ_BAR	= 0x08,	/* original barrier req in progress */
	QUEUE_ORDSEQ_POSTFLUSH	= 0x10,	/* post-flushing in progress */
	QUEUE_ORDSEQ_DONE	= 0x20,
};
518 | 518 | ||
/* queue state predicates */
#define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
#define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
#define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_flushing(q)	((q)->ordseq)

/* request type predicates, keyed off rq->cmd_type */
#define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
#define blk_pc_request(rq)	((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
#define blk_special_request(rq)	((rq)->cmd_type == REQ_TYPE_SPECIAL)
#define blk_sense_request(rq)	((rq)->cmd_type == REQ_TYPE_SENSE)

#define blk_noretry_request(rq)	((rq)->cmd_flags & REQ_FAILFAST)
#define blk_rq_started(rq)	((rq)->cmd_flags & REQ_STARTED)

/* only started fs requests are accounted in disk statistics */
#define blk_account_rq(rq)	(blk_rq_started(rq) && blk_fs_request(rq))

#define blk_pm_suspend_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
#define blk_pm_resume_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_RESUME)
#define blk_pm_request(rq)	\
	(blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))

#define blk_sorted_rq(rq)	((rq)->cmd_flags & REQ_SORTED)
#define blk_barrier_rq(rq)	((rq)->cmd_flags & REQ_HARDBARRIER)
#define blk_fua_rq(rq)		((rq)->cmd_flags & REQ_FUA)
#define blk_bidi_rq(rq)		((rq)->next_rq != NULL)

#define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)

/* bit 0 of cmd_flags is __REQ_RW: 0 = read, 1 = write */
#define rq_data_dir(rq)		((rq)->cmd_flags & 1)

/*
 * We regard a request as sync, if it's a READ or a SYNC write.
 */
#define rq_is_sync(rq)		(rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC)
#define rq_is_meta(rq)		((rq)->cmd_flags & REQ_RW_META)
553 | 553 | ||
554 | static inline int blk_queue_full(struct request_queue *q, int rw) | 554 | static inline int blk_queue_full(struct request_queue *q, int rw) |
555 | { | 555 | { |
556 | if (rw == READ) | 556 | if (rw == READ) |
557 | return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); | 557 | return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); |
558 | return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); | 558 | return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); |
559 | } | 559 | } |
560 | 560 | ||
561 | static inline void blk_set_queue_full(struct request_queue *q, int rw) | 561 | static inline void blk_set_queue_full(struct request_queue *q, int rw) |
562 | { | 562 | { |
563 | if (rw == READ) | 563 | if (rw == READ) |
564 | set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); | 564 | set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); |
565 | else | 565 | else |
566 | set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); | 566 | set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); |
567 | } | 567 | } |
568 | 568 | ||
569 | static inline void blk_clear_queue_full(struct request_queue *q, int rw) | 569 | static inline void blk_clear_queue_full(struct request_queue *q, int rw) |
570 | { | 570 | { |
571 | if (rw == READ) | 571 | if (rw == READ) |
572 | clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); | 572 | clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); |
573 | else | 573 | else |
574 | clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); | 574 | clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); |
575 | } | 575 | } |
576 | 576 | ||
577 | 577 | ||
/*
 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may
 * it already be started by driver.
 */
#define RQ_NOMERGE_FLAGS	\
	(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
#define rq_mergeable(rq)	\
	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))

/*
 * q->prep_rq_fn return values
 */
#define BLKPREP_OK		0	/* serve it */
#define BLKPREP_KILL		1	/* fatal error, kill */
#define BLKPREP_DEFER		2	/* leave on queue */
593 | 593 | ||
/* highest directly-addressable / any page frame numbers, set at init */
extern unsigned long blk_max_low_pfn, blk_max_pfn;

/*
 * standard bounce addresses:
 *
 * BLK_BOUNCE_HIGH	: bounce all highmem pages
 * BLK_BOUNCE_ANY	: don't bounce anything
 * BLK_BOUNCE_ISA	: bounce pages above ISA DMA boundary
 */
#define BLK_BOUNCE_HIGH		((u64)blk_max_low_pfn << PAGE_SHIFT)
#define BLK_BOUNCE_ANY		((u64)blk_max_pfn << PAGE_SHIFT)
#define BLK_BOUNCE_ISA		(ISA_DMA_THRESHOLD)

/*
 * default timeout for SG_IO if none specified
 */
#define BLK_DEFAULT_SG_TIMEOUT	(60 * HZ)
611 | 611 | ||
612 | #ifdef CONFIG_BOUNCE | 612 | #ifdef CONFIG_BOUNCE |
613 | extern int init_emergency_isa_pool(void); | 613 | extern int init_emergency_isa_pool(void); |
614 | extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); | 614 | extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); |
615 | #else | 615 | #else |
static inline int init_emergency_isa_pool(void)
{
	/* !CONFIG_BOUNCE stub: no ISA bounce pool to set up, report success */
	return 0;
}
static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
{
	/* !CONFIG_BOUNCE stub: no bounce buffering, bio passes through untouched */
}
#endif /* CONFIG_BOUNCE */
624 | 624 | ||
/*
 * Iteration state used by rq_for_each_segment() to walk every bio_vec
 * segment of every bio in a request.
 */
struct req_iterator {
	int i;			/* current segment index within ->bio */
	struct bio *bio;	/* bio currently being walked */
};
629 | 629 | ||
630 | /* This should not be used directly - use rq_for_each_segment */ | 630 | /* This should not be used directly - use rq_for_each_segment */ |
631 | #define __rq_for_each_bio(_bio, rq) \ | 631 | #define __rq_for_each_bio(_bio, rq) \ |
632 | if ((rq->bio)) \ | 632 | if ((rq->bio)) \ |
633 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) | 633 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) |
634 | 634 | ||
635 | #define rq_for_each_segment(bvl, _rq, _iter) \ | 635 | #define rq_for_each_segment(bvl, _rq, _iter) \ |
636 | __rq_for_each_bio(_iter.bio, _rq) \ | 636 | __rq_for_each_bio(_iter.bio, _rq) \ |
637 | bio_for_each_segment(bvl, _iter.bio, _iter.i) | 637 | bio_for_each_segment(bvl, _iter.bio, _iter.i) |
638 | 638 | ||
639 | #define rq_iter_last(rq, _iter) \ | 639 | #define rq_iter_last(rq, _iter) \ |
640 | (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) | 640 | (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) |
641 | 641 | ||
642 | extern int blk_register_queue(struct gendisk *disk); | 642 | extern int blk_register_queue(struct gendisk *disk); |
643 | extern void blk_unregister_queue(struct gendisk *disk); | 643 | extern void blk_unregister_queue(struct gendisk *disk); |
644 | extern void register_disk(struct gendisk *dev); | 644 | extern void register_disk(struct gendisk *dev); |
645 | extern void generic_make_request(struct bio *bio); | 645 | extern void generic_make_request(struct bio *bio); |
646 | extern void blk_put_request(struct request *); | 646 | extern void blk_put_request(struct request *); |
647 | extern void __blk_put_request(struct request_queue *, struct request *); | 647 | extern void __blk_put_request(struct request_queue *, struct request *); |
648 | extern void blk_end_sync_rq(struct request *rq, int error); | 648 | extern void blk_end_sync_rq(struct request *rq, int error); |
649 | extern struct request *blk_get_request(struct request_queue *, int, gfp_t); | 649 | extern struct request *blk_get_request(struct request_queue *, int, gfp_t); |
650 | extern void blk_insert_request(struct request_queue *, struct request *, int, void *); | 650 | extern void blk_insert_request(struct request_queue *, struct request *, int, void *); |
651 | extern void blk_requeue_request(struct request_queue *, struct request *); | 651 | extern void blk_requeue_request(struct request_queue *, struct request *); |
652 | extern void blk_plug_device(struct request_queue *); | 652 | extern void blk_plug_device(struct request_queue *); |
653 | extern int blk_remove_plug(struct request_queue *); | 653 | extern int blk_remove_plug(struct request_queue *); |
654 | extern void blk_recount_segments(struct request_queue *, struct bio *); | 654 | extern void blk_recount_segments(struct request_queue *, struct bio *); |
655 | extern int scsi_cmd_ioctl(struct file *, struct request_queue *, | 655 | extern int scsi_cmd_ioctl(struct file *, struct request_queue *, |
656 | struct gendisk *, unsigned int, void __user *); | 656 | struct gendisk *, unsigned int, void __user *); |
657 | extern int sg_scsi_ioctl(struct file *, struct request_queue *, | 657 | extern int sg_scsi_ioctl(struct file *, struct request_queue *, |
658 | struct gendisk *, struct scsi_ioctl_command __user *); | 658 | struct gendisk *, struct scsi_ioctl_command __user *); |
659 | 659 | ||
660 | /* | 660 | /* |
661 | * Temporary export, until SCSI gets fixed up. | 661 | * Temporary export, until SCSI gets fixed up. |
662 | */ | 662 | */ |
663 | extern int blk_rq_append_bio(struct request_queue *q, struct request *rq, | 663 | extern int blk_rq_append_bio(struct request_queue *q, struct request *rq, |
664 | struct bio *bio); | 664 | struct bio *bio); |
665 | 665 | ||
666 | /* | 666 | /* |
 * A queue has just exited congestion.  Note this in the global counter of
668 | * congested queues, and wake up anyone who was waiting for requests to be | 668 | * congested queues, and wake up anyone who was waiting for requests to be |
669 | * put back. | 669 | * put back. |
670 | */ | 670 | */ |
static inline void blk_clear_queue_congested(struct request_queue *q, int rw)
{
	/* forward to the backing_dev_info congestion tracking for direction @rw */
	clear_bdi_congested(&q->backing_dev_info, rw);
}
675 | 675 | ||
676 | /* | 676 | /* |
677 | * A queue has just entered congestion. Flag that in the queue's VM-visible | 677 | * A queue has just entered congestion. Flag that in the queue's VM-visible |
 * state flags and increment the global counter of congested queues.
679 | */ | 679 | */ |
static inline void blk_set_queue_congested(struct request_queue *q, int rw)
{
	/* forward to the backing_dev_info congestion tracking for direction @rw */
	set_bdi_congested(&q->backing_dev_info, rw);
}
684 | 684 | ||
685 | extern void blk_start_queue(struct request_queue *q); | 685 | extern void blk_start_queue(struct request_queue *q); |
686 | extern void blk_stop_queue(struct request_queue *q); | 686 | extern void blk_stop_queue(struct request_queue *q); |
687 | extern void blk_sync_queue(struct request_queue *q); | 687 | extern void blk_sync_queue(struct request_queue *q); |
688 | extern void __blk_stop_queue(struct request_queue *q); | 688 | extern void __blk_stop_queue(struct request_queue *q); |
689 | extern void blk_run_queue(struct request_queue *); | 689 | extern void blk_run_queue(struct request_queue *); |
690 | extern void blk_start_queueing(struct request_queue *); | 690 | extern void blk_start_queueing(struct request_queue *); |
691 | extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); | 691 | extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); |
692 | extern int blk_rq_unmap_user(struct bio *); | 692 | extern int blk_rq_unmap_user(struct bio *); |
693 | extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); | 693 | extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); |
694 | extern int blk_rq_map_user_iov(struct request_queue *, struct request *, | 694 | extern int blk_rq_map_user_iov(struct request_queue *, struct request *, |
695 | struct sg_iovec *, int, unsigned int); | 695 | struct sg_iovec *, int, unsigned int); |
696 | extern int blk_execute_rq(struct request_queue *, struct gendisk *, | 696 | extern int blk_execute_rq(struct request_queue *, struct gendisk *, |
697 | struct request *, int); | 697 | struct request *, int); |
698 | extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, | 698 | extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, |
699 | struct request *, int, rq_end_io_fn *); | 699 | struct request *, int, rq_end_io_fn *); |
700 | extern int blk_verify_command(unsigned char *, int); | 700 | extern int blk_verify_command(unsigned char *, int); |
701 | 701 | ||
/* Return the request queue of the gendisk backing @bdev */
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
{
	return bdev->bd_disk->queue;
}
706 | 706 | ||
707 | static inline void blk_run_backing_dev(struct backing_dev_info *bdi, | 707 | static inline void blk_run_backing_dev(struct backing_dev_info *bdi, |
708 | struct page *page) | 708 | struct page *page) |
709 | { | 709 | { |
710 | if (bdi && bdi->unplug_io_fn) | 710 | if (bdi && bdi->unplug_io_fn) |
711 | bdi->unplug_io_fn(bdi, page); | 711 | bdi->unplug_io_fn(bdi, page); |
712 | } | 712 | } |
713 | 713 | ||
714 | static inline void blk_run_address_space(struct address_space *mapping) | 714 | static inline void blk_run_address_space(struct address_space *mapping) |
715 | { | 715 | { |
716 | if (mapping) | 716 | if (mapping) |
717 | blk_run_backing_dev(mapping->backing_dev_info, NULL); | 717 | blk_run_backing_dev(mapping->backing_dev_info, NULL); |
718 | } | 718 | } |
719 | 719 | ||
720 | /* | 720 | /* |
721 | * end_request() and friends. Must be called with the request queue spinlock | 721 | * end_request() and friends. Must be called with the request queue spinlock |
722 | * acquired. All functions called within end_request() _must_be_ atomic. | 722 | * acquired. All functions called within end_request() _must_be_ atomic. |
723 | * | 723 | * |
724 | * Several drivers define their own end_request and call | 724 | * Several drivers define their own end_request and call |
725 | * end_that_request_first() and end_that_request_last() | 725 | * end_that_request_first() and end_that_request_last() |
726 | * for parts of the original function. This prevents | 726 | * for parts of the original function. This prevents |
727 | * code duplication in drivers. | 727 | * code duplication in drivers. |
728 | */ | 728 | */ |
729 | extern int end_that_request_first(struct request *, int, int); | 729 | extern int end_that_request_first(struct request *, int, int); |
730 | extern int end_that_request_chunk(struct request *, int, int); | 730 | extern int end_that_request_chunk(struct request *, int, int); |
731 | extern void end_that_request_last(struct request *, int); | 731 | extern void end_that_request_last(struct request *, int); |
732 | extern void end_request(struct request *req, int uptodate); | 732 | extern void end_request(struct request *, int); |
733 | extern void end_queued_request(struct request *, int); | ||
734 | extern void end_dequeued_request(struct request *, int); | ||
733 | extern void blk_complete_request(struct request *); | 735 | extern void blk_complete_request(struct request *); |
734 | 736 | ||
735 | /* | 737 | /* |
736 | * end_that_request_first/chunk() takes an uptodate argument. we account | 738 | * end_that_request_first/chunk() takes an uptodate argument. we account |
 * any value <= 0 as an io error. 0 means -EIO for compatibility reasons,
738 | * any other < 0 value is the direct error type. An uptodate value of | 740 | * any other < 0 value is the direct error type. An uptodate value of |
739 | * 1 indicates successful io completion | 741 | * 1 indicates successful io completion |
740 | */ | 742 | */ |
741 | #define end_io_error(uptodate) (unlikely((uptodate) <= 0)) | 743 | #define end_io_error(uptodate) (unlikely((uptodate) <= 0)) |
742 | 744 | ||
/*
 * Take @req off the elevator/dispatch list of its owning queue; thin
 * wrapper around elv_dequeue_request() using req->q.
 * NOTE(review): like end_request() above, this presumably requires the
 * queue spinlock to be held — confirm at call sites.
 */
static inline void blkdev_dequeue_request(struct request *req)
{
	elv_dequeue_request(req->q, req);
}
747 | 749 | ||
748 | /* | 750 | /* |
749 | * Access functions for manipulating queue properties | 751 | * Access functions for manipulating queue properties |
750 | */ | 752 | */ |
751 | extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, | 753 | extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, |
752 | spinlock_t *lock, int node_id); | 754 | spinlock_t *lock, int node_id); |
753 | extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); | 755 | extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); |
754 | extern void blk_cleanup_queue(struct request_queue *); | 756 | extern void blk_cleanup_queue(struct request_queue *); |
755 | extern void blk_queue_make_request(struct request_queue *, make_request_fn *); | 757 | extern void blk_queue_make_request(struct request_queue *, make_request_fn *); |
756 | extern void blk_queue_bounce_limit(struct request_queue *, u64); | 758 | extern void blk_queue_bounce_limit(struct request_queue *, u64); |
757 | extern void blk_queue_max_sectors(struct request_queue *, unsigned int); | 759 | extern void blk_queue_max_sectors(struct request_queue *, unsigned int); |
758 | extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); | 760 | extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); |
759 | extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); | 761 | extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); |
760 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); | 762 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); |
761 | extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); | 763 | extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); |
762 | extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); | 764 | extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); |
763 | extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); | 765 | extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); |
764 | extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); | 766 | extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); |
765 | extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); | 767 | extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); |
766 | extern void blk_queue_dma_alignment(struct request_queue *, int); | 768 | extern void blk_queue_dma_alignment(struct request_queue *, int); |
767 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); | 769 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); |
768 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); | 770 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); |
769 | extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); | 771 | extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); |
770 | extern void blk_queue_issue_flush_fn(struct request_queue *, issue_flush_fn *); | 772 | extern void blk_queue_issue_flush_fn(struct request_queue *, issue_flush_fn *); |
771 | extern int blk_do_ordered(struct request_queue *, struct request **); | 773 | extern int blk_do_ordered(struct request_queue *, struct request **); |
772 | extern unsigned blk_ordered_cur_seq(struct request_queue *); | 774 | extern unsigned blk_ordered_cur_seq(struct request_queue *); |
773 | extern unsigned blk_ordered_req_seq(struct request *); | 775 | extern unsigned blk_ordered_req_seq(struct request *); |
774 | extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int); | 776 | extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int); |
775 | 777 | ||
776 | extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); | 778 | extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); |
777 | extern void blk_dump_rq_flags(struct request *, char *); | 779 | extern void blk_dump_rq_flags(struct request *, char *); |
778 | extern void generic_unplug_device(struct request_queue *); | 780 | extern void generic_unplug_device(struct request_queue *); |
779 | extern void __generic_unplug_device(struct request_queue *); | 781 | extern void __generic_unplug_device(struct request_queue *); |
780 | extern long nr_blockdev_pages(void); | 782 | extern long nr_blockdev_pages(void); |
781 | 783 | ||
782 | int blk_get_queue(struct request_queue *); | 784 | int blk_get_queue(struct request_queue *); |
783 | struct request_queue *blk_alloc_queue(gfp_t); | 785 | struct request_queue *blk_alloc_queue(gfp_t); |
784 | struct request_queue *blk_alloc_queue_node(gfp_t, int); | 786 | struct request_queue *blk_alloc_queue_node(gfp_t, int); |
785 | extern void blk_put_queue(struct request_queue *); | 787 | extern void blk_put_queue(struct request_queue *); |
786 | 788 | ||
787 | /* | 789 | /* |
788 | * tag stuff | 790 | * tag stuff |
789 | */ | 791 | */ |
790 | #define blk_queue_tag_depth(q) ((q)->queue_tags->busy) | 792 | #define blk_queue_tag_depth(q) ((q)->queue_tags->busy) |
791 | #define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth) | 793 | #define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth) |
792 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) | 794 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) |
793 | extern int blk_queue_start_tag(struct request_queue *, struct request *); | 795 | extern int blk_queue_start_tag(struct request_queue *, struct request *); |
794 | extern struct request *blk_queue_find_tag(struct request_queue *, int); | 796 | extern struct request *blk_queue_find_tag(struct request_queue *, int); |
795 | extern void blk_queue_end_tag(struct request_queue *, struct request *); | 797 | extern void blk_queue_end_tag(struct request_queue *, struct request *); |
796 | extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); | 798 | extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); |
797 | extern void blk_queue_free_tags(struct request_queue *); | 799 | extern void blk_queue_free_tags(struct request_queue *); |
798 | extern int blk_queue_resize_tags(struct request_queue *, int); | 800 | extern int blk_queue_resize_tags(struct request_queue *, int); |
799 | extern void blk_queue_invalidate_tags(struct request_queue *); | 801 | extern void blk_queue_invalidate_tags(struct request_queue *); |
800 | extern struct blk_queue_tag *blk_init_tags(int); | 802 | extern struct blk_queue_tag *blk_init_tags(int); |
801 | extern void blk_free_tags(struct blk_queue_tag *); | 803 | extern void blk_free_tags(struct blk_queue_tag *); |
802 | 804 | ||
/*
 * Look up the request currently associated with @tag in the tag map @bqt.
 * Returns NULL when the tag map is absent or @tag is out of range.
 */
static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
						     int tag)
{
	if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
		return NULL;
	return bqt->tag_index[tag];
}
810 | 812 | ||
811 | extern int blkdev_issue_flush(struct block_device *, sector_t *); | 813 | extern int blkdev_issue_flush(struct block_device *, sector_t *); |
812 | 814 | ||
813 | #define MAX_PHYS_SEGMENTS 128 | 815 | #define MAX_PHYS_SEGMENTS 128 |
814 | #define MAX_HW_SEGMENTS 128 | 816 | #define MAX_HW_SEGMENTS 128 |
815 | #define SAFE_MAX_SECTORS 255 | 817 | #define SAFE_MAX_SECTORS 255 |
816 | #define BLK_DEF_MAX_SECTORS 1024 | 818 | #define BLK_DEF_MAX_SECTORS 1024 |
817 | 819 | ||
818 | #define MAX_SEGMENT_SIZE 65536 | 820 | #define MAX_SEGMENT_SIZE 65536 |
819 | 821 | ||
820 | #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) | 822 | #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) |
821 | 823 | ||
822 | static inline int queue_hardsect_size(struct request_queue *q) | 824 | static inline int queue_hardsect_size(struct request_queue *q) |
823 | { | 825 | { |
824 | int retval = 512; | 826 | int retval = 512; |
825 | 827 | ||
826 | if (q && q->hardsect_size) | 828 | if (q && q->hardsect_size) |
827 | retval = q->hardsect_size; | 829 | retval = q->hardsect_size; |
828 | 830 | ||
829 | return retval; | 831 | return retval; |
830 | } | 832 | } |
831 | 833 | ||
/* Hardware sector size of the queue backing @bdev (see queue_hardsect_size) */
static inline int bdev_hardsect_size(struct block_device *bdev)
{
	return queue_hardsect_size(bdev_get_queue(bdev));
}
836 | 838 | ||
837 | static inline int queue_dma_alignment(struct request_queue *q) | 839 | static inline int queue_dma_alignment(struct request_queue *q) |
838 | { | 840 | { |
839 | int retval = 511; | 841 | int retval = 511; |
840 | 842 | ||
841 | if (q && q->dma_alignment) | 843 | if (q && q->dma_alignment) |
842 | retval = q->dma_alignment; | 844 | retval = q->dma_alignment; |
843 | 845 | ||
844 | return retval; | 846 | return retval; |
845 | } | 847 | } |
846 | 848 | ||
/*
 * log2 of a block size: blksize_bits(512) == 9, blksize_bits(4096) == 12.
 * Assumes size > 256, i.e. at least a 512-byte block.
 */
static inline unsigned int blksize_bits(unsigned int size)
{
	unsigned int bits = 9;

	size >>= 1;
	while (size > 256) {
		size >>= 1;
		bits++;
	}

	return bits;
}
857 | 859 | ||
/* Current block size of @bdev in bytes (bdev->bd_block_size) */
static inline unsigned int block_size(struct block_device *bdev)
{
	return bdev->bd_block_size;
}
862 | 864 | ||
863 | typedef struct {struct page *v;} Sector; | 865 | typedef struct {struct page *v;} Sector; |
864 | 866 | ||
865 | unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); | 867 | unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); |
866 | 868 | ||
/* Release the page referenced by @p (obtained via read_dev_sector() above) */
static inline void put_dev_sector(Sector p)
{
	page_cache_release(p.v);
}
871 | 873 | ||
872 | struct work_struct; | 874 | struct work_struct; |
873 | int kblockd_schedule_work(struct work_struct *work); | 875 | int kblockd_schedule_work(struct work_struct *work); |
874 | void kblockd_flush_work(struct work_struct *work); | 876 | void kblockd_flush_work(struct work_struct *work); |
875 | 877 | ||
876 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ | 878 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ |
877 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) | 879 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) |
878 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ | 880 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ |
879 | MODULE_ALIAS("block-major-" __stringify(major) "-*") | 881 | MODULE_ALIAS("block-major-" __stringify(major) "-*") |
880 | 882 | ||
881 | 883 | ||
882 | #else /* CONFIG_BLOCK */ | 884 | #else /* CONFIG_BLOCK */ |
883 | /* | 885 | /* |
884 | * stubs for when the block layer is configured out | 886 | * stubs for when the block layer is configured out |
885 | */ | 887 | */ |
886 | #define buffer_heads_over_limit 0 | 888 | #define buffer_heads_over_limit 0 |
887 | 889 | ||
static inline long nr_blockdev_pages(void)
{
	/* !CONFIG_BLOCK stub: no block devices, hence no blockdev pages */
	return 0;
}
892 | 894 | ||
static inline void exit_io_context(void)
{
	/* !CONFIG_BLOCK stub: no io context to tear down */
}
896 | 898 | ||
897 | #endif /* CONFIG_BLOCK */ | 899 | #endif /* CONFIG_BLOCK */ |
898 | 900 | ||
899 | #endif | 901 | #endif |
900 | 902 |