Commit bf2de6f5a4faf0197268f18d08969b003b87b6e8
1 parent: c07e2b4129
Exists in master and in 4 other branches
block: Initial support for data-less (or empty) barrier support
This implements functionality to pass down or insert a barrier in a queue without having any data attached to it. The ->prepare_flush_fn() infrastructure from data barriers is reused to provide this functionality.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
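For context (not part of the patch itself), here is a minimal, hypothetical sketch of how a block driver of this era wires up the ordered-flush infrastructure that the empty-barrier path reuses. The mydrv_* names are invented for illustration, and the flush preparation is loosely modeled on a SCSI SYNCHRONIZE CACHE setup; a real driver would issue whatever cache-flush command its hardware requires.

#include <linux/blkdev.h>
#include <scsi/scsi.h>

/*
 * Hypothetical driver hook: the block layer calls this to turn the
 * barrier placeholder request into an actual cache-flush command.
 * With this patch, the same hook also backs barriers that carry no data.
 */
static void mydrv_prepare_flush(struct request_queue *q, struct request *rq)
{
	memset(rq->cmd, 0, sizeof(rq->cmd));
	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->cmd[0] = SYNCHRONIZE_CACHE;	/* SCSI-style flush, illustrative only */
	rq->cmd_len = 10;
}

static void mydrv_setup_ordered(struct request_queue *q)
{
	/* drain the queue and issue a flush around each barrier request */
	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, mydrv_prepare_flush);
}

An empty barrier then rides the same ordered-flush machinery as a data barrier, so a driver set up as above needs no changes; as the new hunk in elv_next_request() below shows, the data-less placeholder request is ended by the elevator itself and is never handed to the driver.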
Showing 5 changed files with 71 additions and 21 deletions (inline diff)
block/elevator.c
1 | /* | 1 | /* |
2 | * Block device elevator/IO-scheduler. | 2 | * Block device elevator/IO-scheduler. |
3 | * | 3 | * |
4 | * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
5 | * | 5 | * |
6 | * 30042000 Jens Axboe <axboe@kernel.dk> : | 6 | * 30042000 Jens Axboe <axboe@kernel.dk> : |
7 | * | 7 | * |
8 | * Split the elevator a bit so that it is possible to choose a different | 8 | * Split the elevator a bit so that it is possible to choose a different |
9 | * one or even write a new "plug in". There are three pieces: | 9 | * one or even write a new "plug in". There are three pieces: |
10 | * - elevator_fn, inserts a new request in the queue list | 10 | * - elevator_fn, inserts a new request in the queue list |
11 | * - elevator_merge_fn, decides whether a new buffer can be merged with | 11 | * - elevator_merge_fn, decides whether a new buffer can be merged with |
12 | * an existing request | 12 | * an existing request |
13 | * - elevator_dequeue_fn, called when a request is taken off the active list | 13 | * - elevator_dequeue_fn, called when a request is taken off the active list |
14 | * | 14 | * |
15 | * 20082000 Dave Jones <davej@suse.de> : | 15 | * 20082000 Dave Jones <davej@suse.de> : |
16 | * Removed tests for max-bomb-segments, which was breaking elvtune | 16 | * Removed tests for max-bomb-segments, which was breaking elvtune |
17 | * when run without -bN | 17 | * when run without -bN |
18 | * | 18 | * |
19 | * Jens: | 19 | * Jens: |
20 | * - Rework again to work with bio instead of buffer_heads | 20 | * - Rework again to work with bio instead of buffer_heads |
21 | * - loose bi_dev comparisons, partition handling is right now | 21 | * - loose bi_dev comparisons, partition handling is right now |
22 | * - completely modularize elevator setup and teardown | 22 | * - completely modularize elevator setup and teardown |
23 | * | 23 | * |
24 | */ | 24 | */ |
25 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/blkdev.h> | 27 | #include <linux/blkdev.h> |
28 | #include <linux/elevator.h> | 28 | #include <linux/elevator.h> |
29 | #include <linux/bio.h> | 29 | #include <linux/bio.h> |
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <linux/init.h> | 32 | #include <linux/init.h> |
33 | #include <linux/compiler.h> | 33 | #include <linux/compiler.h> |
34 | #include <linux/delay.h> | 34 | #include <linux/delay.h> |
35 | #include <linux/blktrace_api.h> | 35 | #include <linux/blktrace_api.h> |
36 | #include <linux/hash.h> | 36 | #include <linux/hash.h> |
37 | 37 | ||
38 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
39 | 39 | ||
40 | static DEFINE_SPINLOCK(elv_list_lock); | 40 | static DEFINE_SPINLOCK(elv_list_lock); |
41 | static LIST_HEAD(elv_list); | 41 | static LIST_HEAD(elv_list); |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Merge hash stuff. | 44 | * Merge hash stuff. |
45 | */ | 45 | */ |
46 | static const int elv_hash_shift = 6; | 46 | static const int elv_hash_shift = 6; |
47 | #define ELV_HASH_BLOCK(sec) ((sec) >> 3) | 47 | #define ELV_HASH_BLOCK(sec) ((sec) >> 3) |
48 | #define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) | 48 | #define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) |
49 | #define ELV_HASH_ENTRIES (1 << elv_hash_shift) | 49 | #define ELV_HASH_ENTRIES (1 << elv_hash_shift) |
50 | #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) | 50 | #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) |
51 | #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) | 51 | #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) |
52 | 52 | ||
53 | /* | 53 | /* |
54 | * Query io scheduler to see if the current process issuing bio may be | 54 | * Query io scheduler to see if the current process issuing bio may be |
55 | * merged with rq. | 55 | * merged with rq. |
56 | */ | 56 | */ |
57 | static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) | 57 | static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) |
58 | { | 58 | { |
59 | struct request_queue *q = rq->q; | 59 | struct request_queue *q = rq->q; |
60 | elevator_t *e = q->elevator; | 60 | elevator_t *e = q->elevator; |
61 | 61 | ||
62 | if (e->ops->elevator_allow_merge_fn) | 62 | if (e->ops->elevator_allow_merge_fn) |
63 | return e->ops->elevator_allow_merge_fn(q, rq, bio); | 63 | return e->ops->elevator_allow_merge_fn(q, rq, bio); |
64 | 64 | ||
65 | return 1; | 65 | return 1; |
66 | } | 66 | } |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * can we safely merge with this request? | 69 | * can we safely merge with this request? |
70 | */ | 70 | */ |
71 | inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) | 71 | inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) |
72 | { | 72 | { |
73 | if (!rq_mergeable(rq)) | 73 | if (!rq_mergeable(rq)) |
74 | return 0; | 74 | return 0; |
75 | 75 | ||
76 | /* | 76 | /* |
77 | * different data direction or already started, don't merge | 77 | * different data direction or already started, don't merge |
78 | */ | 78 | */ |
79 | if (bio_data_dir(bio) != rq_data_dir(rq)) | 79 | if (bio_data_dir(bio) != rq_data_dir(rq)) |
80 | return 0; | 80 | return 0; |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * must be same device and not a special request | 83 | * must be same device and not a special request |
84 | */ | 84 | */ |
85 | if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) | 85 | if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) |
86 | return 0; | 86 | return 0; |
87 | 87 | ||
88 | if (!elv_iosched_allow_merge(rq, bio)) | 88 | if (!elv_iosched_allow_merge(rq, bio)) |
89 | return 0; | 89 | return 0; |
90 | 90 | ||
91 | return 1; | 91 | return 1; |
92 | } | 92 | } |
93 | EXPORT_SYMBOL(elv_rq_merge_ok); | 93 | EXPORT_SYMBOL(elv_rq_merge_ok); |
94 | 94 | ||
95 | static inline int elv_try_merge(struct request *__rq, struct bio *bio) | 95 | static inline int elv_try_merge(struct request *__rq, struct bio *bio) |
96 | { | 96 | { |
97 | int ret = ELEVATOR_NO_MERGE; | 97 | int ret = ELEVATOR_NO_MERGE; |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * we can merge and sequence is ok, check if it's possible | 100 | * we can merge and sequence is ok, check if it's possible |
101 | */ | 101 | */ |
102 | if (elv_rq_merge_ok(__rq, bio)) { | 102 | if (elv_rq_merge_ok(__rq, bio)) { |
103 | if (__rq->sector + __rq->nr_sectors == bio->bi_sector) | 103 | if (__rq->sector + __rq->nr_sectors == bio->bi_sector) |
104 | ret = ELEVATOR_BACK_MERGE; | 104 | ret = ELEVATOR_BACK_MERGE; |
105 | else if (__rq->sector - bio_sectors(bio) == bio->bi_sector) | 105 | else if (__rq->sector - bio_sectors(bio) == bio->bi_sector) |
106 | ret = ELEVATOR_FRONT_MERGE; | 106 | ret = ELEVATOR_FRONT_MERGE; |
107 | } | 107 | } |
108 | 108 | ||
109 | return ret; | 109 | return ret; |
110 | } | 110 | } |
111 | 111 | ||
112 | static struct elevator_type *elevator_find(const char *name) | 112 | static struct elevator_type *elevator_find(const char *name) |
113 | { | 113 | { |
114 | struct elevator_type *e; | 114 | struct elevator_type *e; |
115 | 115 | ||
116 | list_for_each_entry(e, &elv_list, list) { | 116 | list_for_each_entry(e, &elv_list, list) { |
117 | if (!strcmp(e->elevator_name, name)) | 117 | if (!strcmp(e->elevator_name, name)) |
118 | return e; | 118 | return e; |
119 | } | 119 | } |
120 | 120 | ||
121 | return NULL; | 121 | return NULL; |
122 | } | 122 | } |
123 | 123 | ||
124 | static void elevator_put(struct elevator_type *e) | 124 | static void elevator_put(struct elevator_type *e) |
125 | { | 125 | { |
126 | module_put(e->elevator_owner); | 126 | module_put(e->elevator_owner); |
127 | } | 127 | } |
128 | 128 | ||
129 | static struct elevator_type *elevator_get(const char *name) | 129 | static struct elevator_type *elevator_get(const char *name) |
130 | { | 130 | { |
131 | struct elevator_type *e; | 131 | struct elevator_type *e; |
132 | 132 | ||
133 | spin_lock(&elv_list_lock); | 133 | spin_lock(&elv_list_lock); |
134 | 134 | ||
135 | e = elevator_find(name); | 135 | e = elevator_find(name); |
136 | if (e && !try_module_get(e->elevator_owner)) | 136 | if (e && !try_module_get(e->elevator_owner)) |
137 | e = NULL; | 137 | e = NULL; |
138 | 138 | ||
139 | spin_unlock(&elv_list_lock); | 139 | spin_unlock(&elv_list_lock); |
140 | 140 | ||
141 | return e; | 141 | return e; |
142 | } | 142 | } |
143 | 143 | ||
144 | static void *elevator_init_queue(struct request_queue *q, | 144 | static void *elevator_init_queue(struct request_queue *q, |
145 | struct elevator_queue *eq) | 145 | struct elevator_queue *eq) |
146 | { | 146 | { |
147 | return eq->ops->elevator_init_fn(q); | 147 | return eq->ops->elevator_init_fn(q); |
148 | } | 148 | } |
149 | 149 | ||
150 | static void elevator_attach(struct request_queue *q, struct elevator_queue *eq, | 150 | static void elevator_attach(struct request_queue *q, struct elevator_queue *eq, |
151 | void *data) | 151 | void *data) |
152 | { | 152 | { |
153 | q->elevator = eq; | 153 | q->elevator = eq; |
154 | eq->elevator_data = data; | 154 | eq->elevator_data = data; |
155 | } | 155 | } |
156 | 156 | ||
157 | static char chosen_elevator[16]; | 157 | static char chosen_elevator[16]; |
158 | 158 | ||
159 | static int __init elevator_setup(char *str) | 159 | static int __init elevator_setup(char *str) |
160 | { | 160 | { |
161 | /* | 161 | /* |
162 | * Be backwards-compatible with previous kernels, so users | 162 | * Be backwards-compatible with previous kernels, so users |
163 | * won't get the wrong elevator. | 163 | * won't get the wrong elevator. |
164 | */ | 164 | */ |
165 | if (!strcmp(str, "as")) | 165 | if (!strcmp(str, "as")) |
166 | strcpy(chosen_elevator, "anticipatory"); | 166 | strcpy(chosen_elevator, "anticipatory"); |
167 | else | 167 | else |
168 | strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); | 168 | strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); |
169 | return 1; | 169 | return 1; |
170 | } | 170 | } |
171 | 171 | ||
172 | __setup("elevator=", elevator_setup); | 172 | __setup("elevator=", elevator_setup); |
173 | 173 | ||
174 | static struct kobj_type elv_ktype; | 174 | static struct kobj_type elv_ktype; |
175 | 175 | ||
176 | static elevator_t *elevator_alloc(struct request_queue *q, | 176 | static elevator_t *elevator_alloc(struct request_queue *q, |
177 | struct elevator_type *e) | 177 | struct elevator_type *e) |
178 | { | 178 | { |
179 | elevator_t *eq; | 179 | elevator_t *eq; |
180 | int i; | 180 | int i; |
181 | 181 | ||
182 | eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL | __GFP_ZERO, q->node); | 182 | eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL | __GFP_ZERO, q->node); |
183 | if (unlikely(!eq)) | 183 | if (unlikely(!eq)) |
184 | goto err; | 184 | goto err; |
185 | 185 | ||
186 | eq->ops = &e->ops; | 186 | eq->ops = &e->ops; |
187 | eq->elevator_type = e; | 187 | eq->elevator_type = e; |
188 | kobject_init(&eq->kobj); | 188 | kobject_init(&eq->kobj); |
189 | kobject_set_name(&eq->kobj, "%s", "iosched"); | 189 | kobject_set_name(&eq->kobj, "%s", "iosched"); |
190 | eq->kobj.ktype = &elv_ktype; | 190 | eq->kobj.ktype = &elv_ktype; |
191 | mutex_init(&eq->sysfs_lock); | 191 | mutex_init(&eq->sysfs_lock); |
192 | 192 | ||
193 | eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, | 193 | eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, |
194 | GFP_KERNEL, q->node); | 194 | GFP_KERNEL, q->node); |
195 | if (!eq->hash) | 195 | if (!eq->hash) |
196 | goto err; | 196 | goto err; |
197 | 197 | ||
198 | for (i = 0; i < ELV_HASH_ENTRIES; i++) | 198 | for (i = 0; i < ELV_HASH_ENTRIES; i++) |
199 | INIT_HLIST_HEAD(&eq->hash[i]); | 199 | INIT_HLIST_HEAD(&eq->hash[i]); |
200 | 200 | ||
201 | return eq; | 201 | return eq; |
202 | err: | 202 | err: |
203 | kfree(eq); | 203 | kfree(eq); |
204 | elevator_put(e); | 204 | elevator_put(e); |
205 | return NULL; | 205 | return NULL; |
206 | } | 206 | } |
207 | 207 | ||
208 | static void elevator_release(struct kobject *kobj) | 208 | static void elevator_release(struct kobject *kobj) |
209 | { | 209 | { |
210 | elevator_t *e = container_of(kobj, elevator_t, kobj); | 210 | elevator_t *e = container_of(kobj, elevator_t, kobj); |
211 | 211 | ||
212 | elevator_put(e->elevator_type); | 212 | elevator_put(e->elevator_type); |
213 | kfree(e->hash); | 213 | kfree(e->hash); |
214 | kfree(e); | 214 | kfree(e); |
215 | } | 215 | } |
216 | 216 | ||
217 | int elevator_init(struct request_queue *q, char *name) | 217 | int elevator_init(struct request_queue *q, char *name) |
218 | { | 218 | { |
219 | struct elevator_type *e = NULL; | 219 | struct elevator_type *e = NULL; |
220 | struct elevator_queue *eq; | 220 | struct elevator_queue *eq; |
221 | int ret = 0; | 221 | int ret = 0; |
222 | void *data; | 222 | void *data; |
223 | 223 | ||
224 | INIT_LIST_HEAD(&q->queue_head); | 224 | INIT_LIST_HEAD(&q->queue_head); |
225 | q->last_merge = NULL; | 225 | q->last_merge = NULL; |
226 | q->end_sector = 0; | 226 | q->end_sector = 0; |
227 | q->boundary_rq = NULL; | 227 | q->boundary_rq = NULL; |
228 | 228 | ||
229 | if (name && !(e = elevator_get(name))) | 229 | if (name && !(e = elevator_get(name))) |
230 | return -EINVAL; | 230 | return -EINVAL; |
231 | 231 | ||
232 | if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator))) | 232 | if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator))) |
233 | printk("I/O scheduler %s not found\n", chosen_elevator); | 233 | printk("I/O scheduler %s not found\n", chosen_elevator); |
234 | 234 | ||
235 | if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) { | 235 | if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) { |
236 | printk("Default I/O scheduler not found, using no-op\n"); | 236 | printk("Default I/O scheduler not found, using no-op\n"); |
237 | e = elevator_get("noop"); | 237 | e = elevator_get("noop"); |
238 | } | 238 | } |
239 | 239 | ||
240 | eq = elevator_alloc(q, e); | 240 | eq = elevator_alloc(q, e); |
241 | if (!eq) | 241 | if (!eq) |
242 | return -ENOMEM; | 242 | return -ENOMEM; |
243 | 243 | ||
244 | data = elevator_init_queue(q, eq); | 244 | data = elevator_init_queue(q, eq); |
245 | if (!data) { | 245 | if (!data) { |
246 | kobject_put(&eq->kobj); | 246 | kobject_put(&eq->kobj); |
247 | return -ENOMEM; | 247 | return -ENOMEM; |
248 | } | 248 | } |
249 | 249 | ||
250 | elevator_attach(q, eq, data); | 250 | elevator_attach(q, eq, data); |
251 | return ret; | 251 | return ret; |
252 | } | 252 | } |
253 | 253 | ||
254 | EXPORT_SYMBOL(elevator_init); | 254 | EXPORT_SYMBOL(elevator_init); |
255 | 255 | ||
256 | void elevator_exit(elevator_t *e) | 256 | void elevator_exit(elevator_t *e) |
257 | { | 257 | { |
258 | mutex_lock(&e->sysfs_lock); | 258 | mutex_lock(&e->sysfs_lock); |
259 | if (e->ops->elevator_exit_fn) | 259 | if (e->ops->elevator_exit_fn) |
260 | e->ops->elevator_exit_fn(e); | 260 | e->ops->elevator_exit_fn(e); |
261 | e->ops = NULL; | 261 | e->ops = NULL; |
262 | mutex_unlock(&e->sysfs_lock); | 262 | mutex_unlock(&e->sysfs_lock); |
263 | 263 | ||
264 | kobject_put(&e->kobj); | 264 | kobject_put(&e->kobj); |
265 | } | 265 | } |
266 | 266 | ||
267 | EXPORT_SYMBOL(elevator_exit); | 267 | EXPORT_SYMBOL(elevator_exit); |
268 | 268 | ||
269 | static void elv_activate_rq(struct request_queue *q, struct request *rq) | 269 | static void elv_activate_rq(struct request_queue *q, struct request *rq) |
270 | { | 270 | { |
271 | elevator_t *e = q->elevator; | 271 | elevator_t *e = q->elevator; |
272 | 272 | ||
273 | if (e->ops->elevator_activate_req_fn) | 273 | if (e->ops->elevator_activate_req_fn) |
274 | e->ops->elevator_activate_req_fn(q, rq); | 274 | e->ops->elevator_activate_req_fn(q, rq); |
275 | } | 275 | } |
276 | 276 | ||
277 | static void elv_deactivate_rq(struct request_queue *q, struct request *rq) | 277 | static void elv_deactivate_rq(struct request_queue *q, struct request *rq) |
278 | { | 278 | { |
279 | elevator_t *e = q->elevator; | 279 | elevator_t *e = q->elevator; |
280 | 280 | ||
281 | if (e->ops->elevator_deactivate_req_fn) | 281 | if (e->ops->elevator_deactivate_req_fn) |
282 | e->ops->elevator_deactivate_req_fn(q, rq); | 282 | e->ops->elevator_deactivate_req_fn(q, rq); |
283 | } | 283 | } |
284 | 284 | ||
285 | static inline void __elv_rqhash_del(struct request *rq) | 285 | static inline void __elv_rqhash_del(struct request *rq) |
286 | { | 286 | { |
287 | hlist_del_init(&rq->hash); | 287 | hlist_del_init(&rq->hash); |
288 | } | 288 | } |
289 | 289 | ||
290 | static void elv_rqhash_del(struct request_queue *q, struct request *rq) | 290 | static void elv_rqhash_del(struct request_queue *q, struct request *rq) |
291 | { | 291 | { |
292 | if (ELV_ON_HASH(rq)) | 292 | if (ELV_ON_HASH(rq)) |
293 | __elv_rqhash_del(rq); | 293 | __elv_rqhash_del(rq); |
294 | } | 294 | } |
295 | 295 | ||
296 | static void elv_rqhash_add(struct request_queue *q, struct request *rq) | 296 | static void elv_rqhash_add(struct request_queue *q, struct request *rq) |
297 | { | 297 | { |
298 | elevator_t *e = q->elevator; | 298 | elevator_t *e = q->elevator; |
299 | 299 | ||
300 | BUG_ON(ELV_ON_HASH(rq)); | 300 | BUG_ON(ELV_ON_HASH(rq)); |
301 | hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); | 301 | hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); |
302 | } | 302 | } |
303 | 303 | ||
304 | static void elv_rqhash_reposition(struct request_queue *q, struct request *rq) | 304 | static void elv_rqhash_reposition(struct request_queue *q, struct request *rq) |
305 | { | 305 | { |
306 | __elv_rqhash_del(rq); | 306 | __elv_rqhash_del(rq); |
307 | elv_rqhash_add(q, rq); | 307 | elv_rqhash_add(q, rq); |
308 | } | 308 | } |
309 | 309 | ||
310 | static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset) | 310 | static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset) |
311 | { | 311 | { |
312 | elevator_t *e = q->elevator; | 312 | elevator_t *e = q->elevator; |
313 | struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; | 313 | struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; |
314 | struct hlist_node *entry, *next; | 314 | struct hlist_node *entry, *next; |
315 | struct request *rq; | 315 | struct request *rq; |
316 | 316 | ||
317 | hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) { | 317 | hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) { |
318 | BUG_ON(!ELV_ON_HASH(rq)); | 318 | BUG_ON(!ELV_ON_HASH(rq)); |
319 | 319 | ||
320 | if (unlikely(!rq_mergeable(rq))) { | 320 | if (unlikely(!rq_mergeable(rq))) { |
321 | __elv_rqhash_del(rq); | 321 | __elv_rqhash_del(rq); |
322 | continue; | 322 | continue; |
323 | } | 323 | } |
324 | 324 | ||
325 | if (rq_hash_key(rq) == offset) | 325 | if (rq_hash_key(rq) == offset) |
326 | return rq; | 326 | return rq; |
327 | } | 327 | } |
328 | 328 | ||
329 | return NULL; | 329 | return NULL; |
330 | } | 330 | } |
331 | 331 | ||
332 | /* | 332 | /* |
333 | * RB-tree support functions for inserting/lookup/removal of requests | 333 | * RB-tree support functions for inserting/lookup/removal of requests |
334 | * in a sorted RB tree. | 334 | * in a sorted RB tree. |
335 | */ | 335 | */ |
336 | struct request *elv_rb_add(struct rb_root *root, struct request *rq) | 336 | struct request *elv_rb_add(struct rb_root *root, struct request *rq) |
337 | { | 337 | { |
338 | struct rb_node **p = &root->rb_node; | 338 | struct rb_node **p = &root->rb_node; |
339 | struct rb_node *parent = NULL; | 339 | struct rb_node *parent = NULL; |
340 | struct request *__rq; | 340 | struct request *__rq; |
341 | 341 | ||
342 | while (*p) { | 342 | while (*p) { |
343 | parent = *p; | 343 | parent = *p; |
344 | __rq = rb_entry(parent, struct request, rb_node); | 344 | __rq = rb_entry(parent, struct request, rb_node); |
345 | 345 | ||
346 | if (rq->sector < __rq->sector) | 346 | if (rq->sector < __rq->sector) |
347 | p = &(*p)->rb_left; | 347 | p = &(*p)->rb_left; |
348 | else if (rq->sector > __rq->sector) | 348 | else if (rq->sector > __rq->sector) |
349 | p = &(*p)->rb_right; | 349 | p = &(*p)->rb_right; |
350 | else | 350 | else |
351 | return __rq; | 351 | return __rq; |
352 | } | 352 | } |
353 | 353 | ||
354 | rb_link_node(&rq->rb_node, parent, p); | 354 | rb_link_node(&rq->rb_node, parent, p); |
355 | rb_insert_color(&rq->rb_node, root); | 355 | rb_insert_color(&rq->rb_node, root); |
356 | return NULL; | 356 | return NULL; |
357 | } | 357 | } |
358 | 358 | ||
359 | EXPORT_SYMBOL(elv_rb_add); | 359 | EXPORT_SYMBOL(elv_rb_add); |
360 | 360 | ||
361 | void elv_rb_del(struct rb_root *root, struct request *rq) | 361 | void elv_rb_del(struct rb_root *root, struct request *rq) |
362 | { | 362 | { |
363 | BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); | 363 | BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); |
364 | rb_erase(&rq->rb_node, root); | 364 | rb_erase(&rq->rb_node, root); |
365 | RB_CLEAR_NODE(&rq->rb_node); | 365 | RB_CLEAR_NODE(&rq->rb_node); |
366 | } | 366 | } |
367 | 367 | ||
368 | EXPORT_SYMBOL(elv_rb_del); | 368 | EXPORT_SYMBOL(elv_rb_del); |
369 | 369 | ||
370 | struct request *elv_rb_find(struct rb_root *root, sector_t sector) | 370 | struct request *elv_rb_find(struct rb_root *root, sector_t sector) |
371 | { | 371 | { |
372 | struct rb_node *n = root->rb_node; | 372 | struct rb_node *n = root->rb_node; |
373 | struct request *rq; | 373 | struct request *rq; |
374 | 374 | ||
375 | while (n) { | 375 | while (n) { |
376 | rq = rb_entry(n, struct request, rb_node); | 376 | rq = rb_entry(n, struct request, rb_node); |
377 | 377 | ||
378 | if (sector < rq->sector) | 378 | if (sector < rq->sector) |
379 | n = n->rb_left; | 379 | n = n->rb_left; |
380 | else if (sector > rq->sector) | 380 | else if (sector > rq->sector) |
381 | n = n->rb_right; | 381 | n = n->rb_right; |
382 | else | 382 | else |
383 | return rq; | 383 | return rq; |
384 | } | 384 | } |
385 | 385 | ||
386 | return NULL; | 386 | return NULL; |
387 | } | 387 | } |
388 | 388 | ||
389 | EXPORT_SYMBOL(elv_rb_find); | 389 | EXPORT_SYMBOL(elv_rb_find); |
390 | 390 | ||
391 | /* | 391 | /* |
392 | * Insert rq into dispatch queue of q. Queue lock must be held on | 392 | * Insert rq into dispatch queue of q. Queue lock must be held on |
393 | * entry. rq is sort insted into the dispatch queue. To be used by | 393 | * entry. rq is sort insted into the dispatch queue. To be used by |
394 | * specific elevators. | 394 | * specific elevators. |
395 | */ | 395 | */ |
396 | void elv_dispatch_sort(struct request_queue *q, struct request *rq) | 396 | void elv_dispatch_sort(struct request_queue *q, struct request *rq) |
397 | { | 397 | { |
398 | sector_t boundary; | 398 | sector_t boundary; |
399 | struct list_head *entry; | 399 | struct list_head *entry; |
400 | 400 | ||
401 | if (q->last_merge == rq) | 401 | if (q->last_merge == rq) |
402 | q->last_merge = NULL; | 402 | q->last_merge = NULL; |
403 | 403 | ||
404 | elv_rqhash_del(q, rq); | 404 | elv_rqhash_del(q, rq); |
405 | 405 | ||
406 | q->nr_sorted--; | 406 | q->nr_sorted--; |
407 | 407 | ||
408 | boundary = q->end_sector; | 408 | boundary = q->end_sector; |
409 | 409 | ||
410 | list_for_each_prev(entry, &q->queue_head) { | 410 | list_for_each_prev(entry, &q->queue_head) { |
411 | struct request *pos = list_entry_rq(entry); | 411 | struct request *pos = list_entry_rq(entry); |
412 | 412 | ||
413 | if (rq_data_dir(rq) != rq_data_dir(pos)) | 413 | if (rq_data_dir(rq) != rq_data_dir(pos)) |
414 | break; | 414 | break; |
415 | if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) | 415 | if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) |
416 | break; | 416 | break; |
417 | if (rq->sector >= boundary) { | 417 | if (rq->sector >= boundary) { |
418 | if (pos->sector < boundary) | 418 | if (pos->sector < boundary) |
419 | continue; | 419 | continue; |
420 | } else { | 420 | } else { |
421 | if (pos->sector >= boundary) | 421 | if (pos->sector >= boundary) |
422 | break; | 422 | break; |
423 | } | 423 | } |
424 | if (rq->sector >= pos->sector) | 424 | if (rq->sector >= pos->sector) |
425 | break; | 425 | break; |
426 | } | 426 | } |
427 | 427 | ||
428 | list_add(&rq->queuelist, entry); | 428 | list_add(&rq->queuelist, entry); |
429 | } | 429 | } |
430 | 430 | ||
431 | EXPORT_SYMBOL(elv_dispatch_sort); | 431 | EXPORT_SYMBOL(elv_dispatch_sort); |
432 | 432 | ||
433 | /* | 433 | /* |
434 | * Insert rq into dispatch queue of q. Queue lock must be held on | 434 | * Insert rq into dispatch queue of q. Queue lock must be held on |
435 | * entry. rq is added to the back of the dispatch queue. To be used by | 435 | * entry. rq is added to the back of the dispatch queue. To be used by |
436 | * specific elevators. | 436 | * specific elevators. |
437 | */ | 437 | */ |
438 | void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) | 438 | void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) |
439 | { | 439 | { |
440 | if (q->last_merge == rq) | 440 | if (q->last_merge == rq) |
441 | q->last_merge = NULL; | 441 | q->last_merge = NULL; |
442 | 442 | ||
443 | elv_rqhash_del(q, rq); | 443 | elv_rqhash_del(q, rq); |
444 | 444 | ||
445 | q->nr_sorted--; | 445 | q->nr_sorted--; |
446 | 446 | ||
447 | q->end_sector = rq_end_sector(rq); | 447 | q->end_sector = rq_end_sector(rq); |
448 | q->boundary_rq = rq; | 448 | q->boundary_rq = rq; |
449 | list_add_tail(&rq->queuelist, &q->queue_head); | 449 | list_add_tail(&rq->queuelist, &q->queue_head); |
450 | } | 450 | } |
451 | 451 | ||
452 | EXPORT_SYMBOL(elv_dispatch_add_tail); | 452 | EXPORT_SYMBOL(elv_dispatch_add_tail); |
453 | 453 | ||
454 | int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) | 454 | int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) |
455 | { | 455 | { |
456 | elevator_t *e = q->elevator; | 456 | elevator_t *e = q->elevator; |
457 | struct request *__rq; | 457 | struct request *__rq; |
458 | int ret; | 458 | int ret; |
459 | 459 | ||
460 | /* | 460 | /* |
461 | * First try one-hit cache. | 461 | * First try one-hit cache. |
462 | */ | 462 | */ |
463 | if (q->last_merge) { | 463 | if (q->last_merge) { |
464 | ret = elv_try_merge(q->last_merge, bio); | 464 | ret = elv_try_merge(q->last_merge, bio); |
465 | if (ret != ELEVATOR_NO_MERGE) { | 465 | if (ret != ELEVATOR_NO_MERGE) { |
466 | *req = q->last_merge; | 466 | *req = q->last_merge; |
467 | return ret; | 467 | return ret; |
468 | } | 468 | } |
469 | } | 469 | } |
470 | 470 | ||
471 | /* | 471 | /* |
472 | * See if our hash lookup can find a potential backmerge. | 472 | * See if our hash lookup can find a potential backmerge. |
473 | */ | 473 | */ |
474 | __rq = elv_rqhash_find(q, bio->bi_sector); | 474 | __rq = elv_rqhash_find(q, bio->bi_sector); |
475 | if (__rq && elv_rq_merge_ok(__rq, bio)) { | 475 | if (__rq && elv_rq_merge_ok(__rq, bio)) { |
476 | *req = __rq; | 476 | *req = __rq; |
477 | return ELEVATOR_BACK_MERGE; | 477 | return ELEVATOR_BACK_MERGE; |
478 | } | 478 | } |
479 | 479 | ||
480 | if (e->ops->elevator_merge_fn) | 480 | if (e->ops->elevator_merge_fn) |
481 | return e->ops->elevator_merge_fn(q, req, bio); | 481 | return e->ops->elevator_merge_fn(q, req, bio); |
482 | 482 | ||
483 | return ELEVATOR_NO_MERGE; | 483 | return ELEVATOR_NO_MERGE; |
484 | } | 484 | } |
485 | 485 | ||
486 | void elv_merged_request(struct request_queue *q, struct request *rq, int type) | 486 | void elv_merged_request(struct request_queue *q, struct request *rq, int type) |
487 | { | 487 | { |
488 | elevator_t *e = q->elevator; | 488 | elevator_t *e = q->elevator; |
489 | 489 | ||
490 | if (e->ops->elevator_merged_fn) | 490 | if (e->ops->elevator_merged_fn) |
491 | e->ops->elevator_merged_fn(q, rq, type); | 491 | e->ops->elevator_merged_fn(q, rq, type); |
492 | 492 | ||
493 | if (type == ELEVATOR_BACK_MERGE) | 493 | if (type == ELEVATOR_BACK_MERGE) |
494 | elv_rqhash_reposition(q, rq); | 494 | elv_rqhash_reposition(q, rq); |
495 | 495 | ||
496 | q->last_merge = rq; | 496 | q->last_merge = rq; |
497 | } | 497 | } |
498 | 498 | ||
499 | void elv_merge_requests(struct request_queue *q, struct request *rq, | 499 | void elv_merge_requests(struct request_queue *q, struct request *rq, |
500 | struct request *next) | 500 | struct request *next) |
501 | { | 501 | { |
502 | elevator_t *e = q->elevator; | 502 | elevator_t *e = q->elevator; |
503 | 503 | ||
504 | if (e->ops->elevator_merge_req_fn) | 504 | if (e->ops->elevator_merge_req_fn) |
505 | e->ops->elevator_merge_req_fn(q, rq, next); | 505 | e->ops->elevator_merge_req_fn(q, rq, next); |
506 | 506 | ||
507 | elv_rqhash_reposition(q, rq); | 507 | elv_rqhash_reposition(q, rq); |
508 | elv_rqhash_del(q, next); | 508 | elv_rqhash_del(q, next); |
509 | 509 | ||
510 | q->nr_sorted--; | 510 | q->nr_sorted--; |
511 | q->last_merge = rq; | 511 | q->last_merge = rq; |
512 | } | 512 | } |
513 | 513 | ||
514 | void elv_requeue_request(struct request_queue *q, struct request *rq) | 514 | void elv_requeue_request(struct request_queue *q, struct request *rq) |
515 | { | 515 | { |
516 | /* | 516 | /* |
517 | * it already went through dequeue, we need to decrement the | 517 | * it already went through dequeue, we need to decrement the |
518 | * in_flight count again | 518 | * in_flight count again |
519 | */ | 519 | */ |
520 | if (blk_account_rq(rq)) { | 520 | if (blk_account_rq(rq)) { |
521 | q->in_flight--; | 521 | q->in_flight--; |
522 | if (blk_sorted_rq(rq)) | 522 | if (blk_sorted_rq(rq)) |
523 | elv_deactivate_rq(q, rq); | 523 | elv_deactivate_rq(q, rq); |
524 | } | 524 | } |
525 | 525 | ||
526 | rq->cmd_flags &= ~REQ_STARTED; | 526 | rq->cmd_flags &= ~REQ_STARTED; |
527 | 527 | ||
528 | elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); | 528 | elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); |
529 | } | 529 | } |
530 | 530 | ||
531 | static void elv_drain_elevator(struct request_queue *q) | 531 | static void elv_drain_elevator(struct request_queue *q) |
532 | { | 532 | { |
533 | static int printed; | 533 | static int printed; |
534 | while (q->elevator->ops->elevator_dispatch_fn(q, 1)) | 534 | while (q->elevator->ops->elevator_dispatch_fn(q, 1)) |
535 | ; | 535 | ; |
536 | if (q->nr_sorted == 0) | 536 | if (q->nr_sorted == 0) |
537 | return; | 537 | return; |
538 | if (printed++ < 10) { | 538 | if (printed++ < 10) { |
539 | printk(KERN_ERR "%s: forced dispatching is broken " | 539 | printk(KERN_ERR "%s: forced dispatching is broken " |
540 | "(nr_sorted=%u), please report this\n", | 540 | "(nr_sorted=%u), please report this\n", |
541 | q->elevator->elevator_type->elevator_name, q->nr_sorted); | 541 | q->elevator->elevator_type->elevator_name, q->nr_sorted); |
542 | } | 542 | } |
543 | } | 543 | } |
544 | 544 | ||
545 | void elv_insert(struct request_queue *q, struct request *rq, int where) | 545 | void elv_insert(struct request_queue *q, struct request *rq, int where) |
546 | { | 546 | { |
547 | struct list_head *pos; | 547 | struct list_head *pos; |
548 | unsigned ordseq; | 548 | unsigned ordseq; |
549 | int unplug_it = 1; | 549 | int unplug_it = 1; |
550 | 550 | ||
551 | blk_add_trace_rq(q, rq, BLK_TA_INSERT); | 551 | blk_add_trace_rq(q, rq, BLK_TA_INSERT); |
552 | 552 | ||
553 | rq->q = q; | 553 | rq->q = q; |
554 | 554 | ||
555 | switch (where) { | 555 | switch (where) { |
556 | case ELEVATOR_INSERT_FRONT: | 556 | case ELEVATOR_INSERT_FRONT: |
557 | rq->cmd_flags |= REQ_SOFTBARRIER; | 557 | rq->cmd_flags |= REQ_SOFTBARRIER; |
558 | 558 | ||
559 | list_add(&rq->queuelist, &q->queue_head); | 559 | list_add(&rq->queuelist, &q->queue_head); |
560 | break; | 560 | break; |
561 | 561 | ||
562 | case ELEVATOR_INSERT_BACK: | 562 | case ELEVATOR_INSERT_BACK: |
563 | rq->cmd_flags |= REQ_SOFTBARRIER; | 563 | rq->cmd_flags |= REQ_SOFTBARRIER; |
564 | elv_drain_elevator(q); | 564 | elv_drain_elevator(q); |
565 | list_add_tail(&rq->queuelist, &q->queue_head); | 565 | list_add_tail(&rq->queuelist, &q->queue_head); |
566 | /* | 566 | /* |
567 | * We kick the queue here for the following reasons. | 567 | * We kick the queue here for the following reasons. |
568 | * - The elevator might have returned NULL previously | 568 | * - The elevator might have returned NULL previously |
569 | * to delay requests and returned them now. As the | 569 | * to delay requests and returned them now. As the |
570 | * queue wasn't empty before this request, ll_rw_blk | 570 | * queue wasn't empty before this request, ll_rw_blk |
571 | * won't run the queue on return, resulting in hang. | 571 | * won't run the queue on return, resulting in hang. |
572 | * - Usually, back inserted requests won't be merged | 572 | * - Usually, back inserted requests won't be merged |
573 | * with anything. There's no point in delaying queue | 573 | * with anything. There's no point in delaying queue |
574 | * processing. | 574 | * processing. |
575 | */ | 575 | */ |
576 | blk_remove_plug(q); | 576 | blk_remove_plug(q); |
577 | q->request_fn(q); | 577 | q->request_fn(q); |
578 | break; | 578 | break; |
579 | 579 | ||
580 | case ELEVATOR_INSERT_SORT: | 580 | case ELEVATOR_INSERT_SORT: |
581 | BUG_ON(!blk_fs_request(rq)); | 581 | BUG_ON(!blk_fs_request(rq)); |
582 | rq->cmd_flags |= REQ_SORTED; | 582 | rq->cmd_flags |= REQ_SORTED; |
583 | q->nr_sorted++; | 583 | q->nr_sorted++; |
584 | if (rq_mergeable(rq)) { | 584 | if (rq_mergeable(rq)) { |
585 | elv_rqhash_add(q, rq); | 585 | elv_rqhash_add(q, rq); |
586 | if (!q->last_merge) | 586 | if (!q->last_merge) |
587 | q->last_merge = rq; | 587 | q->last_merge = rq; |
588 | } | 588 | } |
589 | 589 | ||
590 | /* | 590 | /* |
591 | * Some ioscheds (cfq) run q->request_fn directly, so | 591 | * Some ioscheds (cfq) run q->request_fn directly, so |
592 | * rq cannot be accessed after calling | 592 | * rq cannot be accessed after calling |
593 | * elevator_add_req_fn. | 593 | * elevator_add_req_fn. |
594 | */ | 594 | */ |
595 | q->elevator->ops->elevator_add_req_fn(q, rq); | 595 | q->elevator->ops->elevator_add_req_fn(q, rq); |
596 | break; | 596 | break; |
597 | 597 | ||
598 | case ELEVATOR_INSERT_REQUEUE: | 598 | case ELEVATOR_INSERT_REQUEUE: |
599 | /* | 599 | /* |
600 | * If ordered flush isn't in progress, we do front | 600 | * If ordered flush isn't in progress, we do front |
601 | * insertion; otherwise, requests should be requeued | 601 | * insertion; otherwise, requests should be requeued |
602 | * in ordseq order. | 602 | * in ordseq order. |
603 | */ | 603 | */ |
604 | rq->cmd_flags |= REQ_SOFTBARRIER; | 604 | rq->cmd_flags |= REQ_SOFTBARRIER; |
605 | 605 | ||
606 | /* | 606 | /* |
607 | * Most requeues happen because of a busy condition, | 607 | * Most requeues happen because of a busy condition, |
608 | * don't force unplug of the queue for that case. | 608 | * don't force unplug of the queue for that case. |
609 | */ | 609 | */ |
610 | unplug_it = 0; | 610 | unplug_it = 0; |
611 | 611 | ||
612 | if (q->ordseq == 0) { | 612 | if (q->ordseq == 0) { |
613 | list_add(&rq->queuelist, &q->queue_head); | 613 | list_add(&rq->queuelist, &q->queue_head); |
614 | break; | 614 | break; |
615 | } | 615 | } |
616 | 616 | ||
617 | ordseq = blk_ordered_req_seq(rq); | 617 | ordseq = blk_ordered_req_seq(rq); |
618 | 618 | ||
619 | list_for_each(pos, &q->queue_head) { | 619 | list_for_each(pos, &q->queue_head) { |
620 | struct request *pos_rq = list_entry_rq(pos); | 620 | struct request *pos_rq = list_entry_rq(pos); |
621 | if (ordseq <= blk_ordered_req_seq(pos_rq)) | 621 | if (ordseq <= blk_ordered_req_seq(pos_rq)) |
622 | break; | 622 | break; |
623 | } | 623 | } |
624 | 624 | ||
625 | list_add_tail(&rq->queuelist, pos); | 625 | list_add_tail(&rq->queuelist, pos); |
626 | break; | 626 | break; |
627 | 627 | ||
628 | default: | 628 | default: |
629 | printk(KERN_ERR "%s: bad insertion point %d\n", | 629 | printk(KERN_ERR "%s: bad insertion point %d\n", |
630 | __FUNCTION__, where); | 630 | __FUNCTION__, where); |
631 | BUG(); | 631 | BUG(); |
632 | } | 632 | } |
633 | 633 | ||
634 | if (unplug_it && blk_queue_plugged(q)) { | 634 | if (unplug_it && blk_queue_plugged(q)) { |
635 | int nrq = q->rq.count[READ] + q->rq.count[WRITE] | 635 | int nrq = q->rq.count[READ] + q->rq.count[WRITE] |
636 | - q->in_flight; | 636 | - q->in_flight; |
637 | 637 | ||
638 | if (nrq >= q->unplug_thresh) | 638 | if (nrq >= q->unplug_thresh) |
639 | __generic_unplug_device(q); | 639 | __generic_unplug_device(q); |
640 | } | 640 | } |
641 | } | 641 | } |
642 | 642 | ||
643 | void __elv_add_request(struct request_queue *q, struct request *rq, int where, | 643 | void __elv_add_request(struct request_queue *q, struct request *rq, int where, |
644 | int plug) | 644 | int plug) |
645 | { | 645 | { |
646 | if (q->ordcolor) | 646 | if (q->ordcolor) |
647 | rq->cmd_flags |= REQ_ORDERED_COLOR; | 647 | rq->cmd_flags |= REQ_ORDERED_COLOR; |
648 | 648 | ||
649 | if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { | 649 | if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { |
650 | /* | 650 | /* |
651 | * toggle ordered color | 651 | * toggle ordered color |
652 | */ | 652 | */ |
653 | if (blk_barrier_rq(rq)) | 653 | if (blk_barrier_rq(rq)) |
654 | q->ordcolor ^= 1; | 654 | q->ordcolor ^= 1; |
655 | 655 | ||
656 | /* | 656 | /* |
657 | * barriers implicitly indicate back insertion | 657 | * barriers implicitly indicate back insertion |
658 | */ | 658 | */ |
659 | if (where == ELEVATOR_INSERT_SORT) | 659 | if (where == ELEVATOR_INSERT_SORT) |
660 | where = ELEVATOR_INSERT_BACK; | 660 | where = ELEVATOR_INSERT_BACK; |
661 | 661 | ||
662 | /* | 662 | /* |
663 | * this request is scheduling boundary, update | 663 | * this request is scheduling boundary, update |
664 | * end_sector | 664 | * end_sector |
665 | */ | 665 | */ |
666 | if (blk_fs_request(rq)) { | 666 | if (blk_fs_request(rq)) { |
667 | q->end_sector = rq_end_sector(rq); | 667 | q->end_sector = rq_end_sector(rq); |
668 | q->boundary_rq = rq; | 668 | q->boundary_rq = rq; |
669 | } | 669 | } |
670 | } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) | 670 | } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) |
671 | where = ELEVATOR_INSERT_BACK; | 671 | where = ELEVATOR_INSERT_BACK; |
672 | 672 | ||
673 | if (plug) | 673 | if (plug) |
674 | blk_plug_device(q); | 674 | blk_plug_device(q); |
675 | 675 | ||
676 | elv_insert(q, rq, where); | 676 | elv_insert(q, rq, where); |
677 | } | 677 | } |
678 | 678 | ||
679 | EXPORT_SYMBOL(__elv_add_request); | 679 | EXPORT_SYMBOL(__elv_add_request); |
680 | 680 | ||
681 | void elv_add_request(struct request_queue *q, struct request *rq, int where, | 681 | void elv_add_request(struct request_queue *q, struct request *rq, int where, |
682 | int plug) | 682 | int plug) |
683 | { | 683 | { |
684 | unsigned long flags; | 684 | unsigned long flags; |
685 | 685 | ||
686 | spin_lock_irqsave(q->queue_lock, flags); | 686 | spin_lock_irqsave(q->queue_lock, flags); |
687 | __elv_add_request(q, rq, where, plug); | 687 | __elv_add_request(q, rq, where, plug); |
688 | spin_unlock_irqrestore(q->queue_lock, flags); | 688 | spin_unlock_irqrestore(q->queue_lock, flags); |
689 | } | 689 | } |
690 | 690 | ||
691 | EXPORT_SYMBOL(elv_add_request); | 691 | EXPORT_SYMBOL(elv_add_request); |
692 | 692 | ||
693 | static inline struct request *__elv_next_request(struct request_queue *q) | 693 | static inline struct request *__elv_next_request(struct request_queue *q) |
694 | { | 694 | { |
695 | struct request *rq; | 695 | struct request *rq; |
696 | 696 | ||
697 | while (1) { | 697 | while (1) { |
698 | while (!list_empty(&q->queue_head)) { | 698 | while (!list_empty(&q->queue_head)) { |
699 | rq = list_entry_rq(q->queue_head.next); | 699 | rq = list_entry_rq(q->queue_head.next); |
700 | if (blk_do_ordered(q, &rq)) | 700 | if (blk_do_ordered(q, &rq)) |
701 | return rq; | 701 | return rq; |
702 | } | 702 | } |
703 | 703 | ||
704 | if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) | 704 | if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) |
705 | return NULL; | 705 | return NULL; |
706 | } | 706 | } |
707 | } | 707 | } |
708 | 708 | ||
709 | struct request *elv_next_request(struct request_queue *q) | 709 | struct request *elv_next_request(struct request_queue *q) |
710 | { | 710 | { |
711 | struct request *rq; | 711 | struct request *rq; |
712 | int ret; | 712 | int ret; |
713 | 713 | ||
714 | while ((rq = __elv_next_request(q)) != NULL) { | 714 | while ((rq = __elv_next_request(q)) != NULL) { |
715 | /* | ||
716 | * Kill the empty barrier place holder, the driver must | ||
717 | * not ever see it. | ||
718 | */ | ||
719 | if (blk_empty_barrier(rq)) { | ||
720 | end_queued_request(rq, 1); | ||
721 | continue; | ||
722 | } | ||
715 | if (!(rq->cmd_flags & REQ_STARTED)) { | 723 | if (!(rq->cmd_flags & REQ_STARTED)) { |
716 | /* | 724 | /* |
717 | * This is the first time the device driver | 725 | * This is the first time the device driver |
718 | * sees this request (possibly after | 726 | * sees this request (possibly after |
719 | * requeueing). Notify IO scheduler. | 727 | * requeueing). Notify IO scheduler. |
720 | */ | 728 | */ |
721 | if (blk_sorted_rq(rq)) | 729 | if (blk_sorted_rq(rq)) |
722 | elv_activate_rq(q, rq); | 730 | elv_activate_rq(q, rq); |
723 | 731 | ||
724 | /* | 732 | /* |
725 | * just mark as started even if we don't start | 733 | * just mark as started even if we don't start |
726 | * it, a request that has been delayed should | 734 | * it, a request that has been delayed should |
727 | * not be passed by new incoming requests | 735 | * not be passed by new incoming requests |
728 | */ | 736 | */ |
729 | rq->cmd_flags |= REQ_STARTED; | 737 | rq->cmd_flags |= REQ_STARTED; |
730 | blk_add_trace_rq(q, rq, BLK_TA_ISSUE); | 738 | blk_add_trace_rq(q, rq, BLK_TA_ISSUE); |
731 | } | 739 | } |
732 | 740 | ||
733 | if (!q->boundary_rq || q->boundary_rq == rq) { | 741 | if (!q->boundary_rq || q->boundary_rq == rq) { |
734 | q->end_sector = rq_end_sector(rq); | 742 | q->end_sector = rq_end_sector(rq); |
735 | q->boundary_rq = NULL; | 743 | q->boundary_rq = NULL; |
736 | } | 744 | } |
737 | 745 | ||
738 | if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn) | 746 | if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn) |
739 | break; | 747 | break; |
740 | 748 | ||
741 | ret = q->prep_rq_fn(q, rq); | 749 | ret = q->prep_rq_fn(q, rq); |
742 | if (ret == BLKPREP_OK) { | 750 | if (ret == BLKPREP_OK) { |
743 | break; | 751 | break; |
744 | } else if (ret == BLKPREP_DEFER) { | 752 | } else if (ret == BLKPREP_DEFER) { |
745 | /* | 753 | /* |
746 | * the request may have been (partially) prepped. | 754 | * the request may have been (partially) prepped. |
747 | * we need to keep this request in the front to | 755 | * we need to keep this request in the front to |
748 | * avoid resource deadlock. REQ_STARTED will | 756 | * avoid resource deadlock. REQ_STARTED will |
749 | * prevent other fs requests from passing this one. | 757 | * prevent other fs requests from passing this one. |
750 | */ | 758 | */ |
751 | rq = NULL; | 759 | rq = NULL; |
752 | break; | 760 | break; |
753 | } else if (ret == BLKPREP_KILL) { | 761 | } else if (ret == BLKPREP_KILL) { |
754 | rq->cmd_flags |= REQ_QUIET; | 762 | rq->cmd_flags |= REQ_QUIET; |
755 | end_queued_request(rq, 0); | 763 | end_queued_request(rq, 0); |
756 | } else { | 764 | } else { |
757 | printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, | 765 | printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, |
758 | ret); | 766 | ret); |
759 | break; | 767 | break; |
760 | } | 768 | } |
761 | } | 769 | } |
762 | 770 | ||
763 | return rq; | 771 | return rq; |
764 | } | 772 | } |
765 | 773 | ||
766 | EXPORT_SYMBOL(elv_next_request); | 774 | EXPORT_SYMBOL(elv_next_request); |
767 | 775 | ||
768 | void elv_dequeue_request(struct request_queue *q, struct request *rq) | 776 | void elv_dequeue_request(struct request_queue *q, struct request *rq) |
769 | { | 777 | { |
770 | BUG_ON(list_empty(&rq->queuelist)); | 778 | BUG_ON(list_empty(&rq->queuelist)); |
771 | BUG_ON(ELV_ON_HASH(rq)); | 779 | BUG_ON(ELV_ON_HASH(rq)); |
772 | 780 | ||
773 | list_del_init(&rq->queuelist); | 781 | list_del_init(&rq->queuelist); |
774 | 782 | ||
775 | /* | 783 | /* |
776 | * the time frame between a request being removed from the lists | 784 | * the time frame between a request being removed from the lists |
777 | * and to it is freed is accounted as io that is in progress at | 785 | * and to it is freed is accounted as io that is in progress at |
778 | * the driver side. | 786 | * the driver side. |
779 | */ | 787 | */ |
780 | if (blk_account_rq(rq)) | 788 | if (blk_account_rq(rq)) |
781 | q->in_flight++; | 789 | q->in_flight++; |
782 | } | 790 | } |
783 | 791 | ||
784 | EXPORT_SYMBOL(elv_dequeue_request); | 792 | EXPORT_SYMBOL(elv_dequeue_request); |
785 | 793 | ||
786 | int elv_queue_empty(struct request_queue *q) | 794 | int elv_queue_empty(struct request_queue *q) |
787 | { | 795 | { |
788 | elevator_t *e = q->elevator; | 796 | elevator_t *e = q->elevator; |
789 | 797 | ||
790 | if (!list_empty(&q->queue_head)) | 798 | if (!list_empty(&q->queue_head)) |
791 | return 0; | 799 | return 0; |
792 | 800 | ||
793 | if (e->ops->elevator_queue_empty_fn) | 801 | if (e->ops->elevator_queue_empty_fn) |
794 | return e->ops->elevator_queue_empty_fn(q); | 802 | return e->ops->elevator_queue_empty_fn(q); |
795 | 803 | ||
796 | return 1; | 804 | return 1; |
797 | } | 805 | } |
798 | 806 | ||
799 | EXPORT_SYMBOL(elv_queue_empty); | 807 | EXPORT_SYMBOL(elv_queue_empty); |
800 | 808 | ||
801 | struct request *elv_latter_request(struct request_queue *q, struct request *rq) | 809 | struct request *elv_latter_request(struct request_queue *q, struct request *rq) |
802 | { | 810 | { |
803 | elevator_t *e = q->elevator; | 811 | elevator_t *e = q->elevator; |
804 | 812 | ||
805 | if (e->ops->elevator_latter_req_fn) | 813 | if (e->ops->elevator_latter_req_fn) |
806 | return e->ops->elevator_latter_req_fn(q, rq); | 814 | return e->ops->elevator_latter_req_fn(q, rq); |
807 | return NULL; | 815 | return NULL; |
808 | } | 816 | } |
809 | 817 | ||
810 | struct request *elv_former_request(struct request_queue *q, struct request *rq) | 818 | struct request *elv_former_request(struct request_queue *q, struct request *rq) |
811 | { | 819 | { |
812 | elevator_t *e = q->elevator; | 820 | elevator_t *e = q->elevator; |
813 | 821 | ||
814 | if (e->ops->elevator_former_req_fn) | 822 | if (e->ops->elevator_former_req_fn) |
815 | return e->ops->elevator_former_req_fn(q, rq); | 823 | return e->ops->elevator_former_req_fn(q, rq); |
816 | return NULL; | 824 | return NULL; |
817 | } | 825 | } |
818 | 826 | ||
819 | int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) | 827 | int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) |
820 | { | 828 | { |
821 | elevator_t *e = q->elevator; | 829 | elevator_t *e = q->elevator; |
822 | 830 | ||
823 | if (e->ops->elevator_set_req_fn) | 831 | if (e->ops->elevator_set_req_fn) |
824 | return e->ops->elevator_set_req_fn(q, rq, gfp_mask); | 832 | return e->ops->elevator_set_req_fn(q, rq, gfp_mask); |
825 | 833 | ||
826 | rq->elevator_private = NULL; | 834 | rq->elevator_private = NULL; |
827 | return 0; | 835 | return 0; |
828 | } | 836 | } |
829 | 837 | ||
830 | void elv_put_request(struct request_queue *q, struct request *rq) | 838 | void elv_put_request(struct request_queue *q, struct request *rq) |
831 | { | 839 | { |
832 | elevator_t *e = q->elevator; | 840 | elevator_t *e = q->elevator; |
833 | 841 | ||
834 | if (e->ops->elevator_put_req_fn) | 842 | if (e->ops->elevator_put_req_fn) |
835 | e->ops->elevator_put_req_fn(rq); | 843 | e->ops->elevator_put_req_fn(rq); |
836 | } | 844 | } |
837 | 845 | ||
838 | int elv_may_queue(struct request_queue *q, int rw) | 846 | int elv_may_queue(struct request_queue *q, int rw) |
839 | { | 847 | { |
840 | elevator_t *e = q->elevator; | 848 | elevator_t *e = q->elevator; |
841 | 849 | ||
842 | if (e->ops->elevator_may_queue_fn) | 850 | if (e->ops->elevator_may_queue_fn) |
843 | return e->ops->elevator_may_queue_fn(q, rw); | 851 | return e->ops->elevator_may_queue_fn(q, rw); |
844 | 852 | ||
845 | return ELV_MQUEUE_MAY; | 853 | return ELV_MQUEUE_MAY; |
846 | } | 854 | } |
847 | 855 | ||
848 | void elv_completed_request(struct request_queue *q, struct request *rq) | 856 | void elv_completed_request(struct request_queue *q, struct request *rq) |
849 | { | 857 | { |
850 | elevator_t *e = q->elevator; | 858 | elevator_t *e = q->elevator; |
851 | 859 | ||
852 | /* | 860 | /* |
853 | * request is released from the driver, io must be done | 861 | * request is released from the driver, io must be done |
854 | */ | 862 | */ |
855 | if (blk_account_rq(rq)) { | 863 | if (blk_account_rq(rq)) { |
856 | q->in_flight--; | 864 | q->in_flight--; |
857 | if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) | 865 | if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) |
858 | e->ops->elevator_completed_req_fn(q, rq); | 866 | e->ops->elevator_completed_req_fn(q, rq); |
859 | } | 867 | } |
860 | 868 | ||
861 | /* | 869 | /* |
862 | * Check if the queue is waiting for fs requests to be | 870 | * Check if the queue is waiting for fs requests to be |
863 | * drained for flush sequence. | 871 | * drained for flush sequence. |
864 | */ | 872 | */ |
865 | if (unlikely(q->ordseq)) { | 873 | if (unlikely(q->ordseq)) { |
866 | struct request *first_rq = list_entry_rq(q->queue_head.next); | 874 | struct request *first_rq = list_entry_rq(q->queue_head.next); |
867 | if (q->in_flight == 0 && | 875 | if (q->in_flight == 0 && |
868 | blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && | 876 | blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && |
869 | blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { | 877 | blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { |
870 | blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); | 878 | blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); |
871 | q->request_fn(q); | 879 | q->request_fn(q); |
872 | } | 880 | } |
873 | } | 881 | } |
874 | } | 882 | } |
875 | 883 | ||
876 | #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) | 884 | #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) |
877 | 885 | ||
878 | static ssize_t | 886 | static ssize_t |
879 | elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | 887 | elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) |
880 | { | 888 | { |
881 | elevator_t *e = container_of(kobj, elevator_t, kobj); | 889 | elevator_t *e = container_of(kobj, elevator_t, kobj); |
882 | struct elv_fs_entry *entry = to_elv(attr); | 890 | struct elv_fs_entry *entry = to_elv(attr); |
883 | ssize_t error; | 891 | ssize_t error; |
884 | 892 | ||
885 | if (!entry->show) | 893 | if (!entry->show) |
886 | return -EIO; | 894 | return -EIO; |
887 | 895 | ||
888 | mutex_lock(&e->sysfs_lock); | 896 | mutex_lock(&e->sysfs_lock); |
889 | error = e->ops ? entry->show(e, page) : -ENOENT; | 897 | error = e->ops ? entry->show(e, page) : -ENOENT; |
890 | mutex_unlock(&e->sysfs_lock); | 898 | mutex_unlock(&e->sysfs_lock); |
891 | return error; | 899 | return error; |
892 | } | 900 | } |
893 | 901 | ||
894 | static ssize_t | 902 | static ssize_t |
895 | elv_attr_store(struct kobject *kobj, struct attribute *attr, | 903 | elv_attr_store(struct kobject *kobj, struct attribute *attr, |
896 | const char *page, size_t length) | 904 | const char *page, size_t length) |
897 | { | 905 | { |
898 | elevator_t *e = container_of(kobj, elevator_t, kobj); | 906 | elevator_t *e = container_of(kobj, elevator_t, kobj); |
899 | struct elv_fs_entry *entry = to_elv(attr); | 907 | struct elv_fs_entry *entry = to_elv(attr); |
900 | ssize_t error; | 908 | ssize_t error; |
901 | 909 | ||
902 | if (!entry->store) | 910 | if (!entry->store) |
903 | return -EIO; | 911 | return -EIO; |
904 | 912 | ||
905 | mutex_lock(&e->sysfs_lock); | 913 | mutex_lock(&e->sysfs_lock); |
906 | error = e->ops ? entry->store(e, page, length) : -ENOENT; | 914 | error = e->ops ? entry->store(e, page, length) : -ENOENT; |
907 | mutex_unlock(&e->sysfs_lock); | 915 | mutex_unlock(&e->sysfs_lock); |
908 | return error; | 916 | return error; |
909 | } | 917 | } |
910 | 918 | ||
911 | static struct sysfs_ops elv_sysfs_ops = { | 919 | static struct sysfs_ops elv_sysfs_ops = { |
912 | .show = elv_attr_show, | 920 | .show = elv_attr_show, |
913 | .store = elv_attr_store, | 921 | .store = elv_attr_store, |
914 | }; | 922 | }; |
915 | 923 | ||
916 | static struct kobj_type elv_ktype = { | 924 | static struct kobj_type elv_ktype = { |
917 | .sysfs_ops = &elv_sysfs_ops, | 925 | .sysfs_ops = &elv_sysfs_ops, |
918 | .release = elevator_release, | 926 | .release = elevator_release, |
919 | }; | 927 | }; |
920 | 928 | ||
921 | int elv_register_queue(struct request_queue *q) | 929 | int elv_register_queue(struct request_queue *q) |
922 | { | 930 | { |
923 | elevator_t *e = q->elevator; | 931 | elevator_t *e = q->elevator; |
924 | int error; | 932 | int error; |
925 | 933 | ||
926 | e->kobj.parent = &q->kobj; | 934 | e->kobj.parent = &q->kobj; |
927 | 935 | ||
928 | error = kobject_add(&e->kobj); | 936 | error = kobject_add(&e->kobj); |
929 | if (!error) { | 937 | if (!error) { |
930 | struct elv_fs_entry *attr = e->elevator_type->elevator_attrs; | 938 | struct elv_fs_entry *attr = e->elevator_type->elevator_attrs; |
931 | if (attr) { | 939 | if (attr) { |
932 | while (attr->attr.name) { | 940 | while (attr->attr.name) { |
933 | if (sysfs_create_file(&e->kobj, &attr->attr)) | 941 | if (sysfs_create_file(&e->kobj, &attr->attr)) |
934 | break; | 942 | break; |
935 | attr++; | 943 | attr++; |
936 | } | 944 | } |
937 | } | 945 | } |
938 | kobject_uevent(&e->kobj, KOBJ_ADD); | 946 | kobject_uevent(&e->kobj, KOBJ_ADD); |
939 | } | 947 | } |
940 | return error; | 948 | return error; |
941 | } | 949 | } |
942 | 950 | ||
943 | static void __elv_unregister_queue(elevator_t *e) | 951 | static void __elv_unregister_queue(elevator_t *e) |
944 | { | 952 | { |
945 | kobject_uevent(&e->kobj, KOBJ_REMOVE); | 953 | kobject_uevent(&e->kobj, KOBJ_REMOVE); |
946 | kobject_del(&e->kobj); | 954 | kobject_del(&e->kobj); |
947 | } | 955 | } |
948 | 956 | ||
949 | void elv_unregister_queue(struct request_queue *q) | 957 | void elv_unregister_queue(struct request_queue *q) |
950 | { | 958 | { |
951 | if (q) | 959 | if (q) |
952 | __elv_unregister_queue(q->elevator); | 960 | __elv_unregister_queue(q->elevator); |
953 | } | 961 | } |
954 | 962 | ||
955 | int elv_register(struct elevator_type *e) | 963 | int elv_register(struct elevator_type *e) |
956 | { | 964 | { |
957 | char *def = ""; | 965 | char *def = ""; |
958 | 966 | ||
959 | spin_lock(&elv_list_lock); | 967 | spin_lock(&elv_list_lock); |
960 | BUG_ON(elevator_find(e->elevator_name)); | 968 | BUG_ON(elevator_find(e->elevator_name)); |
961 | list_add_tail(&e->list, &elv_list); | 969 | list_add_tail(&e->list, &elv_list); |
962 | spin_unlock(&elv_list_lock); | 970 | spin_unlock(&elv_list_lock); |
963 | 971 | ||
964 | if (!strcmp(e->elevator_name, chosen_elevator) || | 972 | if (!strcmp(e->elevator_name, chosen_elevator) || |
965 | (!*chosen_elevator && | 973 | (!*chosen_elevator && |
966 | !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) | 974 | !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) |
967 | def = " (default)"; | 975 | def = " (default)"; |
968 | 976 | ||
969 | printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, def); | 977 | printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, def); |
970 | return 0; | 978 | return 0; |
971 | } | 979 | } |
972 | EXPORT_SYMBOL_GPL(elv_register); | 980 | EXPORT_SYMBOL_GPL(elv_register); |
973 | 981 | ||
974 | void elv_unregister(struct elevator_type *e) | 982 | void elv_unregister(struct elevator_type *e) |
975 | { | 983 | { |
976 | struct task_struct *g, *p; | 984 | struct task_struct *g, *p; |
977 | 985 | ||
978 | /* | 986 | /* |
979 | * Iterate every thread in the process to remove the io contexts. | 987 | * Iterate every thread in the process to remove the io contexts. |
980 | */ | 988 | */ |
981 | if (e->ops.trim) { | 989 | if (e->ops.trim) { |
982 | read_lock(&tasklist_lock); | 990 | read_lock(&tasklist_lock); |
983 | do_each_thread(g, p) { | 991 | do_each_thread(g, p) { |
984 | task_lock(p); | 992 | task_lock(p); |
985 | if (p->io_context) | 993 | if (p->io_context) |
986 | e->ops.trim(p->io_context); | 994 | e->ops.trim(p->io_context); |
987 | task_unlock(p); | 995 | task_unlock(p); |
988 | } while_each_thread(g, p); | 996 | } while_each_thread(g, p); |
989 | read_unlock(&tasklist_lock); | 997 | read_unlock(&tasklist_lock); |
990 | } | 998 | } |
991 | 999 | ||
992 | spin_lock(&elv_list_lock); | 1000 | spin_lock(&elv_list_lock); |
993 | list_del_init(&e->list); | 1001 | list_del_init(&e->list); |
994 | spin_unlock(&elv_list_lock); | 1002 | spin_unlock(&elv_list_lock); |
995 | } | 1003 | } |
996 | EXPORT_SYMBOL_GPL(elv_unregister); | 1004 | EXPORT_SYMBOL_GPL(elv_unregister); |
997 | 1005 | ||
998 | /* | 1006 | /* |
999 | * switch to new_e io scheduler. be careful not to introduce deadlocks - | 1007 | * switch to new_e io scheduler. be careful not to introduce deadlocks - |
1000 | * we don't free the old io scheduler before we have allocated what we | 1008 | * we don't free the old io scheduler before we have allocated what we |
1001 | * need for the new one. this way we have a chance of going back to the old | 1009 | * need for the new one. this way we have a chance of going back to the old |
1002 | * one, if the new one fails init for some reason. | 1010 | * one, if the new one fails init for some reason. |
1003 | */ | 1011 | */ |
1004 | static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) | 1012 | static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) |
1005 | { | 1013 | { |
1006 | elevator_t *old_elevator, *e; | 1014 | elevator_t *old_elevator, *e; |
1007 | void *data; | 1015 | void *data; |
1008 | 1016 | ||
1009 | /* | 1017 | /* |
1010 | * Allocate new elevator | 1018 | * Allocate new elevator |
1011 | */ | 1019 | */ |
1012 | e = elevator_alloc(q, new_e); | 1020 | e = elevator_alloc(q, new_e); |
1013 | if (!e) | 1021 | if (!e) |
1014 | return 0; | 1022 | return 0; |
1015 | 1023 | ||
1016 | data = elevator_init_queue(q, e); | 1024 | data = elevator_init_queue(q, e); |
1017 | if (!data) { | 1025 | if (!data) { |
1018 | kobject_put(&e->kobj); | 1026 | kobject_put(&e->kobj); |
1019 | return 0; | 1027 | return 0; |
1020 | } | 1028 | } |
1021 | 1029 | ||
1022 | /* | 1030 | /* |
1023 | * Turn on BYPASS and drain all requests w/ elevator private data | 1031 | * Turn on BYPASS and drain all requests w/ elevator private data |
1024 | */ | 1032 | */ |
1025 | spin_lock_irq(q->queue_lock); | 1033 | spin_lock_irq(q->queue_lock); |
1026 | 1034 | ||
1027 | set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 1035 | set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
1028 | 1036 | ||
1029 | elv_drain_elevator(q); | 1037 | elv_drain_elevator(q); |
1030 | 1038 | ||
1031 | while (q->rq.elvpriv) { | 1039 | while (q->rq.elvpriv) { |
1032 | blk_remove_plug(q); | 1040 | blk_remove_plug(q); |
1033 | q->request_fn(q); | 1041 | q->request_fn(q); |
1034 | spin_unlock_irq(q->queue_lock); | 1042 | spin_unlock_irq(q->queue_lock); |
1035 | msleep(10); | 1043 | msleep(10); |
1036 | spin_lock_irq(q->queue_lock); | 1044 | spin_lock_irq(q->queue_lock); |
1037 | elv_drain_elevator(q); | 1045 | elv_drain_elevator(q); |
1038 | } | 1046 | } |
1039 | 1047 | ||
1040 | /* | 1048 | /* |
1041 | * Remember old elevator. | 1049 | * Remember old elevator. |
1042 | */ | 1050 | */ |
1043 | old_elevator = q->elevator; | 1051 | old_elevator = q->elevator; |
1044 | 1052 | ||
1045 | /* | 1053 | /* |
1046 | * attach and start new elevator | 1054 | * attach and start new elevator |
1047 | */ | 1055 | */ |
1048 | elevator_attach(q, e, data); | 1056 | elevator_attach(q, e, data); |
1049 | 1057 | ||
1050 | spin_unlock_irq(q->queue_lock); | 1058 | spin_unlock_irq(q->queue_lock); |
1051 | 1059 | ||
1052 | __elv_unregister_queue(old_elevator); | 1060 | __elv_unregister_queue(old_elevator); |
1053 | 1061 | ||
1054 | if (elv_register_queue(q)) | 1062 | if (elv_register_queue(q)) |
1055 | goto fail_register; | 1063 | goto fail_register; |
1056 | 1064 | ||
1057 | /* | 1065 | /* |
1058 | * finally exit old elevator and turn off BYPASS. | 1066 | * finally exit old elevator and turn off BYPASS. |
1059 | */ | 1067 | */ |
1060 | elevator_exit(old_elevator); | 1068 | elevator_exit(old_elevator); |
1061 | clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 1069 | clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
1062 | return 1; | 1070 | return 1; |
1063 | 1071 | ||
1064 | fail_register: | 1072 | fail_register: |
1065 | /* | 1073 | /* |
1066 | * switch failed, exit the new io scheduler and reattach the old | 1074 | * switch failed, exit the new io scheduler and reattach the old |
1067 | * one again (along with re-adding the sysfs dir) | 1075 | * one again (along with re-adding the sysfs dir) |
1068 | */ | 1076 | */ |
1069 | elevator_exit(e); | 1077 | elevator_exit(e); |
1070 | q->elevator = old_elevator; | 1078 | q->elevator = old_elevator; |
1071 | elv_register_queue(q); | 1079 | elv_register_queue(q); |
1072 | clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 1080 | clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
1073 | return 0; | 1081 | return 0; |
1074 | } | 1082 | } |
1075 | 1083 | ||
1076 | ssize_t elv_iosched_store(struct request_queue *q, const char *name, | 1084 | ssize_t elv_iosched_store(struct request_queue *q, const char *name, |
1077 | size_t count) | 1085 | size_t count) |
1078 | { | 1086 | { |
1079 | char elevator_name[ELV_NAME_MAX]; | 1087 | char elevator_name[ELV_NAME_MAX]; |
1080 | size_t len; | 1088 | size_t len; |
1081 | struct elevator_type *e; | 1089 | struct elevator_type *e; |
1082 | 1090 | ||
1083 | elevator_name[sizeof(elevator_name) - 1] = '\0'; | 1091 | elevator_name[sizeof(elevator_name) - 1] = '\0'; |
1084 | strncpy(elevator_name, name, sizeof(elevator_name) - 1); | 1092 | strncpy(elevator_name, name, sizeof(elevator_name) - 1); |
1085 | len = strlen(elevator_name); | 1093 | len = strlen(elevator_name); |
1086 | 1094 | ||
1087 | if (len && elevator_name[len - 1] == '\n') | 1095 | if (len && elevator_name[len - 1] == '\n') |
1088 | elevator_name[len - 1] = '\0'; | 1096 | elevator_name[len - 1] = '\0'; |
1089 | 1097 | ||
1090 | e = elevator_get(elevator_name); | 1098 | e = elevator_get(elevator_name); |
1091 | if (!e) { | 1099 | if (!e) { |
1092 | printk(KERN_ERR "elevator: type %s not found\n", elevator_name); | 1100 | printk(KERN_ERR "elevator: type %s not found\n", elevator_name); |
1093 | return -EINVAL; | 1101 | return -EINVAL; |
1094 | } | 1102 | } |
1095 | 1103 | ||
1096 | if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { | 1104 | if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { |
1097 | elevator_put(e); | 1105 | elevator_put(e); |
1098 | return count; | 1106 | return count; |
1099 | } | 1107 | } |
1100 | 1108 | ||
1101 | if (!elevator_switch(q, e)) | 1109 | if (!elevator_switch(q, e)) |
1102 | printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name); | 1110 | printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name); |
1103 | return count; | 1111 | return count; |
1104 | } | 1112 | } |
1105 | 1113 | ||
1106 | ssize_t elv_iosched_show(struct request_queue *q, char *name) | 1114 | ssize_t elv_iosched_show(struct request_queue *q, char *name) |
1107 | { | 1115 | { |
1108 | elevator_t *e = q->elevator; | 1116 | elevator_t *e = q->elevator; |
1109 | struct elevator_type *elv = e->elevator_type; | 1117 | struct elevator_type *elv = e->elevator_type; |
1110 | struct elevator_type *__e; | 1118 | struct elevator_type *__e; |
1111 | int len = 0; | 1119 | int len = 0; |
1112 | 1120 | ||
1113 | spin_lock(&elv_list_lock); | 1121 | spin_lock(&elv_list_lock); |
1114 | list_for_each_entry(__e, &elv_list, list) { | 1122 | list_for_each_entry(__e, &elv_list, list) { |
1115 | if (!strcmp(elv->elevator_name, __e->elevator_name)) | 1123 | if (!strcmp(elv->elevator_name, __e->elevator_name)) |
1116 | len += sprintf(name+len, "[%s] ", elv->elevator_name); | 1124 | len += sprintf(name+len, "[%s] ", elv->elevator_name); |
1117 | else | 1125 | else |
1118 | len += sprintf(name+len, "%s ", __e->elevator_name); | 1126 | len += sprintf(name+len, "%s ", __e->elevator_name); |
1119 | } | 1127 | } |
1120 | spin_unlock(&elv_list_lock); | 1128 | spin_unlock(&elv_list_lock); |
1121 | 1129 | ||
1122 | len += sprintf(len+name, "\n"); | 1130 | len += sprintf(len+name, "\n"); |
1123 | return len; | 1131 | return len; |
1124 | } | 1132 | } |
1125 | 1133 | ||
1126 | struct request *elv_rb_former_request(struct request_queue *q, | 1134 | struct request *elv_rb_former_request(struct request_queue *q, |
1127 | struct request *rq) | 1135 | struct request *rq) |
1128 | { | 1136 | { |
1129 | struct rb_node *rbprev = rb_prev(&rq->rb_node); | 1137 | struct rb_node *rbprev = rb_prev(&rq->rb_node); |
1130 | 1138 | ||
1131 | if (rbprev) | 1139 | if (rbprev) |
1132 | return rb_entry_rq(rbprev); | 1140 | return rb_entry_rq(rbprev); |
1133 | 1141 | ||
1134 | return NULL; | 1142 | return NULL; |
1135 | } | 1143 | } |
1136 | 1144 | ||
1137 | EXPORT_SYMBOL(elv_rb_former_request); | 1145 | EXPORT_SYMBOL(elv_rb_former_request); |
1138 | 1146 | ||
1139 | struct request *elv_rb_latter_request(struct request_queue *q, | 1147 | struct request *elv_rb_latter_request(struct request_queue *q, |
1140 | struct request *rq) | 1148 | struct request *rq) |
1141 | { | 1149 | { |
1142 | struct rb_node *rbnext = rb_next(&rq->rb_node); | 1150 | struct rb_node *rbnext = rb_next(&rq->rb_node); |
1143 | 1151 | ||
1144 | if (rbnext) | 1152 | if (rbnext) |
1145 | return rb_entry_rq(rbnext); | 1153 | return rb_entry_rq(rbnext); |
1146 | 1154 | ||
1147 | return NULL; | 1155 | return NULL; |
1148 | } | 1156 | } |
1149 | 1157 | ||
1150 | EXPORT_SYMBOL(elv_rb_latter_request); | 1158 | EXPORT_SYMBOL(elv_rb_latter_request); |
1151 | 1159 |
block/ll_rw_blk.c
1 | /* | 1 | /* |
2 | * Copyright (C) 1991, 1992 Linus Torvalds | 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics | 3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics |
4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> | 5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> |
6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000 | 6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000 |
7 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 | 7 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 |
8 | */ | 8 | */ |
9 | 9 | ||
10 | /* | 10 | /* |
11 | * This handles all read/write requests to block devices | 11 | * This handles all read/write requests to block devices |
12 | */ | 12 | */ |
13 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/backing-dev.h> | 15 | #include <linux/backing-dev.h> |
16 | #include <linux/bio.h> | 16 | #include <linux/bio.h> |
17 | #include <linux/blkdev.h> | 17 | #include <linux/blkdev.h> |
18 | #include <linux/highmem.h> | 18 | #include <linux/highmem.h> |
19 | #include <linux/mm.h> | 19 | #include <linux/mm.h> |
20 | #include <linux/kernel_stat.h> | 20 | #include <linux/kernel_stat.h> |
21 | #include <linux/string.h> | 21 | #include <linux/string.h> |
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ | 23 | #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ |
24 | #include <linux/completion.h> | 24 | #include <linux/completion.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/swap.h> | 26 | #include <linux/swap.h> |
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/task_io_accounting_ops.h> | 28 | #include <linux/task_io_accounting_ops.h> |
29 | #include <linux/interrupt.h> | 29 | #include <linux/interrupt.h> |
30 | #include <linux/cpu.h> | 30 | #include <linux/cpu.h> |
31 | #include <linux/blktrace_api.h> | 31 | #include <linux/blktrace_api.h> |
32 | #include <linux/fault-inject.h> | 32 | #include <linux/fault-inject.h> |
33 | 33 | ||
34 | /* | 34 | /* |
35 | * for max sense size | 35 | * for max sense size |
36 | */ | 36 | */ |
37 | #include <scsi/scsi_cmnd.h> | 37 | #include <scsi/scsi_cmnd.h> |
38 | 38 | ||
39 | static void blk_unplug_work(struct work_struct *work); | 39 | static void blk_unplug_work(struct work_struct *work); |
40 | static void blk_unplug_timeout(unsigned long data); | 40 | static void blk_unplug_timeout(unsigned long data); |
41 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); | 41 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); |
42 | static void init_request_from_bio(struct request *req, struct bio *bio); | 42 | static void init_request_from_bio(struct request *req, struct bio *bio); |
43 | static int __make_request(struct request_queue *q, struct bio *bio); | 43 | static int __make_request(struct request_queue *q, struct bio *bio); |
44 | static struct io_context *current_io_context(gfp_t gfp_flags, int node); | 44 | static struct io_context *current_io_context(gfp_t gfp_flags, int node); |
45 | static void blk_recalc_rq_segments(struct request *rq); | 45 | static void blk_recalc_rq_segments(struct request *rq); |
46 | static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 46 | static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
47 | struct bio *bio); | 47 | struct bio *bio); |
48 | 48 | ||
49 | /* | 49 | /* |
50 | * For the allocated request tables | 50 | * For the allocated request tables |
51 | */ | 51 | */ |
52 | static struct kmem_cache *request_cachep; | 52 | static struct kmem_cache *request_cachep; |
53 | 53 | ||
54 | /* | 54 | /* |
55 | * For queue allocation | 55 | * For queue allocation |
56 | */ | 56 | */ |
57 | static struct kmem_cache *requestq_cachep; | 57 | static struct kmem_cache *requestq_cachep; |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * For io context allocations | 60 | * For io context allocations |
61 | */ | 61 | */ |
62 | static struct kmem_cache *iocontext_cachep; | 62 | static struct kmem_cache *iocontext_cachep; |
63 | 63 | ||
64 | /* | 64 | /* |
65 | * Controlling structure to kblockd | 65 | * Controlling structure to kblockd |
66 | */ | 66 | */ |
67 | static struct workqueue_struct *kblockd_workqueue; | 67 | static struct workqueue_struct *kblockd_workqueue; |
68 | 68 | ||
69 | unsigned long blk_max_low_pfn, blk_max_pfn; | 69 | unsigned long blk_max_low_pfn, blk_max_pfn; |
70 | 70 | ||
71 | EXPORT_SYMBOL(blk_max_low_pfn); | 71 | EXPORT_SYMBOL(blk_max_low_pfn); |
72 | EXPORT_SYMBOL(blk_max_pfn); | 72 | EXPORT_SYMBOL(blk_max_pfn); |
73 | 73 | ||
74 | static DEFINE_PER_CPU(struct list_head, blk_cpu_done); | 74 | static DEFINE_PER_CPU(struct list_head, blk_cpu_done); |
75 | 75 | ||
76 | /* Amount of time in which a process may batch requests */ | 76 | /* Amount of time in which a process may batch requests */ |
77 | #define BLK_BATCH_TIME (HZ/50UL) | 77 | #define BLK_BATCH_TIME (HZ/50UL) |
78 | 78 | ||
79 | /* Number of requests a "batching" process may submit */ | 79 | /* Number of requests a "batching" process may submit */ |
80 | #define BLK_BATCH_REQ 32 | 80 | #define BLK_BATCH_REQ 32 |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * Return the threshold (number of used requests) at which the queue is | 83 | * Return the threshold (number of used requests) at which the queue is |
84 | * considered to be congested. It includes a little hysteresis to keep the | 84 | * considered to be congested. It includes a little hysteresis to keep the |
85 | * context switch rate down. | 85 | * context switch rate down. |
86 | */ | 86 | */ |
87 | static inline int queue_congestion_on_threshold(struct request_queue *q) | 87 | static inline int queue_congestion_on_threshold(struct request_queue *q) |
88 | { | 88 | { |
89 | return q->nr_congestion_on; | 89 | return q->nr_congestion_on; |
90 | } | 90 | } |
91 | 91 | ||
92 | /* | 92 | /* |
93 | * The threshold at which a queue is considered to be uncongested | 93 | * The threshold at which a queue is considered to be uncongested |
94 | */ | 94 | */ |
95 | static inline int queue_congestion_off_threshold(struct request_queue *q) | 95 | static inline int queue_congestion_off_threshold(struct request_queue *q) |
96 | { | 96 | { |
97 | return q->nr_congestion_off; | 97 | return q->nr_congestion_off; |
98 | } | 98 | } |
99 | 99 | ||
100 | static void blk_queue_congestion_threshold(struct request_queue *q) | 100 | static void blk_queue_congestion_threshold(struct request_queue *q) |
101 | { | 101 | { |
102 | int nr; | 102 | int nr; |
103 | 103 | ||
104 | nr = q->nr_requests - (q->nr_requests / 8) + 1; | 104 | nr = q->nr_requests - (q->nr_requests / 8) + 1; |
105 | if (nr > q->nr_requests) | 105 | if (nr > q->nr_requests) |
106 | nr = q->nr_requests; | 106 | nr = q->nr_requests; |
107 | q->nr_congestion_on = nr; | 107 | q->nr_congestion_on = nr; |
108 | 108 | ||
109 | nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; | 109 | nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; |
110 | if (nr < 1) | 110 | if (nr < 1) |
111 | nr = 1; | 111 | nr = 1; |
112 | q->nr_congestion_off = nr; | 112 | q->nr_congestion_off = nr; |
113 | } | 113 | } |
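A worked example of the thresholds computed above, assuming the default queue depth of 128 requests (what BLKDEV_MAX_RQ is at this point in time):

	/*
	 * nr_requests = 128:
	 *   congestion on:  128 - 128/8 + 1          = 113 used requests
	 *   congestion off: 128 - 128/8 - 128/16 - 1 = 103 used requests
	 *
	 * The queue is flagged congested once 113 requests are in use and is
	 * only unflagged again when usage falls back to 103, which provides
	 * the hysteresis mentioned in the comment above.
	 */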
114 | 114 | ||
115 | /** | 115 | /** |
116 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info | 116 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info |
117 | * @bdev: device | 117 | * @bdev: device |
118 | * | 118 | * |
119 | * Locates the passed device's request queue and returns the address of its | 119 | * Locates the passed device's request queue and returns the address of its |
120 | * backing_dev_info | 120 | * backing_dev_info |
121 | * | 121 | * |
122 | * Will return NULL if the request queue cannot be located. | 122 | * Will return NULL if the request queue cannot be located. |
123 | */ | 123 | */ |
124 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) | 124 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) |
125 | { | 125 | { |
126 | struct backing_dev_info *ret = NULL; | 126 | struct backing_dev_info *ret = NULL; |
127 | struct request_queue *q = bdev_get_queue(bdev); | 127 | struct request_queue *q = bdev_get_queue(bdev); |
128 | 128 | ||
129 | if (q) | 129 | if (q) |
130 | ret = &q->backing_dev_info; | 130 | ret = &q->backing_dev_info; |
131 | return ret; | 131 | return ret; |
132 | } | 132 | } |
133 | EXPORT_SYMBOL(blk_get_backing_dev_info); | 133 | EXPORT_SYMBOL(blk_get_backing_dev_info); |
134 | 134 | ||
135 | /** | 135 | /** |
136 | * blk_queue_prep_rq - set a prepare_request function for queue | 136 | * blk_queue_prep_rq - set a prepare_request function for queue |
137 | * @q: queue | 137 | * @q: queue |
138 | * @pfn: prepare_request function | 138 | * @pfn: prepare_request function |
139 | * | 139 | * |
140 | * It's possible for a queue to register a prepare_request callback which | 140 | * It's possible for a queue to register a prepare_request callback which |
141 | * is invoked before the request is handed to the request_fn. The goal of | 141 | * is invoked before the request is handed to the request_fn. The goal of |
142 | * the function is to prepare a request for I/O, it can be used to build a | 142 | * the function is to prepare a request for I/O, it can be used to build a |
143 | * cdb from the request data for instance. | 143 | * cdb from the request data for instance. |
144 | * | 144 | * |
145 | */ | 145 | */ |
146 | void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) | 146 | void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) |
147 | { | 147 | { |
148 | q->prep_rq_fn = pfn; | 148 | q->prep_rq_fn = pfn; |
149 | } | 149 | } |
150 | 150 | ||
151 | EXPORT_SYMBOL(blk_queue_prep_rq); | 151 | EXPORT_SYMBOL(blk_queue_prep_rq); |
152 | 152 | ||
153 | /** | 153 | /** |
154 | * blk_queue_merge_bvec - set a merge_bvec function for queue | 154 | * blk_queue_merge_bvec - set a merge_bvec function for queue |
155 | * @q: queue | 155 | * @q: queue |
156 | * @mbfn: merge_bvec_fn | 156 | * @mbfn: merge_bvec_fn |
157 | * | 157 | * |
158 | * Usually queues have static limitations on the max sectors or segments that | 158 | * Usually queues have static limitations on the max sectors or segments that |
159 | * we can put in a request. Stacking drivers may have some settings that | 159 | * we can put in a request. Stacking drivers may have some settings that |
160 | * are dynamic, and thus we have to query the queue whether it is ok to | 160 | * are dynamic, and thus we have to query the queue whether it is ok to |
161 | * add a new bio_vec to a bio at a given offset or not. If the block device | 161 | * add a new bio_vec to a bio at a given offset or not. If the block device |
162 | * has such limitations, it needs to register a merge_bvec_fn to control | 162 | * has such limitations, it needs to register a merge_bvec_fn to control |
163 | * the size of bio's sent to it. Note that a block device *must* allow a | 163 | * the size of bio's sent to it. Note that a block device *must* allow a |
164 | * single page to be added to an empty bio. The block device driver may want | 164 | * single page to be added to an empty bio. The block device driver may want |
165 | * to use the bio_split() function to deal with these bio's. By default | 165 | * to use the bio_split() function to deal with these bio's. By default |
166 | * no merge_bvec_fn is defined for a queue, and only the fixed limits are | 166 | * no merge_bvec_fn is defined for a queue, and only the fixed limits are |
167 | * honored. | 167 | * honored. |
168 | */ | 168 | */ |
169 | void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn) | 169 | void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn) |
170 | { | 170 | { |
171 | q->merge_bvec_fn = mbfn; | 171 | q->merge_bvec_fn = mbfn; |
172 | } | 172 | } |
173 | 173 | ||
174 | EXPORT_SYMBOL(blk_queue_merge_bvec); | 174 | EXPORT_SYMBOL(blk_queue_merge_bvec); |
175 | 175 | ||
176 | void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) | 176 | void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) |
177 | { | 177 | { |
178 | q->softirq_done_fn = fn; | 178 | q->softirq_done_fn = fn; |
179 | } | 179 | } |
180 | 180 | ||
181 | EXPORT_SYMBOL(blk_queue_softirq_done); | 181 | EXPORT_SYMBOL(blk_queue_softirq_done); |
182 | 182 | ||
183 | /** | 183 | /** |
184 | * blk_queue_make_request - define an alternate make_request function for a device | 184 | * blk_queue_make_request - define an alternate make_request function for a device |
185 | * @q: the request queue for the device to be affected | 185 | * @q: the request queue for the device to be affected |
186 | * @mfn: the alternate make_request function | 186 | * @mfn: the alternate make_request function |
187 | * | 187 | * |
188 | * Description: | 188 | * Description: |
189 | * The normal way for &struct bios to be passed to a device | 189 | * The normal way for &struct bios to be passed to a device |
190 | * driver is for them to be collected into requests on a request | 190 | * driver is for them to be collected into requests on a request |
191 | * queue, and then to allow the device driver to select requests | 191 | * queue, and then to allow the device driver to select requests |
192 | * off that queue when it is ready. This works well for many block | 192 | * off that queue when it is ready. This works well for many block |
193 | * devices. However some block devices (typically virtual devices | 193 | * devices. However some block devices (typically virtual devices |
194 | * such as md or lvm) do not benefit from the processing on the | 194 | * such as md or lvm) do not benefit from the processing on the |
195 | * request queue, and are served best by having the requests passed | 195 | * request queue, and are served best by having the requests passed |
196 | * directly to them. This can be achieved by providing a function | 196 | * directly to them. This can be achieved by providing a function |
197 | * to blk_queue_make_request(). | 197 | * to blk_queue_make_request(). |
198 | * | 198 | * |
199 | * Caveat: | 199 | * Caveat: |
200 | * The driver that does this *must* be able to deal appropriately | 200 | * The driver that does this *must* be able to deal appropriately |
201 | * with buffers in "highmemory". This can be accomplished by either calling | 201 | * with buffers in "highmemory". This can be accomplished by either calling |
202 | * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling | 202 | * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling |
203 | * blk_queue_bounce() to create a buffer in normal memory. | 203 | * blk_queue_bounce() to create a buffer in normal memory. |
204 | **/ | 204 | **/ |
205 | void blk_queue_make_request(struct request_queue * q, make_request_fn * mfn) | 205 | void blk_queue_make_request(struct request_queue * q, make_request_fn * mfn) |
206 | { | 206 | { |
207 | /* | 207 | /* |
208 | * set defaults | 208 | * set defaults |
209 | */ | 209 | */ |
210 | q->nr_requests = BLKDEV_MAX_RQ; | 210 | q->nr_requests = BLKDEV_MAX_RQ; |
211 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); | 211 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); |
212 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); | 212 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); |
213 | q->make_request_fn = mfn; | 213 | q->make_request_fn = mfn; |
214 | q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 214 | q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
215 | q->backing_dev_info.state = 0; | 215 | q->backing_dev_info.state = 0; |
216 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; | 216 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; |
217 | blk_queue_max_sectors(q, SAFE_MAX_SECTORS); | 217 | blk_queue_max_sectors(q, SAFE_MAX_SECTORS); |
218 | blk_queue_hardsect_size(q, 512); | 218 | blk_queue_hardsect_size(q, 512); |
219 | blk_queue_dma_alignment(q, 511); | 219 | blk_queue_dma_alignment(q, 511); |
220 | blk_queue_congestion_threshold(q); | 220 | blk_queue_congestion_threshold(q); |
221 | q->nr_batching = BLK_BATCH_REQ; | 221 | q->nr_batching = BLK_BATCH_REQ; |
222 | 222 | ||
223 | q->unplug_thresh = 4; /* hmm */ | 223 | q->unplug_thresh = 4; /* hmm */ |
224 | q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ | 224 | q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ |
225 | if (q->unplug_delay == 0) | 225 | if (q->unplug_delay == 0) |
226 | q->unplug_delay = 1; | 226 | q->unplug_delay = 1; |
227 | 227 | ||
228 | INIT_WORK(&q->unplug_work, blk_unplug_work); | 228 | INIT_WORK(&q->unplug_work, blk_unplug_work); |
229 | 229 | ||
230 | q->unplug_timer.function = blk_unplug_timeout; | 230 | q->unplug_timer.function = blk_unplug_timeout; |
231 | q->unplug_timer.data = (unsigned long)q; | 231 | q->unplug_timer.data = (unsigned long)q; |
232 | 232 | ||
233 | /* | 233 | /* |
234 | * by default assume old behaviour and bounce for any highmem page | 234 | * by default assume old behaviour and bounce for any highmem page |
235 | */ | 235 | */ |
236 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); | 236 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); |
237 | } | 237 | } |
238 | 238 | ||
239 | EXPORT_SYMBOL(blk_queue_make_request); | 239 | EXPORT_SYMBOL(blk_queue_make_request); |
240 | 240 | ||
241 | static void rq_init(struct request_queue *q, struct request *rq) | 241 | static void rq_init(struct request_queue *q, struct request *rq) |
242 | { | 242 | { |
243 | INIT_LIST_HEAD(&rq->queuelist); | 243 | INIT_LIST_HEAD(&rq->queuelist); |
244 | INIT_LIST_HEAD(&rq->donelist); | 244 | INIT_LIST_HEAD(&rq->donelist); |
245 | 245 | ||
246 | rq->errors = 0; | 246 | rq->errors = 0; |
247 | rq->bio = rq->biotail = NULL; | 247 | rq->bio = rq->biotail = NULL; |
248 | INIT_HLIST_NODE(&rq->hash); | 248 | INIT_HLIST_NODE(&rq->hash); |
249 | RB_CLEAR_NODE(&rq->rb_node); | 249 | RB_CLEAR_NODE(&rq->rb_node); |
250 | rq->ioprio = 0; | 250 | rq->ioprio = 0; |
251 | rq->buffer = NULL; | 251 | rq->buffer = NULL; |
252 | rq->ref_count = 1; | 252 | rq->ref_count = 1; |
253 | rq->q = q; | 253 | rq->q = q; |
254 | rq->special = NULL; | 254 | rq->special = NULL; |
255 | rq->data_len = 0; | 255 | rq->data_len = 0; |
256 | rq->data = NULL; | 256 | rq->data = NULL; |
257 | rq->nr_phys_segments = 0; | 257 | rq->nr_phys_segments = 0; |
258 | rq->sense = NULL; | 258 | rq->sense = NULL; |
259 | rq->end_io = NULL; | 259 | rq->end_io = NULL; |
260 | rq->end_io_data = NULL; | 260 | rq->end_io_data = NULL; |
261 | rq->completion_data = NULL; | 261 | rq->completion_data = NULL; |
262 | rq->next_rq = NULL; | 262 | rq->next_rq = NULL; |
263 | } | 263 | } |
264 | 264 | ||
265 | /** | 265 | /** |
266 | * blk_queue_ordered - does this queue support ordered writes | 266 | * blk_queue_ordered - does this queue support ordered writes |
267 | * @q: the request queue | 267 | * @q: the request queue |
268 | * @ordered: one of QUEUE_ORDERED_* | 268 | * @ordered: one of QUEUE_ORDERED_* |
269 | * @prepare_flush_fn: rq setup helper for cache flush ordered writes | 269 | * @prepare_flush_fn: rq setup helper for cache flush ordered writes |
270 | * | 270 | * |
271 | * Description: | 271 | * Description: |
272 | * For journalled file systems, doing ordered writes on a commit | 272 | * For journalled file systems, doing ordered writes on a commit |
273 | * block instead of explicitly doing wait_on_buffer (which is bad | 273 | * block instead of explicitly doing wait_on_buffer (which is bad |
274 | * for performance) can be a big win. Block drivers supporting this | 274 | * for performance) can be a big win. Block drivers supporting this |
275 | * feature should call this function and indicate so. | 275 | * feature should call this function and indicate so. |
276 | * | 276 | * |
277 | **/ | 277 | **/ |
278 | int blk_queue_ordered(struct request_queue *q, unsigned ordered, | 278 | int blk_queue_ordered(struct request_queue *q, unsigned ordered, |
279 | prepare_flush_fn *prepare_flush_fn) | 279 | prepare_flush_fn *prepare_flush_fn) |
280 | { | 280 | { |
281 | if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && | 281 | if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && |
282 | prepare_flush_fn == NULL) { | 282 | prepare_flush_fn == NULL) { |
283 | printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); | 283 | printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); |
284 | return -EINVAL; | 284 | return -EINVAL; |
285 | } | 285 | } |
286 | 286 | ||
287 | if (ordered != QUEUE_ORDERED_NONE && | 287 | if (ordered != QUEUE_ORDERED_NONE && |
288 | ordered != QUEUE_ORDERED_DRAIN && | 288 | ordered != QUEUE_ORDERED_DRAIN && |
289 | ordered != QUEUE_ORDERED_DRAIN_FLUSH && | 289 | ordered != QUEUE_ORDERED_DRAIN_FLUSH && |
290 | ordered != QUEUE_ORDERED_DRAIN_FUA && | 290 | ordered != QUEUE_ORDERED_DRAIN_FUA && |
291 | ordered != QUEUE_ORDERED_TAG && | 291 | ordered != QUEUE_ORDERED_TAG && |
292 | ordered != QUEUE_ORDERED_TAG_FLUSH && | 292 | ordered != QUEUE_ORDERED_TAG_FLUSH && |
293 | ordered != QUEUE_ORDERED_TAG_FUA) { | 293 | ordered != QUEUE_ORDERED_TAG_FUA) { |
294 | printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); | 294 | printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); |
295 | return -EINVAL; | 295 | return -EINVAL; |
296 | } | 296 | } |
297 | 297 | ||
298 | q->ordered = ordered; | 298 | q->ordered = ordered; |
299 | q->next_ordered = ordered; | 299 | q->next_ordered = ordered; |
300 | q->prepare_flush_fn = prepare_flush_fn; | 300 | q->prepare_flush_fn = prepare_flush_fn; |
301 | 301 | ||
302 | return 0; | 302 | return 0; |
303 | } | 303 | } |
304 | 304 | ||
305 | EXPORT_SYMBOL(blk_queue_ordered); | 305 | EXPORT_SYMBOL(blk_queue_ordered); |
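A rough sketch of how a driver for a disk with a write-back cache might use this hook, modelled on what SCSI-disk style drivers do around this time; the function name and command setup are illustrative, not part of this patch:

static void example_prepare_flush(struct request_queue *q, struct request *rq)
{
	/* turn the proxy flush request into a SYNCHRONIZE CACHE command */
	memset(rq->cmd, 0, sizeof(rq->cmd));
	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->cmd[0] = SYNCHRONIZE_CACHE;
	rq->cmd_len = 10;
}

	/* in the driver's setup path: drain the queue, flush before and after */
	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, example_prepare_flush);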
306 | 306 | ||
307 | /** | 307 | /** |
308 | * blk_queue_issue_flush_fn - set function for issuing a flush | 308 | * blk_queue_issue_flush_fn - set function for issuing a flush |
309 | * @q: the request queue | 309 | * @q: the request queue |
310 | * @iff: the function to be called issuing the flush | 310 | * @iff: the function to be called issuing the flush |
311 | * | 311 | * |
312 | * Description: | 312 | * Description: |
313 | * If a driver supports issuing a flush command, it notifies the block | 313 | * If a driver supports issuing a flush command, it notifies the block |
314 | * layer of that support by registering the function through this call. | 314 | * layer of that support by registering the function through this call. |
315 | * | 315 | * |
316 | **/ | 316 | **/ |
317 | void blk_queue_issue_flush_fn(struct request_queue *q, issue_flush_fn *iff) | 317 | void blk_queue_issue_flush_fn(struct request_queue *q, issue_flush_fn *iff) |
318 | { | 318 | { |
319 | q->issue_flush_fn = iff; | 319 | q->issue_flush_fn = iff; |
320 | } | 320 | } |
321 | 321 | ||
322 | EXPORT_SYMBOL(blk_queue_issue_flush_fn); | 322 | EXPORT_SYMBOL(blk_queue_issue_flush_fn); |
323 | 323 | ||
324 | /* | 324 | /* |
325 | * Cache flushing for ordered writes handling | 325 | * Cache flushing for ordered writes handling |
326 | */ | 326 | */ |
327 | inline unsigned blk_ordered_cur_seq(struct request_queue *q) | 327 | inline unsigned blk_ordered_cur_seq(struct request_queue *q) |
328 | { | 328 | { |
329 | if (!q->ordseq) | 329 | if (!q->ordseq) |
330 | return 0; | 330 | return 0; |
331 | return 1 << ffz(q->ordseq); | 331 | return 1 << ffz(q->ordseq); |
332 | } | 332 | } |
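As a worked example, assuming the usual QUEUE_ORDSEQ_* bit layout from blkdev.h:

	/*
	 * With STARTED = 0x01, DRAIN = 0x02, PREFLUSH = 0x04, BAR = 0x08, ...:
	 * once the started, drain and preflush steps have all completed,
	 * q->ordseq == 0x07, ffz(0x07) == 3, and the function returns
	 * 1 << 3 == 0x08 == QUEUE_ORDSEQ_BAR, i.e. the barrier write itself
	 * is the step currently in progress.
	 */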
333 | 333 | ||
334 | unsigned blk_ordered_req_seq(struct request *rq) | 334 | unsigned blk_ordered_req_seq(struct request *rq) |
335 | { | 335 | { |
336 | struct request_queue *q = rq->q; | 336 | struct request_queue *q = rq->q; |
337 | 337 | ||
338 | BUG_ON(q->ordseq == 0); | 338 | BUG_ON(q->ordseq == 0); |
339 | 339 | ||
340 | if (rq == &q->pre_flush_rq) | 340 | if (rq == &q->pre_flush_rq) |
341 | return QUEUE_ORDSEQ_PREFLUSH; | 341 | return QUEUE_ORDSEQ_PREFLUSH; |
342 | if (rq == &q->bar_rq) | 342 | if (rq == &q->bar_rq) |
343 | return QUEUE_ORDSEQ_BAR; | 343 | return QUEUE_ORDSEQ_BAR; |
344 | if (rq == &q->post_flush_rq) | 344 | if (rq == &q->post_flush_rq) |
345 | return QUEUE_ORDSEQ_POSTFLUSH; | 345 | return QUEUE_ORDSEQ_POSTFLUSH; |
346 | 346 | ||
347 | /* | 347 | /* |
348 | * !fs requests don't need to follow barrier ordering. Always | 348 | * !fs requests don't need to follow barrier ordering. Always |
349 | * put them at the front. This fixes the following deadlock. | 349 | * put them at the front. This fixes the following deadlock. |
350 | * | 350 | * |
351 | * http://thread.gmane.org/gmane.linux.kernel/537473 | 351 | * http://thread.gmane.org/gmane.linux.kernel/537473 |
352 | */ | 352 | */ |
353 | if (!blk_fs_request(rq)) | 353 | if (!blk_fs_request(rq)) |
354 | return QUEUE_ORDSEQ_DRAIN; | 354 | return QUEUE_ORDSEQ_DRAIN; |
355 | 355 | ||
356 | if ((rq->cmd_flags & REQ_ORDERED_COLOR) == | 356 | if ((rq->cmd_flags & REQ_ORDERED_COLOR) == |
357 | (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) | 357 | (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) |
358 | return QUEUE_ORDSEQ_DRAIN; | 358 | return QUEUE_ORDSEQ_DRAIN; |
359 | else | 359 | else |
360 | return QUEUE_ORDSEQ_DONE; | 360 | return QUEUE_ORDSEQ_DONE; |
361 | } | 361 | } |
362 | 362 | ||
363 | void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) | 363 | void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) |
364 | { | 364 | { |
365 | struct request *rq; | 365 | struct request *rq; |
366 | int uptodate; | 366 | int uptodate; |
367 | 367 | ||
368 | if (error && !q->orderr) | 368 | if (error && !q->orderr) |
369 | q->orderr = error; | 369 | q->orderr = error; |
370 | 370 | ||
371 | BUG_ON(q->ordseq & seq); | 371 | BUG_ON(q->ordseq & seq); |
372 | q->ordseq |= seq; | 372 | q->ordseq |= seq; |
373 | 373 | ||
374 | if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) | 374 | if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) |
375 | return; | 375 | return; |
376 | 376 | ||
377 | /* | 377 | /* |
378 | * Okay, sequence complete. | 378 | * Okay, sequence complete. |
379 | */ | 379 | */ |
380 | uptodate = 1; | 380 | uptodate = 1; |
381 | if (q->orderr) | 381 | if (q->orderr) |
382 | uptodate = q->orderr; | 382 | uptodate = q->orderr; |
383 | 383 | ||
384 | q->ordseq = 0; | 384 | q->ordseq = 0; |
385 | rq = q->orig_bar_rq; | 385 | rq = q->orig_bar_rq; |
386 | 386 | ||
387 | end_that_request_first(rq, uptodate, rq->hard_nr_sectors); | 387 | end_that_request_first(rq, uptodate, rq->hard_nr_sectors); |
388 | end_that_request_last(rq, uptodate); | 388 | end_that_request_last(rq, uptodate); |
389 | } | 389 | } |
390 | 390 | ||
391 | static void pre_flush_end_io(struct request *rq, int error) | 391 | static void pre_flush_end_io(struct request *rq, int error) |
392 | { | 392 | { |
393 | elv_completed_request(rq->q, rq); | 393 | elv_completed_request(rq->q, rq); |
394 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); | 394 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); |
395 | } | 395 | } |
396 | 396 | ||
397 | static void bar_end_io(struct request *rq, int error) | 397 | static void bar_end_io(struct request *rq, int error) |
398 | { | 398 | { |
399 | elv_completed_request(rq->q, rq); | 399 | elv_completed_request(rq->q, rq); |
400 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); | 400 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); |
401 | } | 401 | } |
402 | 402 | ||
403 | static void post_flush_end_io(struct request *rq, int error) | 403 | static void post_flush_end_io(struct request *rq, int error) |
404 | { | 404 | { |
405 | elv_completed_request(rq->q, rq); | 405 | elv_completed_request(rq->q, rq); |
406 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); | 406 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); |
407 | } | 407 | } |
408 | 408 | ||
409 | static void queue_flush(struct request_queue *q, unsigned which) | 409 | static void queue_flush(struct request_queue *q, unsigned which) |
410 | { | 410 | { |
411 | struct request *rq; | 411 | struct request *rq; |
412 | rq_end_io_fn *end_io; | 412 | rq_end_io_fn *end_io; |
413 | 413 | ||
414 | if (which == QUEUE_ORDERED_PREFLUSH) { | 414 | if (which == QUEUE_ORDERED_PREFLUSH) { |
415 | rq = &q->pre_flush_rq; | 415 | rq = &q->pre_flush_rq; |
416 | end_io = pre_flush_end_io; | 416 | end_io = pre_flush_end_io; |
417 | } else { | 417 | } else { |
418 | rq = &q->post_flush_rq; | 418 | rq = &q->post_flush_rq; |
419 | end_io = post_flush_end_io; | 419 | end_io = post_flush_end_io; |
420 | } | 420 | } |
421 | 421 | ||
422 | rq->cmd_flags = REQ_HARDBARRIER; | 422 | rq->cmd_flags = REQ_HARDBARRIER; |
423 | rq_init(q, rq); | 423 | rq_init(q, rq); |
424 | rq->elevator_private = NULL; | 424 | rq->elevator_private = NULL; |
425 | rq->elevator_private2 = NULL; | 425 | rq->elevator_private2 = NULL; |
426 | rq->rq_disk = q->bar_rq.rq_disk; | 426 | rq->rq_disk = q->bar_rq.rq_disk; |
427 | rq->end_io = end_io; | 427 | rq->end_io = end_io; |
428 | q->prepare_flush_fn(q, rq); | 428 | q->prepare_flush_fn(q, rq); |
429 | 429 | ||
430 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); | 430 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); |
431 | } | 431 | } |
432 | 432 | ||
433 | static inline struct request *start_ordered(struct request_queue *q, | 433 | static inline struct request *start_ordered(struct request_queue *q, |
434 | struct request *rq) | 434 | struct request *rq) |
435 | { | 435 | { |
436 | q->orderr = 0; | 436 | q->orderr = 0; |
437 | q->ordered = q->next_ordered; | 437 | q->ordered = q->next_ordered; |
438 | q->ordseq |= QUEUE_ORDSEQ_STARTED; | 438 | q->ordseq |= QUEUE_ORDSEQ_STARTED; |
439 | 439 | ||
440 | /* | 440 | /* |
441 | * Prep proxy barrier request. | 441 | * Prep proxy barrier request. |
442 | */ | 442 | */ |
443 | blkdev_dequeue_request(rq); | 443 | blkdev_dequeue_request(rq); |
444 | q->orig_bar_rq = rq; | 444 | q->orig_bar_rq = rq; |
445 | rq = &q->bar_rq; | 445 | rq = &q->bar_rq; |
446 | rq->cmd_flags = 0; | 446 | rq->cmd_flags = 0; |
447 | rq_init(q, rq); | 447 | rq_init(q, rq); |
448 | if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) | 448 | if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) |
449 | rq->cmd_flags |= REQ_RW; | 449 | rq->cmd_flags |= REQ_RW; |
450 | if (q->ordered & QUEUE_ORDERED_FUA) | 450 | if (q->ordered & QUEUE_ORDERED_FUA) |
451 | rq->cmd_flags |= REQ_FUA; | 451 | rq->cmd_flags |= REQ_FUA; |
452 | rq->elevator_private = NULL; | 452 | rq->elevator_private = NULL; |
453 | rq->elevator_private2 = NULL; | 453 | rq->elevator_private2 = NULL; |
454 | init_request_from_bio(rq, q->orig_bar_rq->bio); | 454 | init_request_from_bio(rq, q->orig_bar_rq->bio); |
455 | rq->end_io = bar_end_io; | 455 | rq->end_io = bar_end_io; |
456 | 456 | ||
457 | /* | 457 | /* |
458 | * Queue ordered sequence. As we stack them at the head, we | 458 | * Queue ordered sequence. As we stack them at the head, we |
459 | * need to queue in reverse order. Note that we rely on the fact | 459 | * need to queue in reverse order. Note that we rely on the fact |
460 | * that no fs request uses ELEVATOR_INSERT_FRONT and thus no fs | 460 | * that no fs request uses ELEVATOR_INSERT_FRONT and thus no fs |
461 | * request gets in between the ordered sequence. | 461 | * request gets in between the ordered sequence. If this request is |
462 | * an empty barrier, we don't need to do a postflush at all, since | ||
463 | * there will be no data written between the pre and post flush. | ||
464 | * Hence a single flush will suffice. | ||
462 | */ | 465 | */ |
463 | if (q->ordered & QUEUE_ORDERED_POSTFLUSH) | 466 | if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq)) |
464 | queue_flush(q, QUEUE_ORDERED_POSTFLUSH); | 467 | queue_flush(q, QUEUE_ORDERED_POSTFLUSH); |
465 | else | 468 | else |
466 | q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; | 469 | q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; |
467 | 470 | ||
468 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); | 471 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); |
469 | 472 | ||
470 | if (q->ordered & QUEUE_ORDERED_PREFLUSH) { | 473 | if (q->ordered & QUEUE_ORDERED_PREFLUSH) { |
471 | queue_flush(q, QUEUE_ORDERED_PREFLUSH); | 474 | queue_flush(q, QUEUE_ORDERED_PREFLUSH); |
472 | rq = &q->pre_flush_rq; | 475 | rq = &q->pre_flush_rq; |
473 | } else | 476 | } else |
474 | q->ordseq |= QUEUE_ORDSEQ_PREFLUSH; | 477 | q->ordseq |= QUEUE_ORDSEQ_PREFLUSH; |
475 | 478 | ||
476 | if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0) | 479 | if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0) |
477 | q->ordseq |= QUEUE_ORDSEQ_DRAIN; | 480 | q->ordseq |= QUEUE_ORDSEQ_DRAIN; |
478 | else | 481 | else |
479 | rq = NULL; | 482 | rq = NULL; |
480 | 483 | ||
481 | return rq; | 484 | return rq; |
482 | } | 485 | } |
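For orientation, an "empty" barrier as handled by the hunk above is simply a barrier write that carries no data, so a single flush suffices; blk_empty_barrier() is presumably defined in the blkdev.h part of this commit as a test for exactly that. A hypothetical submitter-side sketch (bdev and my_end_io are placeholders):

	struct bio *bio = bio_alloc(GFP_KERNEL, 0);

	bio->bi_bdev = bdev;		/* target device */
	bio->bi_end_io = my_end_io;	/* completion callback */
	/* no pages are added, so bi_size stays 0 */
	submit_bio(WRITE | (1 << BIO_RW_BARRIER), bio);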
483 | 486 | ||
484 | int blk_do_ordered(struct request_queue *q, struct request **rqp) | 487 | int blk_do_ordered(struct request_queue *q, struct request **rqp) |
485 | { | 488 | { |
486 | struct request *rq = *rqp; | 489 | struct request *rq = *rqp; |
487 | int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); | 490 | const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); |
488 | 491 | ||
489 | if (!q->ordseq) { | 492 | if (!q->ordseq) { |
490 | if (!is_barrier) | 493 | if (!is_barrier) |
491 | return 1; | 494 | return 1; |
492 | 495 | ||
493 | if (q->next_ordered != QUEUE_ORDERED_NONE) { | 496 | if (q->next_ordered != QUEUE_ORDERED_NONE) { |
494 | *rqp = start_ordered(q, rq); | 497 | *rqp = start_ordered(q, rq); |
495 | return 1; | 498 | return 1; |
496 | } else { | 499 | } else { |
497 | /* | 500 | /* |
498 | * This can happen when the queue switches to | 501 | * This can happen when the queue switches to |
499 | * ORDERED_NONE while this request is on it. | 502 | * ORDERED_NONE while this request is on it. |
500 | */ | 503 | */ |
501 | blkdev_dequeue_request(rq); | 504 | blkdev_dequeue_request(rq); |
502 | end_that_request_first(rq, -EOPNOTSUPP, | 505 | end_that_request_first(rq, -EOPNOTSUPP, |
503 | rq->hard_nr_sectors); | 506 | rq->hard_nr_sectors); |
504 | end_that_request_last(rq, -EOPNOTSUPP); | 507 | end_that_request_last(rq, -EOPNOTSUPP); |
505 | *rqp = NULL; | 508 | *rqp = NULL; |
506 | return 0; | 509 | return 0; |
507 | } | 510 | } |
508 | } | 511 | } |
509 | 512 | ||
510 | /* | 513 | /* |
511 | * Ordered sequence in progress | 514 | * Ordered sequence in progress |
512 | */ | 515 | */ |
513 | 516 | ||
514 | /* Special requests are not subject to ordering rules. */ | 517 | /* Special requests are not subject to ordering rules. */ |
515 | if (!blk_fs_request(rq) && | 518 | if (!blk_fs_request(rq) && |
516 | rq != &q->pre_flush_rq && rq != &q->post_flush_rq) | 519 | rq != &q->pre_flush_rq && rq != &q->post_flush_rq) |
517 | return 1; | 520 | return 1; |
518 | 521 | ||
519 | if (q->ordered & QUEUE_ORDERED_TAG) { | 522 | if (q->ordered & QUEUE_ORDERED_TAG) { |
520 | /* Ordered by tag. Blocking the next barrier is enough. */ | 523 | /* Ordered by tag. Blocking the next barrier is enough. */ |
521 | if (is_barrier && rq != &q->bar_rq) | 524 | if (is_barrier && rq != &q->bar_rq) |
522 | *rqp = NULL; | 525 | *rqp = NULL; |
523 | } else { | 526 | } else { |
524 | /* Ordered by draining. Wait for turn. */ | 527 | /* Ordered by draining. Wait for turn. */ |
525 | WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); | 528 | WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); |
526 | if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) | 529 | if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) |
527 | *rqp = NULL; | 530 | *rqp = NULL; |
528 | } | 531 | } |
529 | 532 | ||
530 | return 1; | 533 | return 1; |
531 | } | 534 | } |
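To summarise the calling convention of blk_do_ordered() as written above (a descriptive note, not new code in the patch):

	/*
	 * Return value contract for the dispatch path: 1 means "carry on",
	 * possibly with *rqp set to NULL to hold the request back until its
	 * turn in the ordered sequence; 0 means the barrier was rejected
	 * with -EOPNOTSUPP and has already been completed, so there is
	 * nothing left to dispatch for it.
	 */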
532 | 535 | ||
533 | static void req_bio_endio(struct request *rq, struct bio *bio, | 536 | static void req_bio_endio(struct request *rq, struct bio *bio, |
534 | unsigned int nbytes, int error) | 537 | unsigned int nbytes, int error) |
535 | { | 538 | { |
536 | struct request_queue *q = rq->q; | 539 | struct request_queue *q = rq->q; |
537 | 540 | ||
538 | if (&q->bar_rq != rq) { | 541 | if (&q->bar_rq != rq) { |
539 | if (error) | 542 | if (error) |
540 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | 543 | clear_bit(BIO_UPTODATE, &bio->bi_flags); |
541 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | 544 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
542 | error = -EIO; | 545 | error = -EIO; |
543 | 546 | ||
544 | if (unlikely(nbytes > bio->bi_size)) { | 547 | if (unlikely(nbytes > bio->bi_size)) { |
545 | printk("%s: want %u bytes done, only %u left\n", | 548 | printk("%s: want %u bytes done, only %u left\n", |
546 | __FUNCTION__, nbytes, bio->bi_size); | 549 | __FUNCTION__, nbytes, bio->bi_size); |
547 | nbytes = bio->bi_size; | 550 | nbytes = bio->bi_size; |
548 | } | 551 | } |
549 | 552 | ||
550 | bio->bi_size -= nbytes; | 553 | bio->bi_size -= nbytes; |
551 | bio->bi_sector += (nbytes >> 9); | 554 | bio->bi_sector += (nbytes >> 9); |
552 | if (bio->bi_size == 0) | 555 | if (bio->bi_size == 0) |
553 | bio_endio(bio, error); | 556 | bio_endio(bio, error); |
554 | } else { | 557 | } else { |
555 | 558 | ||
556 | /* | 559 | /* |
557 | * Okay, this is the barrier request in progress, just | 560 | * Okay, this is the barrier request in progress, just |
558 | * record the error; | 561 | * record the error; |
559 | */ | 562 | */ |
560 | if (error && !q->orderr) | 563 | if (error && !q->orderr) |
561 | q->orderr = error; | 564 | q->orderr = error; |
562 | } | 565 | } |
563 | } | 566 | } |
564 | 567 | ||
565 | /** | 568 | /** |
566 | * blk_queue_bounce_limit - set bounce buffer limit for queue | 569 | * blk_queue_bounce_limit - set bounce buffer limit for queue |
567 | * @q: the request queue for the device | 570 | * @q: the request queue for the device |
568 | * @dma_addr: bus address limit | 571 | * @dma_addr: bus address limit |
569 | * | 572 | * |
570 | * Description: | 573 | * Description: |
571 | * Different hardware can have different requirements as to what pages | 574 | * Different hardware can have different requirements as to what pages |
572 | * it can do I/O directly to. A low level driver can call | 575 | * it can do I/O directly to. A low level driver can call |
573 | * blk_queue_bounce_limit to have lower memory pages allocated as bounce | 576 | * blk_queue_bounce_limit to have lower memory pages allocated as bounce |
574 | * buffers for doing I/O to pages residing above @dma_addr. | 577 | * buffers for doing I/O to pages residing above @dma_addr. |
575 | **/ | 578 | **/ |
576 | void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) | 579 | void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) |
577 | { | 580 | { |
578 | unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; | 581 | unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; |
579 | int dma = 0; | 582 | int dma = 0; |
580 | 583 | ||
581 | q->bounce_gfp = GFP_NOIO; | 584 | q->bounce_gfp = GFP_NOIO; |
582 | #if BITS_PER_LONG == 64 | 585 | #if BITS_PER_LONG == 64 |
583 | /* Assume anything <= 4GB can be handled by IOMMU. | 586 | /* Assume anything <= 4GB can be handled by IOMMU. |
584 | Actually some IOMMUs can handle everything, but I don't | 587 | Actually some IOMMUs can handle everything, but I don't |
585 | know of a way to test this here. */ | 588 | know of a way to test this here. */ |
586 | if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) | 589 | if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) |
587 | dma = 1; | 590 | dma = 1; |
588 | q->bounce_pfn = max_low_pfn; | 591 | q->bounce_pfn = max_low_pfn; |
589 | #else | 592 | #else |
590 | if (bounce_pfn < blk_max_low_pfn) | 593 | if (bounce_pfn < blk_max_low_pfn) |
591 | dma = 1; | 594 | dma = 1; |
592 | q->bounce_pfn = bounce_pfn; | 595 | q->bounce_pfn = bounce_pfn; |
593 | #endif | 596 | #endif |
594 | if (dma) { | 597 | if (dma) { |
595 | init_emergency_isa_pool(); | 598 | init_emergency_isa_pool(); |
596 | q->bounce_gfp = GFP_NOIO | GFP_DMA; | 599 | q->bounce_gfp = GFP_NOIO | GFP_DMA; |
597 | q->bounce_pfn = bounce_pfn; | 600 | q->bounce_pfn = bounce_pfn; |
598 | } | 601 | } |
599 | } | 602 | } |
600 | 603 | ||
601 | EXPORT_SYMBOL(blk_queue_bounce_limit); | 604 | EXPORT_SYMBOL(blk_queue_bounce_limit); |
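Typical calls, for illustration only (both limits are standard constants from blkdev.h, not part of this patch):

	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);	/* full 64-bit DMA, never bounce */
	blk_queue_bounce_limit(q, BLK_BOUNCE_ISA);	/* 24-bit ISA DMA, bounce above 16MB */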
602 | 605 | ||
603 | /** | 606 | /** |
604 | * blk_queue_max_sectors - set max sectors for a request for this queue | 607 | * blk_queue_max_sectors - set max sectors for a request for this queue |
605 | * @q: the request queue for the device | 608 | * @q: the request queue for the device |
606 | * @max_sectors: max sectors in the usual 512b unit | 609 | * @max_sectors: max sectors in the usual 512b unit |
607 | * | 610 | * |
608 | * Description: | 611 | * Description: |
609 | * Enables a low level driver to set an upper limit on the size of | 612 | * Enables a low level driver to set an upper limit on the size of |
610 | * received requests. | 613 | * received requests. |
611 | **/ | 614 | **/ |
612 | void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors) | 615 | void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors) |
613 | { | 616 | { |
614 | if ((max_sectors << 9) < PAGE_CACHE_SIZE) { | 617 | if ((max_sectors << 9) < PAGE_CACHE_SIZE) { |
615 | max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); | 618 | max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); |
616 | printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); | 619 | printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); |
617 | } | 620 | } |
618 | 621 | ||
619 | if (BLK_DEF_MAX_SECTORS > max_sectors) | 622 | if (BLK_DEF_MAX_SECTORS > max_sectors) |
620 | q->max_hw_sectors = q->max_sectors = max_sectors; | 623 | q->max_hw_sectors = q->max_sectors = max_sectors; |
621 | else { | 624 | else { |
622 | q->max_sectors = BLK_DEF_MAX_SECTORS; | 625 | q->max_sectors = BLK_DEF_MAX_SECTORS; |
623 | q->max_hw_sectors = max_sectors; | 626 | q->max_hw_sectors = max_sectors; |
624 | } | 627 | } |
625 | } | 628 | } |
626 | 629 | ||
627 | EXPORT_SYMBOL(blk_queue_max_sectors); | 630 | EXPORT_SYMBOL(blk_queue_max_sectors); |
628 | 631 | ||
629 | /** | 632 | /** |
630 | * blk_queue_max_phys_segments - set max phys segments for a request for this queue | 633 | * blk_queue_max_phys_segments - set max phys segments for a request for this queue |
631 | * @q: the request queue for the device | 634 | * @q: the request queue for the device |
632 | * @max_segments: max number of segments | 635 | * @max_segments: max number of segments |
633 | * | 636 | * |
634 | * Description: | 637 | * Description: |
635 | * Enables a low level driver to set an upper limit on the number of | 638 | * Enables a low level driver to set an upper limit on the number of |
636 | * physical data segments in a request. This would be the largest sized | 639 | * physical data segments in a request. This would be the largest sized |
637 | * scatter list the driver could handle. | 640 | * scatter list the driver could handle. |
638 | **/ | 641 | **/ |
639 | void blk_queue_max_phys_segments(struct request_queue *q, | 642 | void blk_queue_max_phys_segments(struct request_queue *q, |
640 | unsigned short max_segments) | 643 | unsigned short max_segments) |
641 | { | 644 | { |
642 | if (!max_segments) { | 645 | if (!max_segments) { |
643 | max_segments = 1; | 646 | max_segments = 1; |
644 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); | 647 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); |
645 | } | 648 | } |
646 | 649 | ||
647 | q->max_phys_segments = max_segments; | 650 | q->max_phys_segments = max_segments; |
648 | } | 651 | } |
649 | 652 | ||
650 | EXPORT_SYMBOL(blk_queue_max_phys_segments); | 653 | EXPORT_SYMBOL(blk_queue_max_phys_segments); |
651 | 654 | ||
652 | /** | 655 | /** |
653 | * blk_queue_max_hw_segments - set max hw segments for a request for this queue | 656 | * blk_queue_max_hw_segments - set max hw segments for a request for this queue |
654 | * @q: the request queue for the device | 657 | * @q: the request queue for the device |
655 | * @max_segments: max number of segments | 658 | * @max_segments: max number of segments |
656 | * | 659 | * |
657 | * Description: | 660 | * Description: |
658 | * Enables a low level driver to set an upper limit on the number of | 661 | * Enables a low level driver to set an upper limit on the number of |
659 | * hw data segments in a request. This would be the largest number of | 662 | * hw data segments in a request. This would be the largest number of |
660 | * address/length pairs the host adapter can actually give at once | 663 | * address/length pairs the host adapter can actually give at once |
661 | * to the device. | 664 | * to the device. |
662 | **/ | 665 | **/ |
663 | void blk_queue_max_hw_segments(struct request_queue *q, | 666 | void blk_queue_max_hw_segments(struct request_queue *q, |
664 | unsigned short max_segments) | 667 | unsigned short max_segments) |
665 | { | 668 | { |
666 | if (!max_segments) { | 669 | if (!max_segments) { |
667 | max_segments = 1; | 670 | max_segments = 1; |
668 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); | 671 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); |
669 | } | 672 | } |
670 | 673 | ||
671 | q->max_hw_segments = max_segments; | 674 | q->max_hw_segments = max_segments; |
672 | } | 675 | } |
673 | 676 | ||
674 | EXPORT_SYMBOL(blk_queue_max_hw_segments); | 677 | EXPORT_SYMBOL(blk_queue_max_hw_segments); |
675 | 678 | ||
676 | /** | 679 | /** |
677 | * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg | 680 | * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg |
678 | * @q: the request queue for the device | 681 | * @q: the request queue for the device |
679 | * @max_size: max size of segment in bytes | 682 | * @max_size: max size of segment in bytes |
680 | * | 683 | * |
681 | * Description: | 684 | * Description: |
682 | * Enables a low level driver to set an upper limit on the size of a | 685 | * Enables a low level driver to set an upper limit on the size of a |
683 | * coalesced segment | 686 | * coalesced segment |
684 | **/ | 687 | **/ |
685 | void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) | 688 | void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) |
686 | { | 689 | { |
687 | if (max_size < PAGE_CACHE_SIZE) { | 690 | if (max_size < PAGE_CACHE_SIZE) { |
688 | max_size = PAGE_CACHE_SIZE; | 691 | max_size = PAGE_CACHE_SIZE; |
689 | printk("%s: set to minimum %d\n", __FUNCTION__, max_size); | 692 | printk("%s: set to minimum %d\n", __FUNCTION__, max_size); |
690 | } | 693 | } |
691 | 694 | ||
692 | q->max_segment_size = max_size; | 695 | q->max_segment_size = max_size; |
693 | } | 696 | } |
694 | 697 | ||
695 | EXPORT_SYMBOL(blk_queue_max_segment_size); | 698 | EXPORT_SYMBOL(blk_queue_max_segment_size); |
696 | 699 | ||
697 | /** | 700 | /** |
698 | * blk_queue_hardsect_size - set hardware sector size for the queue | 701 | * blk_queue_hardsect_size - set hardware sector size for the queue |
699 | * @q: the request queue for the device | 702 | * @q: the request queue for the device |
700 | * @size: the hardware sector size, in bytes | 703 | * @size: the hardware sector size, in bytes |
701 | * | 704 | * |
702 | * Description: | 705 | * Description: |
703 | * This should typically be set to the lowest possible sector size | 706 | * This should typically be set to the lowest possible sector size |
704 | * that the hardware can operate on (possibly without resorting to | 707 | * that the hardware can operate on (possibly without resorting to |
705 | * internal read-modify-write operations). Usually the default | 708 | * internal read-modify-write operations). Usually the default |
706 | * of 512 covers most hardware. | 709 | * of 512 covers most hardware. |
707 | **/ | 710 | **/ |
708 | void blk_queue_hardsect_size(struct request_queue *q, unsigned short size) | 711 | void blk_queue_hardsect_size(struct request_queue *q, unsigned short size) |
709 | { | 712 | { |
710 | q->hardsect_size = size; | 713 | q->hardsect_size = size; |
711 | } | 714 | } |
712 | 715 | ||
713 | EXPORT_SYMBOL(blk_queue_hardsect_size); | 716 | EXPORT_SYMBOL(blk_queue_hardsect_size); |
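As a usage sketch only (not part of this patch): a low level driver applies the limits above once, while setting up its queue. The mydrv_ helper and the particular values below are assumptions for illustration.

#include <linux/blkdev.h>

static void mydrv_set_queue_limits(struct request_queue *q)
{
        /* at most 128 scatter/gather entries, on both the physical and hw side */
        blk_queue_max_phys_segments(q, 128);
        blk_queue_max_hw_segments(q, 128);
        /* never coalesce a segment beyond 64KiB */
        blk_queue_max_segment_size(q, 64 * 1024);
        /* the device exposes 4096-byte hardware sectors */
        blk_queue_hardsect_size(q, 4096);
}

A driver would call such a helper from its probe path, after allocating the queue and before adding the disk.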
714 | 717 | ||
715 | /* | 718 | /* |
716 | * Returns the minimum that is _not_ zero, unless both are zero. | 719 | * Returns the minimum that is _not_ zero, unless both are zero. |
717 | */ | 720 | */ |
718 | #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) | 721 | #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) |
719 | 722 | ||
720 | /** | 723 | /** |
721 | * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers | 724 | * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers |
722 | * @t: the stacking driver (top) | 725 | * @t: the stacking driver (top) |
723 | * @b: the underlying device (bottom) | 726 | * @b: the underlying device (bottom) |
724 | **/ | 727 | **/ |
725 | void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) | 728 | void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) |
726 | { | 729 | { |
727 | /* zero is "infinity" */ | 730 | /* zero is "infinity" */ |
728 | t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); | 731 | t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); |
729 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); | 732 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); |
730 | 733 | ||
731 | t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); | 734 | t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); |
732 | t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); | 735 | t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); |
733 | t->max_segment_size = min(t->max_segment_size,b->max_segment_size); | 736 | t->max_segment_size = min(t->max_segment_size,b->max_segment_size); |
734 | t->hardsect_size = max(t->hardsect_size,b->hardsect_size); | 737 | t->hardsect_size = max(t->hardsect_size,b->hardsect_size); |
735 | if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) | 738 | if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) |
736 | clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags); | 739 | clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags); |
737 | } | 740 | } |
738 | 741 | ||
739 | EXPORT_SYMBOL(blk_queue_stack_limits); | 742 | EXPORT_SYMBOL(blk_queue_stack_limits); |
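Usage sketch only: a stacking driver (a RAID or device-mapper style target, say) would start its top-level queue at the defaults and then shrink it to fit every member device. stackdrv_adjust_limits() and the members array are assumptions for the sketch.

#include <linux/blkdev.h>

static void stackdrv_adjust_limits(struct request_queue *top,
                                   struct block_device **members, int count)
{
        int i;

        /* each call can only tighten the limits, never loosen them */
        for (i = 0; i < count; i++)
                blk_queue_stack_limits(top, bdev_get_queue(members[i]));
}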
740 | 743 | ||
741 | /** | 744 | /** |
742 | * blk_queue_segment_boundary - set boundary rules for segment merging | 745 | * blk_queue_segment_boundary - set boundary rules for segment merging |
743 | * @q: the request queue for the device | 746 | * @q: the request queue for the device |
744 | * @mask: the memory boundary mask | 747 | * @mask: the memory boundary mask |
745 | **/ | 748 | **/ |
746 | void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) | 749 | void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) |
747 | { | 750 | { |
748 | if (mask < PAGE_CACHE_SIZE - 1) { | 751 | if (mask < PAGE_CACHE_SIZE - 1) { |
749 | mask = PAGE_CACHE_SIZE - 1; | 752 | mask = PAGE_CACHE_SIZE - 1; |
750 | printk("%s: set to minimum %lx\n", __FUNCTION__, mask); | 753 | printk("%s: set to minimum %lx\n", __FUNCTION__, mask); |
751 | } | 754 | } |
752 | 755 | ||
753 | q->seg_boundary_mask = mask; | 756 | q->seg_boundary_mask = mask; |
754 | } | 757 | } |
755 | 758 | ||
756 | EXPORT_SYMBOL(blk_queue_segment_boundary); | 759 | EXPORT_SYMBOL(blk_queue_segment_boundary); |
757 | 760 | ||
758 | /** | 761 | /** |
759 | * blk_queue_dma_alignment - set dma length and memory alignment | 762 | * blk_queue_dma_alignment - set dma length and memory alignment |
760 | * @q: the request queue for the device | 763 | * @q: the request queue for the device |
761 | * @mask: alignment mask | 764 | * @mask: alignment mask |
762 | * | 765 | * |
763 | * Description: | 766 | * Description: |
764 | * Set required memory and length alignment for direct dma transactions. | 767 | * Set required memory and length alignment for direct dma transactions. |
765 | * This is used when building direct io requests for the queue. | 768 | * This is used when building direct io requests for the queue. |
766 | * | 769 | * |
767 | **/ | 770 | **/ |
768 | void blk_queue_dma_alignment(struct request_queue *q, int mask) | 771 | void blk_queue_dma_alignment(struct request_queue *q, int mask) |
769 | { | 772 | { |
770 | q->dma_alignment = mask; | 773 | q->dma_alignment = mask; |
771 | } | 774 | } |
772 | 775 | ||
773 | EXPORT_SYMBOL(blk_queue_dma_alignment); | 776 | EXPORT_SYMBOL(blk_queue_dma_alignment); |
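Usage sketch only, with made-up constraints: a controller whose DMA engine cannot cross a 64KiB boundary within a segment and needs 512-byte aligned buffers for direct I/O might set the following.

#include <linux/blkdev.h>

static void mydrv_set_dma_constraints(struct request_queue *q)
{
        /* a single segment must not straddle a 64KiB boundary */
        blk_queue_segment_boundary(q, 0xffff);
        /* direct I/O buffers and lengths must be 512-byte aligned */
        blk_queue_dma_alignment(q, 511);
}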
774 | 777 | ||
775 | /** | 778 | /** |
776 | * blk_queue_find_tag - find a request by its tag and queue | 779 | * blk_queue_find_tag - find a request by its tag and queue |
777 | * @q: The request queue for the device | 780 | * @q: The request queue for the device |
778 | * @tag: The tag of the request | 781 | * @tag: The tag of the request |
779 | * | 782 | * |
780 | * Notes: | 783 | * Notes: |
781 | * Should be used when a device returns a tag and you want to match | 784 | * Should be used when a device returns a tag and you want to match |
782 | * it with a request. | 785 | * it with a request. |
783 | * | 786 | * |
784 | * no locks need be held. | 787 | * no locks need be held. |
785 | **/ | 788 | **/ |
786 | struct request *blk_queue_find_tag(struct request_queue *q, int tag) | 789 | struct request *blk_queue_find_tag(struct request_queue *q, int tag) |
787 | { | 790 | { |
788 | return blk_map_queue_find_tag(q->queue_tags, tag); | 791 | return blk_map_queue_find_tag(q->queue_tags, tag); |
789 | } | 792 | } |
790 | 793 | ||
791 | EXPORT_SYMBOL(blk_queue_find_tag); | 794 | EXPORT_SYMBOL(blk_queue_find_tag); |
792 | 795 | ||
793 | /** | 796 | /** |
794 | * __blk_free_tags - release a given set of tag maintenance info | 797 | * __blk_free_tags - release a given set of tag maintenance info |
795 | * @bqt: the tag map to free | 798 | * @bqt: the tag map to free |
796 | * | 799 | * |
797 | * Tries to free the specified @bqt@. Returns true if it was | 800 | * Tries to free the specified @bqt@. Returns true if it was |
798 | * actually freed and false if there are still references using it | 801 | * actually freed and false if there are still references using it |
799 | */ | 802 | */ |
800 | static int __blk_free_tags(struct blk_queue_tag *bqt) | 803 | static int __blk_free_tags(struct blk_queue_tag *bqt) |
801 | { | 804 | { |
802 | int retval; | 805 | int retval; |
803 | 806 | ||
804 | retval = atomic_dec_and_test(&bqt->refcnt); | 807 | retval = atomic_dec_and_test(&bqt->refcnt); |
805 | if (retval) { | 808 | if (retval) { |
806 | BUG_ON(bqt->busy); | 809 | BUG_ON(bqt->busy); |
807 | BUG_ON(!list_empty(&bqt->busy_list)); | 810 | BUG_ON(!list_empty(&bqt->busy_list)); |
808 | 811 | ||
809 | kfree(bqt->tag_index); | 812 | kfree(bqt->tag_index); |
810 | bqt->tag_index = NULL; | 813 | bqt->tag_index = NULL; |
811 | 814 | ||
812 | kfree(bqt->tag_map); | 815 | kfree(bqt->tag_map); |
813 | bqt->tag_map = NULL; | 816 | bqt->tag_map = NULL; |
814 | 817 | ||
815 | kfree(bqt); | 818 | kfree(bqt); |
816 | 819 | ||
817 | } | 820 | } |
818 | 821 | ||
819 | return retval; | 822 | return retval; |
820 | } | 823 | } |
821 | 824 | ||
822 | /** | 825 | /** |
823 | * __blk_queue_free_tags - release tag maintenance info | 826 | * __blk_queue_free_tags - release tag maintenance info |
824 | * @q: the request queue for the device | 827 | * @q: the request queue for the device |
825 | * | 828 | * |
826 | * Notes: | 829 | * Notes: |
827 | * blk_cleanup_queue() will take care of calling this function, if tagging | 830 | * blk_cleanup_queue() will take care of calling this function, if tagging |
828 | * has been used. So there's no need to call this directly. | 831 | * has been used. So there's no need to call this directly. |
829 | **/ | 832 | **/ |
830 | static void __blk_queue_free_tags(struct request_queue *q) | 833 | static void __blk_queue_free_tags(struct request_queue *q) |
831 | { | 834 | { |
832 | struct blk_queue_tag *bqt = q->queue_tags; | 835 | struct blk_queue_tag *bqt = q->queue_tags; |
833 | 836 | ||
834 | if (!bqt) | 837 | if (!bqt) |
835 | return; | 838 | return; |
836 | 839 | ||
837 | __blk_free_tags(bqt); | 840 | __blk_free_tags(bqt); |
838 | 841 | ||
839 | q->queue_tags = NULL; | 842 | q->queue_tags = NULL; |
840 | q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); | 843 | q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); |
841 | } | 844 | } |
842 | 845 | ||
843 | 846 | ||
844 | /** | 847 | /** |
845 | * blk_free_tags - release a given set of tag maintenance info | 848 | * blk_free_tags - release a given set of tag maintenance info |
846 | * @bqt: the tag map to free | 849 | * @bqt: the tag map to free |
847 | * | 850 | * |
848 | * For an externally managed @bqt@, frees the map. Callers of this | 851 | * For an externally managed @bqt@, frees the map. Callers of this |
849 | * function must guarantee to have released all the queues that | 852 | * function must guarantee to have released all the queues that |
850 | * might have been using this tag map. | 853 | * might have been using this tag map. |
851 | */ | 854 | */ |
852 | void blk_free_tags(struct blk_queue_tag *bqt) | 855 | void blk_free_tags(struct blk_queue_tag *bqt) |
853 | { | 856 | { |
854 | if (unlikely(!__blk_free_tags(bqt))) | 857 | if (unlikely(!__blk_free_tags(bqt))) |
855 | BUG(); | 858 | BUG(); |
856 | } | 859 | } |
857 | EXPORT_SYMBOL(blk_free_tags); | 860 | EXPORT_SYMBOL(blk_free_tags); |
858 | 861 | ||
859 | /** | 862 | /** |
860 | * blk_queue_free_tags - release tag maintenance info | 863 | * blk_queue_free_tags - release tag maintenance info |
861 | * @q: the request queue for the device | 864 | * @q: the request queue for the device |
862 | * | 865 | * |
863 | * Notes: | 866 | * Notes: |
864 | * This is used to disable tagged queuing on a device, yet leave | 867 | * This is used to disable tagged queuing on a device, yet leave |
865 | * the queue in function. | 868 | * the queue in function. |
866 | **/ | 869 | **/ |
867 | void blk_queue_free_tags(struct request_queue *q) | 870 | void blk_queue_free_tags(struct request_queue *q) |
868 | { | 871 | { |
869 | clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); | 872 | clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); |
870 | } | 873 | } |
871 | 874 | ||
872 | EXPORT_SYMBOL(blk_queue_free_tags); | 875 | EXPORT_SYMBOL(blk_queue_free_tags); |
873 | 876 | ||
874 | static int | 877 | static int |
875 | init_tag_map(struct request_queue *q, struct blk_queue_tag *tags, int depth) | 878 | init_tag_map(struct request_queue *q, struct blk_queue_tag *tags, int depth) |
876 | { | 879 | { |
877 | struct request **tag_index; | 880 | struct request **tag_index; |
878 | unsigned long *tag_map; | 881 | unsigned long *tag_map; |
879 | int nr_ulongs; | 882 | int nr_ulongs; |
880 | 883 | ||
881 | if (q && depth > q->nr_requests * 2) { | 884 | if (q && depth > q->nr_requests * 2) { |
882 | depth = q->nr_requests * 2; | 885 | depth = q->nr_requests * 2; |
883 | printk(KERN_ERR "%s: adjusted depth to %d\n", | 886 | printk(KERN_ERR "%s: adjusted depth to %d\n", |
884 | __FUNCTION__, depth); | 887 | __FUNCTION__, depth); |
885 | } | 888 | } |
886 | 889 | ||
887 | tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC); | 890 | tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC); |
888 | if (!tag_index) | 891 | if (!tag_index) |
889 | goto fail; | 892 | goto fail; |
890 | 893 | ||
891 | nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; | 894 | nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; |
892 | tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); | 895 | tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); |
893 | if (!tag_map) | 896 | if (!tag_map) |
894 | goto fail; | 897 | goto fail; |
895 | 898 | ||
896 | tags->real_max_depth = depth; | 899 | tags->real_max_depth = depth; |
897 | tags->max_depth = depth; | 900 | tags->max_depth = depth; |
898 | tags->tag_index = tag_index; | 901 | tags->tag_index = tag_index; |
899 | tags->tag_map = tag_map; | 902 | tags->tag_map = tag_map; |
900 | 903 | ||
901 | return 0; | 904 | return 0; |
902 | fail: | 905 | fail: |
903 | kfree(tag_index); | 906 | kfree(tag_index); |
904 | return -ENOMEM; | 907 | return -ENOMEM; |
905 | } | 908 | } |
906 | 909 | ||
907 | static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, | 910 | static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, |
908 | int depth) | 911 | int depth) |
909 | { | 912 | { |
910 | struct blk_queue_tag *tags; | 913 | struct blk_queue_tag *tags; |
911 | 914 | ||
912 | tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); | 915 | tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); |
913 | if (!tags) | 916 | if (!tags) |
914 | goto fail; | 917 | goto fail; |
915 | 918 | ||
916 | if (init_tag_map(q, tags, depth)) | 919 | if (init_tag_map(q, tags, depth)) |
917 | goto fail; | 920 | goto fail; |
918 | 921 | ||
919 | INIT_LIST_HEAD(&tags->busy_list); | 922 | INIT_LIST_HEAD(&tags->busy_list); |
920 | tags->busy = 0; | 923 | tags->busy = 0; |
921 | atomic_set(&tags->refcnt, 1); | 924 | atomic_set(&tags->refcnt, 1); |
922 | return tags; | 925 | return tags; |
923 | fail: | 926 | fail: |
924 | kfree(tags); | 927 | kfree(tags); |
925 | return NULL; | 928 | return NULL; |
926 | } | 929 | } |
927 | 930 | ||
928 | /** | 931 | /** |
929 | * blk_init_tags - initialize the tag info for an external tag map | 932 | * blk_init_tags - initialize the tag info for an external tag map |
930 | * @depth: the maximum queue depth supported | 933 | * @depth: the maximum queue depth supported |
931 | * @tags: the tag to use | 934 | * @tags: the tag to use |
932 | **/ | 935 | **/ |
933 | struct blk_queue_tag *blk_init_tags(int depth) | 936 | struct blk_queue_tag *blk_init_tags(int depth) |
934 | { | 937 | { |
935 | return __blk_queue_init_tags(NULL, depth); | 938 | return __blk_queue_init_tags(NULL, depth); |
936 | } | 939 | } |
937 | EXPORT_SYMBOL(blk_init_tags); | 940 | EXPORT_SYMBOL(blk_init_tags); |
938 | 941 | ||
939 | /** | 942 | /** |
940 | * blk_queue_init_tags - initialize the queue tag info | 943 | * blk_queue_init_tags - initialize the queue tag info |
941 | * @q: the request queue for the device | 944 | * @q: the request queue for the device |
942 | * @depth: the maximum queue depth supported | 945 | * @depth: the maximum queue depth supported |
943 | * @tags: the tag to use | 946 | * @tags: the tag to use |
944 | **/ | 947 | **/ |
945 | int blk_queue_init_tags(struct request_queue *q, int depth, | 948 | int blk_queue_init_tags(struct request_queue *q, int depth, |
946 | struct blk_queue_tag *tags) | 949 | struct blk_queue_tag *tags) |
947 | { | 950 | { |
948 | int rc; | 951 | int rc; |
949 | 952 | ||
950 | BUG_ON(tags && q->queue_tags && tags != q->queue_tags); | 953 | BUG_ON(tags && q->queue_tags && tags != q->queue_tags); |
951 | 954 | ||
952 | if (!tags && !q->queue_tags) { | 955 | if (!tags && !q->queue_tags) { |
953 | tags = __blk_queue_init_tags(q, depth); | 956 | tags = __blk_queue_init_tags(q, depth); |
954 | 957 | ||
955 | if (!tags) | 958 | if (!tags) |
956 | goto fail; | 959 | goto fail; |
957 | } else if (q->queue_tags) { | 960 | } else if (q->queue_tags) { |
958 | if ((rc = blk_queue_resize_tags(q, depth))) | 961 | if ((rc = blk_queue_resize_tags(q, depth))) |
959 | return rc; | 962 | return rc; |
960 | set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); | 963 | set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); |
961 | return 0; | 964 | return 0; |
962 | } else | 965 | } else |
963 | atomic_inc(&tags->refcnt); | 966 | atomic_inc(&tags->refcnt); |
964 | 967 | ||
965 | /* | 968 | /* |
966 | * assign it, all done | 969 | * assign it, all done |
967 | */ | 970 | */ |
968 | q->queue_tags = tags; | 971 | q->queue_tags = tags; |
969 | q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); | 972 | q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); |
970 | return 0; | 973 | return 0; |
971 | fail: | 974 | fail: |
972 | kfree(tags); | 975 | kfree(tags); |
973 | return -ENOMEM; | 976 | return -ENOMEM; |
974 | } | 977 | } |
975 | 978 | ||
976 | EXPORT_SYMBOL(blk_queue_init_tags); | 979 | EXPORT_SYMBOL(blk_queue_init_tags); |
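Usage sketch only: one externally managed tag map shared by every queue behind the same adapter, combining blk_init_tags(), blk_queue_init_tags() and blk_free_tags() from above. The mydrv_ names and the depth of 64 are assumptions.

#include <linux/blkdev.h>

static struct blk_queue_tag *mydrv_shared_tags;

static int mydrv_add_lun_queue(struct request_queue *q)
{
        if (!mydrv_shared_tags) {
                mydrv_shared_tags = blk_init_tags(64);
                if (!mydrv_shared_tags)
                        return -ENOMEM;
        }
        /* later queues simply take a reference on the shared map */
        return blk_queue_init_tags(q, 64, mydrv_shared_tags);
}

static void mydrv_release_adapter(void)
{
        /* only legal once every queue using the map has been cleaned up */
        blk_free_tags(mydrv_shared_tags);
        mydrv_shared_tags = NULL;
}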
977 | 980 | ||
978 | /** | 981 | /** |
979 | * blk_queue_resize_tags - change the queueing depth | 982 | * blk_queue_resize_tags - change the queueing depth |
980 | * @q: the request queue for the device | 983 | * @q: the request queue for the device |
981 | * @new_depth: the new max command queueing depth | 984 | * @new_depth: the new max command queueing depth |
982 | * | 985 | * |
983 | * Notes: | 986 | * Notes: |
984 | * Must be called with the queue lock held. | 987 | * Must be called with the queue lock held. |
985 | **/ | 988 | **/ |
986 | int blk_queue_resize_tags(struct request_queue *q, int new_depth) | 989 | int blk_queue_resize_tags(struct request_queue *q, int new_depth) |
987 | { | 990 | { |
988 | struct blk_queue_tag *bqt = q->queue_tags; | 991 | struct blk_queue_tag *bqt = q->queue_tags; |
989 | struct request **tag_index; | 992 | struct request **tag_index; |
990 | unsigned long *tag_map; | 993 | unsigned long *tag_map; |
991 | int max_depth, nr_ulongs; | 994 | int max_depth, nr_ulongs; |
992 | 995 | ||
993 | if (!bqt) | 996 | if (!bqt) |
994 | return -ENXIO; | 997 | return -ENXIO; |
995 | 998 | ||
996 | /* | 999 | /* |
997 | * if we already have a large enough real_max_depth, just | 1000 | * if we already have a large enough real_max_depth, just |
998 | * adjust max_depth. *NOTE* as requests with tag value | 1001 | * adjust max_depth. *NOTE* as requests with tag value |
999 | * between new_depth and real_max_depth can be in-flight, tag | 1002 | * between new_depth and real_max_depth can be in-flight, tag |
1000 | * map can not be shrunk blindly here. | 1003 | * map can not be shrunk blindly here. |
1001 | */ | 1004 | */ |
1002 | if (new_depth <= bqt->real_max_depth) { | 1005 | if (new_depth <= bqt->real_max_depth) { |
1003 | bqt->max_depth = new_depth; | 1006 | bqt->max_depth = new_depth; |
1004 | return 0; | 1007 | return 0; |
1005 | } | 1008 | } |
1006 | 1009 | ||
1007 | /* | 1010 | /* |
1008 | * Currently cannot replace a shared tag map with a new | 1011 | * Currently cannot replace a shared tag map with a new |
1009 | * one, so error out if this is the case | 1012 | * one, so error out if this is the case |
1010 | */ | 1013 | */ |
1011 | if (atomic_read(&bqt->refcnt) != 1) | 1014 | if (atomic_read(&bqt->refcnt) != 1) |
1012 | return -EBUSY; | 1015 | return -EBUSY; |
1013 | 1016 | ||
1014 | /* | 1017 | /* |
1015 | * save the old state info, so we can copy it back | 1018 | * save the old state info, so we can copy it back |
1016 | */ | 1019 | */ |
1017 | tag_index = bqt->tag_index; | 1020 | tag_index = bqt->tag_index; |
1018 | tag_map = bqt->tag_map; | 1021 | tag_map = bqt->tag_map; |
1019 | max_depth = bqt->real_max_depth; | 1022 | max_depth = bqt->real_max_depth; |
1020 | 1023 | ||
1021 | if (init_tag_map(q, bqt, new_depth)) | 1024 | if (init_tag_map(q, bqt, new_depth)) |
1022 | return -ENOMEM; | 1025 | return -ENOMEM; |
1023 | 1026 | ||
1024 | memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); | 1027 | memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); |
1025 | nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; | 1028 | nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; |
1026 | memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); | 1029 | memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); |
1027 | 1030 | ||
1028 | kfree(tag_index); | 1031 | kfree(tag_index); |
1029 | kfree(tag_map); | 1032 | kfree(tag_map); |
1030 | return 0; | 1033 | return 0; |
1031 | } | 1034 | } |
1032 | 1035 | ||
1033 | EXPORT_SYMBOL(blk_queue_resize_tags); | 1036 | EXPORT_SYMBOL(blk_queue_resize_tags); |
1034 | 1037 | ||
1035 | /** | 1038 | /** |
1036 | * blk_queue_end_tag - end tag operations for a request | 1039 | * blk_queue_end_tag - end tag operations for a request |
1037 | * @q: the request queue for the device | 1040 | * @q: the request queue for the device |
1038 | * @rq: the request that has completed | 1041 | * @rq: the request that has completed |
1039 | * | 1042 | * |
1040 | * Description: | 1043 | * Description: |
1041 | * Typically called when end_that_request_first() returns 0, meaning | 1044 | * Typically called when end_that_request_first() returns 0, meaning |
1042 | * all transfers have been done for a request. It's important to call | 1045 | * all transfers have been done for a request. It's important to call |
1043 | * this function before end_that_request_last(), as that will put the | 1046 | * this function before end_that_request_last(), as that will put the |
1044 | * request back on the free list, thus corrupting the internal tag list. | 1047 | * request back on the free list, thus corrupting the internal tag list. |
1045 | * | 1048 | * |
1046 | * Notes: | 1049 | * Notes: |
1047 | * queue lock must be held. | 1050 | * queue lock must be held. |
1048 | **/ | 1051 | **/ |
1049 | void blk_queue_end_tag(struct request_queue *q, struct request *rq) | 1052 | void blk_queue_end_tag(struct request_queue *q, struct request *rq) |
1050 | { | 1053 | { |
1051 | struct blk_queue_tag *bqt = q->queue_tags; | 1054 | struct blk_queue_tag *bqt = q->queue_tags; |
1052 | int tag = rq->tag; | 1055 | int tag = rq->tag; |
1053 | 1056 | ||
1054 | BUG_ON(tag == -1); | 1057 | BUG_ON(tag == -1); |
1055 | 1058 | ||
1056 | if (unlikely(tag >= bqt->real_max_depth)) | 1059 | if (unlikely(tag >= bqt->real_max_depth)) |
1057 | /* | 1060 | /* |
1058 | * This can happen after tag depth has been reduced. | 1061 | * This can happen after tag depth has been reduced. |
1059 | * FIXME: how about a warning or info message here? | 1062 | * FIXME: how about a warning or info message here? |
1060 | */ | 1063 | */ |
1061 | return; | 1064 | return; |
1062 | 1065 | ||
1063 | list_del_init(&rq->queuelist); | 1066 | list_del_init(&rq->queuelist); |
1064 | rq->cmd_flags &= ~REQ_QUEUED; | 1067 | rq->cmd_flags &= ~REQ_QUEUED; |
1065 | rq->tag = -1; | 1068 | rq->tag = -1; |
1066 | 1069 | ||
1067 | if (unlikely(bqt->tag_index[tag] == NULL)) | 1070 | if (unlikely(bqt->tag_index[tag] == NULL)) |
1068 | printk(KERN_ERR "%s: tag %d is missing\n", | 1071 | printk(KERN_ERR "%s: tag %d is missing\n", |
1069 | __FUNCTION__, tag); | 1072 | __FUNCTION__, tag); |
1070 | 1073 | ||
1071 | bqt->tag_index[tag] = NULL; | 1074 | bqt->tag_index[tag] = NULL; |
1072 | 1075 | ||
1073 | /* | 1076 | /* |
1074 | * We use test_and_clear_bit's memory ordering properties here. | 1077 | * We use test_and_clear_bit's memory ordering properties here. |
1075 | * The tag_map bit acts as a lock for tag_index[bit], so we need | 1078 | * The tag_map bit acts as a lock for tag_index[bit], so we need |
1076 | * a barrier before clearing the bit (precisely: release semantics). | 1079 | * a barrier before clearing the bit (precisely: release semantics). |
1077 | * Could use clear_bit_unlock when it is merged. | 1080 | * Could use clear_bit_unlock when it is merged. |
1078 | */ | 1081 | */ |
1079 | if (unlikely(!test_and_clear_bit(tag, bqt->tag_map))) { | 1082 | if (unlikely(!test_and_clear_bit(tag, bqt->tag_map))) { |
1080 | printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", | 1083 | printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", |
1081 | __FUNCTION__, tag); | 1084 | __FUNCTION__, tag); |
1082 | return; | 1085 | return; |
1083 | } | 1086 | } |
1084 | 1087 | ||
1085 | bqt->busy--; | 1088 | bqt->busy--; |
1086 | } | 1089 | } |
1087 | 1090 | ||
1088 | EXPORT_SYMBOL(blk_queue_end_tag); | 1091 | EXPORT_SYMBOL(blk_queue_end_tag); |
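Usage sketch only: a completion path that maps a hardware tag back to its request with blk_queue_find_tag() and retires it, calling blk_queue_end_tag() before end_that_request_last() as the comment above requires. mydrv_complete(), its uptodate convention and the assumption that the caller already holds the queue lock are all part of the sketch.

#include <linux/blkdev.h>

static void mydrv_complete(struct request_queue *q, int tag, int uptodate)
{
        struct request *rq = blk_queue_find_tag(q, tag);

        if (unlikely(!rq))
                return;

        if (!end_that_request_first(rq, uptodate, rq->hard_nr_sectors)) {
                /* release the tag before the request goes back to the free list */
                blk_queue_end_tag(q, rq);
                end_that_request_last(rq, uptodate);
        }
}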
1089 | 1092 | ||
1090 | /** | 1093 | /** |
1091 | * blk_queue_start_tag - find a free tag and assign it | 1094 | * blk_queue_start_tag - find a free tag and assign it |
1092 | * @q: the request queue for the device | 1095 | * @q: the request queue for the device |
1093 | * @rq: the block request that needs tagging | 1096 | * @rq: the block request that needs tagging |
1094 | * | 1097 | * |
1095 | * Description: | 1098 | * Description: |
1096 | * This can either be used as a stand-alone helper, or possibly be | 1099 | * This can either be used as a stand-alone helper, or possibly be |
1097 | * assigned as the queue &prep_rq_fn (in which case &struct request | 1100 | * assigned as the queue &prep_rq_fn (in which case &struct request |
1098 | * automagically gets a tag assigned). Note that this function | 1101 | * automagically gets a tag assigned). Note that this function |
1099 | * assumes that any type of request can be queued! If this is not | 1102 | * assumes that any type of request can be queued! If this is not |
1100 | * true for your device, you must check the request type before | 1103 | * true for your device, you must check the request type before |
1101 | * calling this function. The request will also be removed from | 1104 | * calling this function. The request will also be removed from |
1102 | * the request queue, so it's the driver's responsibility to re-add | 1105 | * the request queue, so it's the driver's responsibility to re-add |
1103 | * it if it should need to be restarted for some reason. | 1106 | * it if it should need to be restarted for some reason. |
1104 | * | 1107 | * |
1105 | * Notes: | 1108 | * Notes: |
1106 | * queue lock must be held. | 1109 | * queue lock must be held. |
1107 | **/ | 1110 | **/ |
1108 | int blk_queue_start_tag(struct request_queue *q, struct request *rq) | 1111 | int blk_queue_start_tag(struct request_queue *q, struct request *rq) |
1109 | { | 1112 | { |
1110 | struct blk_queue_tag *bqt = q->queue_tags; | 1113 | struct blk_queue_tag *bqt = q->queue_tags; |
1111 | int tag; | 1114 | int tag; |
1112 | 1115 | ||
1113 | if (unlikely((rq->cmd_flags & REQ_QUEUED))) { | 1116 | if (unlikely((rq->cmd_flags & REQ_QUEUED))) { |
1114 | printk(KERN_ERR | 1117 | printk(KERN_ERR |
1115 | "%s: request %p for device [%s] already tagged %d", | 1118 | "%s: request %p for device [%s] already tagged %d", |
1116 | __FUNCTION__, rq, | 1119 | __FUNCTION__, rq, |
1117 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); | 1120 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); |
1118 | BUG(); | 1121 | BUG(); |
1119 | } | 1122 | } |
1120 | 1123 | ||
1121 | /* | 1124 | /* |
1122 | * Protect against shared tag maps, as we may not have exclusive | 1125 | * Protect against shared tag maps, as we may not have exclusive |
1123 | * access to the tag map. | 1126 | * access to the tag map. |
1124 | */ | 1127 | */ |
1125 | do { | 1128 | do { |
1126 | tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); | 1129 | tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); |
1127 | if (tag >= bqt->max_depth) | 1130 | if (tag >= bqt->max_depth) |
1128 | return 1; | 1131 | return 1; |
1129 | 1132 | ||
1130 | } while (test_and_set_bit(tag, bqt->tag_map)); | 1133 | } while (test_and_set_bit(tag, bqt->tag_map)); |
1131 | /* | 1134 | /* |
1132 | * We rely on test_and_set_bit providing lock memory ordering semantics | 1135 | * We rely on test_and_set_bit providing lock memory ordering semantics |
1133 | * (could use test_and_set_bit_lock when it is merged). | 1136 | * (could use test_and_set_bit_lock when it is merged). |
1134 | */ | 1137 | */ |
1135 | 1138 | ||
1136 | rq->cmd_flags |= REQ_QUEUED; | 1139 | rq->cmd_flags |= REQ_QUEUED; |
1137 | rq->tag = tag; | 1140 | rq->tag = tag; |
1138 | bqt->tag_index[tag] = rq; | 1141 | bqt->tag_index[tag] = rq; |
1139 | blkdev_dequeue_request(rq); | 1142 | blkdev_dequeue_request(rq); |
1140 | list_add(&rq->queuelist, &bqt->busy_list); | 1143 | list_add(&rq->queuelist, &bqt->busy_list); |
1141 | bqt->busy++; | 1144 | bqt->busy++; |
1142 | return 0; | 1145 | return 0; |
1143 | } | 1146 | } |
1144 | 1147 | ||
1145 | EXPORT_SYMBOL(blk_queue_start_tag); | 1148 | EXPORT_SYMBOL(blk_queue_start_tag); |
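Usage sketch only: a request_fn that tags each request before handing it to the hardware. mydrv_issue() is a hypothetical submit routine, and the "retry later" policy when tags run out is deliberately left vague.

#include <linux/blkdev.h>

static void mydrv_issue(struct request *rq);    /* hypothetical hardware submit */

static void mydrv_request_fn(struct request_queue *q)
{
        struct request *rq;

        while ((rq = elv_next_request(q)) != NULL) {
                /* on success this also dequeues rq from the request queue */
                if (blk_queue_start_tag(q, rq))
                        break;  /* no free tag; try again after a completion */

                mydrv_issue(rq);        /* the hardware sees rq->tag */
        }
}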
1146 | 1149 | ||
1147 | /** | 1150 | /** |
1148 | * blk_queue_invalidate_tags - invalidate all pending tags | 1151 | * blk_queue_invalidate_tags - invalidate all pending tags |
1149 | * @q: the request queue for the device | 1152 | * @q: the request queue for the device |
1150 | * | 1153 | * |
1151 | * Description: | 1154 | * Description: |
1152 | * Hardware conditions may dictate a need to stop all pending requests. | 1155 | * Hardware conditions may dictate a need to stop all pending requests. |
1153 | * In this case, we will safely clear the block side of the tag queue and | 1156 | * In this case, we will safely clear the block side of the tag queue and |
1154 | * re-add all requests to the request queue in the right order. | 1157 | * re-add all requests to the request queue in the right order. |
1155 | * | 1158 | * |
1156 | * Notes: | 1159 | * Notes: |
1157 | * queue lock must be held. | 1160 | * queue lock must be held. |
1158 | **/ | 1161 | **/ |
1159 | void blk_queue_invalidate_tags(struct request_queue *q) | 1162 | void blk_queue_invalidate_tags(struct request_queue *q) |
1160 | { | 1163 | { |
1161 | struct blk_queue_tag *bqt = q->queue_tags; | 1164 | struct blk_queue_tag *bqt = q->queue_tags; |
1162 | struct list_head *tmp, *n; | 1165 | struct list_head *tmp, *n; |
1163 | struct request *rq; | 1166 | struct request *rq; |
1164 | 1167 | ||
1165 | list_for_each_safe(tmp, n, &bqt->busy_list) { | 1168 | list_for_each_safe(tmp, n, &bqt->busy_list) { |
1166 | rq = list_entry_rq(tmp); | 1169 | rq = list_entry_rq(tmp); |
1167 | 1170 | ||
1168 | if (rq->tag == -1) { | 1171 | if (rq->tag == -1) { |
1169 | printk(KERN_ERR | 1172 | printk(KERN_ERR |
1170 | "%s: bad tag found on list\n", __FUNCTION__); | 1173 | "%s: bad tag found on list\n", __FUNCTION__); |
1171 | list_del_init(&rq->queuelist); | 1174 | list_del_init(&rq->queuelist); |
1172 | rq->cmd_flags &= ~REQ_QUEUED; | 1175 | rq->cmd_flags &= ~REQ_QUEUED; |
1173 | } else | 1176 | } else |
1174 | blk_queue_end_tag(q, rq); | 1177 | blk_queue_end_tag(q, rq); |
1175 | 1178 | ||
1176 | rq->cmd_flags &= ~REQ_STARTED; | 1179 | rq->cmd_flags &= ~REQ_STARTED; |
1177 | __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); | 1180 | __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); |
1178 | } | 1181 | } |
1179 | } | 1182 | } |
1180 | 1183 | ||
1181 | EXPORT_SYMBOL(blk_queue_invalidate_tags); | 1184 | EXPORT_SYMBOL(blk_queue_invalidate_tags); |
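Usage sketch only: an error-handling path that, after resetting the controller, pushes every outstanding tagged request back onto the request queue. Taking q->queue_lock here is this sketch's assumption about the calling context.

#include <linux/blkdev.h>

static void mydrv_recover_after_reset(struct request_queue *q)
{
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        blk_queue_invalidate_tags(q);   /* queue lock must be held */
        spin_unlock_irqrestore(q->queue_lock, flags);
}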
1182 | 1185 | ||
1183 | void blk_dump_rq_flags(struct request *rq, char *msg) | 1186 | void blk_dump_rq_flags(struct request *rq, char *msg) |
1184 | { | 1187 | { |
1185 | int bit; | 1188 | int bit; |
1186 | 1189 | ||
1187 | printk("%s: dev %s: type=%x, flags=%x\n", msg, | 1190 | printk("%s: dev %s: type=%x, flags=%x\n", msg, |
1188 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, | 1191 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, |
1189 | rq->cmd_flags); | 1192 | rq->cmd_flags); |
1190 | 1193 | ||
1191 | printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, | 1194 | printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, |
1192 | rq->nr_sectors, | 1195 | rq->nr_sectors, |
1193 | rq->current_nr_sectors); | 1196 | rq->current_nr_sectors); |
1194 | printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); | 1197 | printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); |
1195 | 1198 | ||
1196 | if (blk_pc_request(rq)) { | 1199 | if (blk_pc_request(rq)) { |
1197 | printk("cdb: "); | 1200 | printk("cdb: "); |
1198 | for (bit = 0; bit < sizeof(rq->cmd); bit++) | 1201 | for (bit = 0; bit < sizeof(rq->cmd); bit++) |
1199 | printk("%02x ", rq->cmd[bit]); | 1202 | printk("%02x ", rq->cmd[bit]); |
1200 | printk("\n"); | 1203 | printk("\n"); |
1201 | } | 1204 | } |
1202 | } | 1205 | } |
1203 | 1206 | ||
1204 | EXPORT_SYMBOL(blk_dump_rq_flags); | 1207 | EXPORT_SYMBOL(blk_dump_rq_flags); |
1205 | 1208 | ||
1206 | void blk_recount_segments(struct request_queue *q, struct bio *bio) | 1209 | void blk_recount_segments(struct request_queue *q, struct bio *bio) |
1207 | { | 1210 | { |
1208 | struct request rq; | 1211 | struct request rq; |
1209 | struct bio *nxt = bio->bi_next; | 1212 | struct bio *nxt = bio->bi_next; |
1210 | rq.q = q; | 1213 | rq.q = q; |
1211 | rq.bio = rq.biotail = bio; | 1214 | rq.bio = rq.biotail = bio; |
1212 | bio->bi_next = NULL; | 1215 | bio->bi_next = NULL; |
1213 | blk_recalc_rq_segments(&rq); | 1216 | blk_recalc_rq_segments(&rq); |
1214 | bio->bi_next = nxt; | 1217 | bio->bi_next = nxt; |
1215 | bio->bi_phys_segments = rq.nr_phys_segments; | 1218 | bio->bi_phys_segments = rq.nr_phys_segments; |
1216 | bio->bi_hw_segments = rq.nr_hw_segments; | 1219 | bio->bi_hw_segments = rq.nr_hw_segments; |
1217 | bio->bi_flags |= (1 << BIO_SEG_VALID); | 1220 | bio->bi_flags |= (1 << BIO_SEG_VALID); |
1218 | } | 1221 | } |
1219 | EXPORT_SYMBOL(blk_recount_segments); | 1222 | EXPORT_SYMBOL(blk_recount_segments); |
1220 | 1223 | ||
1221 | static void blk_recalc_rq_segments(struct request *rq) | 1224 | static void blk_recalc_rq_segments(struct request *rq) |
1222 | { | 1225 | { |
1223 | int nr_phys_segs; | 1226 | int nr_phys_segs; |
1224 | int nr_hw_segs; | 1227 | int nr_hw_segs; |
1225 | unsigned int phys_size; | 1228 | unsigned int phys_size; |
1226 | unsigned int hw_size; | 1229 | unsigned int hw_size; |
1227 | struct bio_vec *bv, *bvprv = NULL; | 1230 | struct bio_vec *bv, *bvprv = NULL; |
1228 | int seg_size; | 1231 | int seg_size; |
1229 | int hw_seg_size; | 1232 | int hw_seg_size; |
1230 | int cluster; | 1233 | int cluster; |
1231 | struct req_iterator iter; | 1234 | struct req_iterator iter; |
1232 | int high, highprv = 1; | 1235 | int high, highprv = 1; |
1233 | struct request_queue *q = rq->q; | 1236 | struct request_queue *q = rq->q; |
1234 | 1237 | ||
1235 | if (!rq->bio) | 1238 | if (!rq->bio) |
1236 | return; | 1239 | return; |
1237 | 1240 | ||
1238 | cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); | 1241 | cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); |
1239 | hw_seg_size = seg_size = 0; | 1242 | hw_seg_size = seg_size = 0; |
1240 | phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; | 1243 | phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; |
1241 | rq_for_each_segment(bv, rq, iter) { | 1244 | rq_for_each_segment(bv, rq, iter) { |
1242 | /* | 1245 | /* |
1243 | * the trick here is making sure that a high page is never | 1246 | * the trick here is making sure that a high page is never |
1244 | * considered part of another segment, since that might | 1247 | * considered part of another segment, since that might |
1245 | * change with the bounce page. | 1248 | * change with the bounce page. |
1246 | */ | 1249 | */ |
1247 | high = page_to_pfn(bv->bv_page) > q->bounce_pfn; | 1250 | high = page_to_pfn(bv->bv_page) > q->bounce_pfn; |
1248 | if (high || highprv) | 1251 | if (high || highprv) |
1249 | goto new_hw_segment; | 1252 | goto new_hw_segment; |
1250 | if (cluster) { | 1253 | if (cluster) { |
1251 | if (seg_size + bv->bv_len > q->max_segment_size) | 1254 | if (seg_size + bv->bv_len > q->max_segment_size) |
1252 | goto new_segment; | 1255 | goto new_segment; |
1253 | if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) | 1256 | if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) |
1254 | goto new_segment; | 1257 | goto new_segment; |
1255 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) | 1258 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) |
1256 | goto new_segment; | 1259 | goto new_segment; |
1257 | if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) | 1260 | if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) |
1258 | goto new_hw_segment; | 1261 | goto new_hw_segment; |
1259 | 1262 | ||
1260 | seg_size += bv->bv_len; | 1263 | seg_size += bv->bv_len; |
1261 | hw_seg_size += bv->bv_len; | 1264 | hw_seg_size += bv->bv_len; |
1262 | bvprv = bv; | 1265 | bvprv = bv; |
1263 | continue; | 1266 | continue; |
1264 | } | 1267 | } |
1265 | new_segment: | 1268 | new_segment: |
1266 | if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && | 1269 | if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && |
1267 | !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) | 1270 | !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) |
1268 | hw_seg_size += bv->bv_len; | 1271 | hw_seg_size += bv->bv_len; |
1269 | else { | 1272 | else { |
1270 | new_hw_segment: | 1273 | new_hw_segment: |
1271 | if (nr_hw_segs == 1 && | 1274 | if (nr_hw_segs == 1 && |
1272 | hw_seg_size > rq->bio->bi_hw_front_size) | 1275 | hw_seg_size > rq->bio->bi_hw_front_size) |
1273 | rq->bio->bi_hw_front_size = hw_seg_size; | 1276 | rq->bio->bi_hw_front_size = hw_seg_size; |
1274 | hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; | 1277 | hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; |
1275 | nr_hw_segs++; | 1278 | nr_hw_segs++; |
1276 | } | 1279 | } |
1277 | 1280 | ||
1278 | nr_phys_segs++; | 1281 | nr_phys_segs++; |
1279 | bvprv = bv; | 1282 | bvprv = bv; |
1280 | seg_size = bv->bv_len; | 1283 | seg_size = bv->bv_len; |
1281 | highprv = high; | 1284 | highprv = high; |
1282 | } | 1285 | } |
1283 | 1286 | ||
1284 | if (nr_hw_segs == 1 && | 1287 | if (nr_hw_segs == 1 && |
1285 | hw_seg_size > rq->bio->bi_hw_front_size) | 1288 | hw_seg_size > rq->bio->bi_hw_front_size) |
1286 | rq->bio->bi_hw_front_size = hw_seg_size; | 1289 | rq->bio->bi_hw_front_size = hw_seg_size; |
1287 | if (hw_seg_size > rq->biotail->bi_hw_back_size) | 1290 | if (hw_seg_size > rq->biotail->bi_hw_back_size) |
1288 | rq->biotail->bi_hw_back_size = hw_seg_size; | 1291 | rq->biotail->bi_hw_back_size = hw_seg_size; |
1289 | rq->nr_phys_segments = nr_phys_segs; | 1292 | rq->nr_phys_segments = nr_phys_segs; |
1290 | rq->nr_hw_segments = nr_hw_segs; | 1293 | rq->nr_hw_segments = nr_hw_segs; |
1291 | } | 1294 | } |
1292 | 1295 | ||
1293 | static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, | 1296 | static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, |
1294 | struct bio *nxt) | 1297 | struct bio *nxt) |
1295 | { | 1298 | { |
1296 | if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) | 1299 | if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) |
1297 | return 0; | 1300 | return 0; |
1298 | 1301 | ||
1299 | if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) | 1302 | if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) |
1300 | return 0; | 1303 | return 0; |
1301 | if (bio->bi_size + nxt->bi_size > q->max_segment_size) | 1304 | if (bio->bi_size + nxt->bi_size > q->max_segment_size) |
1302 | return 0; | 1305 | return 0; |
1303 | 1306 | ||
1304 | /* | 1307 | /* |
1305 | * bio and nxt are contiguous in memory, check if the queue allows | 1308 | * bio and nxt are contiguous in memory, check if the queue allows |
1306 | * these two to be merged into one | 1309 | * these two to be merged into one |
1307 | */ | 1310 | */ |
1308 | if (BIO_SEG_BOUNDARY(q, bio, nxt)) | 1311 | if (BIO_SEG_BOUNDARY(q, bio, nxt)) |
1309 | return 1; | 1312 | return 1; |
1310 | 1313 | ||
1311 | return 0; | 1314 | return 0; |
1312 | } | 1315 | } |
1313 | 1316 | ||
1314 | static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio, | 1317 | static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio, |
1315 | struct bio *nxt) | 1318 | struct bio *nxt) |
1316 | { | 1319 | { |
1317 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | 1320 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) |
1318 | blk_recount_segments(q, bio); | 1321 | blk_recount_segments(q, bio); |
1319 | if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID))) | 1322 | if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID))) |
1320 | blk_recount_segments(q, nxt); | 1323 | blk_recount_segments(q, nxt); |
1321 | if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || | 1324 | if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || |
1322 | BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)) | 1325 | BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)) |
1323 | return 0; | 1326 | return 0; |
1324 | if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size) | 1327 | if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size) |
1325 | return 0; | 1328 | return 0; |
1326 | 1329 | ||
1327 | return 1; | 1330 | return 1; |
1328 | } | 1331 | } |
1329 | 1332 | ||
1330 | /* | 1333 | /* |
1334 | * map a request to scatterlist, return number of sg entries set up. Caller | 1337 | * map a request to scatterlist, return number of sg entries set up. Caller |
1332 | * must make sure sg can hold rq->nr_phys_segments entries | 1335 | * must make sure sg can hold rq->nr_phys_segments entries |
1333 | */ | 1336 | */ |
1334 | int blk_rq_map_sg(struct request_queue *q, struct request *rq, | 1337 | int blk_rq_map_sg(struct request_queue *q, struct request *rq, |
1335 | struct scatterlist *sg) | 1338 | struct scatterlist *sg) |
1336 | { | 1339 | { |
1337 | struct bio_vec *bvec, *bvprv; | 1340 | struct bio_vec *bvec, *bvprv; |
1338 | struct req_iterator iter; | 1341 | struct req_iterator iter; |
1339 | int nsegs, cluster; | 1342 | int nsegs, cluster; |
1340 | 1343 | ||
1341 | nsegs = 0; | 1344 | nsegs = 0; |
1342 | cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); | 1345 | cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); |
1343 | 1346 | ||
1344 | /* | 1347 | /* |
1345 | * for each bio in rq | 1348 | * for each bio in rq |
1346 | */ | 1349 | */ |
1347 | bvprv = NULL; | 1350 | bvprv = NULL; |
1348 | rq_for_each_segment(bvec, rq, iter) { | 1351 | rq_for_each_segment(bvec, rq, iter) { |
1349 | int nbytes = bvec->bv_len; | 1352 | int nbytes = bvec->bv_len; |
1350 | 1353 | ||
1351 | if (bvprv && cluster) { | 1354 | if (bvprv && cluster) { |
1352 | if (sg[nsegs - 1].length + nbytes > q->max_segment_size) | 1355 | if (sg[nsegs - 1].length + nbytes > q->max_segment_size) |
1353 | goto new_segment; | 1356 | goto new_segment; |
1354 | 1357 | ||
1355 | if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) | 1358 | if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) |
1356 | goto new_segment; | 1359 | goto new_segment; |
1357 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) | 1360 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) |
1358 | goto new_segment; | 1361 | goto new_segment; |
1359 | 1362 | ||
1360 | sg[nsegs - 1].length += nbytes; | 1363 | sg[nsegs - 1].length += nbytes; |
1361 | } else { | 1364 | } else { |
1362 | new_segment: | 1365 | new_segment: |
1363 | memset(&sg[nsegs],0,sizeof(struct scatterlist)); | 1366 | memset(&sg[nsegs],0,sizeof(struct scatterlist)); |
1364 | sg[nsegs].page = bvec->bv_page; | 1367 | sg[nsegs].page = bvec->bv_page; |
1365 | sg[nsegs].length = nbytes; | 1368 | sg[nsegs].length = nbytes; |
1366 | sg[nsegs].offset = bvec->bv_offset; | 1369 | sg[nsegs].offset = bvec->bv_offset; |
1367 | 1370 | ||
1368 | nsegs++; | 1371 | nsegs++; |
1369 | } | 1372 | } |
1370 | bvprv = bvec; | 1373 | bvprv = bvec; |
1371 | } /* segments in rq */ | 1374 | } /* segments in rq */ |
1372 | 1375 | ||
1373 | return nsegs; | 1376 | return nsegs; |
1374 | } | 1377 | } |
1375 | 1378 | ||
1376 | EXPORT_SYMBOL(blk_rq_map_sg); | 1379 | EXPORT_SYMBOL(blk_rq_map_sg); |
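Usage sketch only: building the scatterlist for a request and DMA-mapping it. struct mydrv_host and its fields are assumptions, and the sg array is presumed to have been allocated with room for q->max_phys_segments entries, as the comment above demands.

#include <linux/blkdev.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

struct mydrv_host {                     /* hypothetical per-adapter state */
        struct request_queue *queue;
        struct device *dev;
        struct scatterlist *sg;
};

static int mydrv_map_request(struct mydrv_host *host, struct request *rq)
{
        int nents = blk_rq_map_sg(host->queue, rq, host->sg);

        /* hand the physical segments to the IOMMU/DMA layer */
        return dma_map_sg(host->dev, host->sg, nents,
                          rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE
                                                   : DMA_FROM_DEVICE);
}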
1377 | 1380 | ||
1378 | /* | 1381 | /* |
1379 | * the standard queue merge functions; they can be overridden with | 1382 | * the standard queue merge functions; they can be overridden with |
1380 | * device-specific ones if so desired | 1383 | * device-specific ones if so desired |
1381 | */ | 1384 | */ |
1382 | 1385 | ||
1383 | static inline int ll_new_mergeable(struct request_queue *q, | 1386 | static inline int ll_new_mergeable(struct request_queue *q, |
1384 | struct request *req, | 1387 | struct request *req, |
1385 | struct bio *bio) | 1388 | struct bio *bio) |
1386 | { | 1389 | { |
1387 | int nr_phys_segs = bio_phys_segments(q, bio); | 1390 | int nr_phys_segs = bio_phys_segments(q, bio); |
1388 | 1391 | ||
1389 | if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { | 1392 | if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { |
1390 | req->cmd_flags |= REQ_NOMERGE; | 1393 | req->cmd_flags |= REQ_NOMERGE; |
1391 | if (req == q->last_merge) | 1394 | if (req == q->last_merge) |
1392 | q->last_merge = NULL; | 1395 | q->last_merge = NULL; |
1393 | return 0; | 1396 | return 0; |
1394 | } | 1397 | } |
1395 | 1398 | ||
1396 | /* | 1399 | /* |
1397 | * A hw segment is just getting larger, bump just the phys | 1400 | * A hw segment is just getting larger, bump just the phys |
1398 | * counter. | 1401 | * counter. |
1399 | */ | 1402 | */ |
1400 | req->nr_phys_segments += nr_phys_segs; | 1403 | req->nr_phys_segments += nr_phys_segs; |
1401 | return 1; | 1404 | return 1; |
1402 | } | 1405 | } |
1403 | 1406 | ||
1404 | static inline int ll_new_hw_segment(struct request_queue *q, | 1407 | static inline int ll_new_hw_segment(struct request_queue *q, |
1405 | struct request *req, | 1408 | struct request *req, |
1406 | struct bio *bio) | 1409 | struct bio *bio) |
1407 | { | 1410 | { |
1408 | int nr_hw_segs = bio_hw_segments(q, bio); | 1411 | int nr_hw_segs = bio_hw_segments(q, bio); |
1409 | int nr_phys_segs = bio_phys_segments(q, bio); | 1412 | int nr_phys_segs = bio_phys_segments(q, bio); |
1410 | 1413 | ||
1411 | if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments | 1414 | if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments |
1412 | || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { | 1415 | || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { |
1413 | req->cmd_flags |= REQ_NOMERGE; | 1416 | req->cmd_flags |= REQ_NOMERGE; |
1414 | if (req == q->last_merge) | 1417 | if (req == q->last_merge) |
1415 | q->last_merge = NULL; | 1418 | q->last_merge = NULL; |
1416 | return 0; | 1419 | return 0; |
1417 | } | 1420 | } |
1418 | 1421 | ||
1419 | /* | 1422 | /* |
1420 | * This will form the start of a new hw segment. Bump both | 1423 | * This will form the start of a new hw segment. Bump both |
1421 | * counters. | 1424 | * counters. |
1422 | */ | 1425 | */ |
1423 | req->nr_hw_segments += nr_hw_segs; | 1426 | req->nr_hw_segments += nr_hw_segs; |
1424 | req->nr_phys_segments += nr_phys_segs; | 1427 | req->nr_phys_segments += nr_phys_segs; |
1425 | return 1; | 1428 | return 1; |
1426 | } | 1429 | } |
1427 | 1430 | ||
1428 | static int ll_back_merge_fn(struct request_queue *q, struct request *req, | 1431 | static int ll_back_merge_fn(struct request_queue *q, struct request *req, |
1429 | struct bio *bio) | 1432 | struct bio *bio) |
1430 | { | 1433 | { |
1431 | unsigned short max_sectors; | 1434 | unsigned short max_sectors; |
1432 | int len; | 1435 | int len; |
1433 | 1436 | ||
1434 | if (unlikely(blk_pc_request(req))) | 1437 | if (unlikely(blk_pc_request(req))) |
1435 | max_sectors = q->max_hw_sectors; | 1438 | max_sectors = q->max_hw_sectors; |
1436 | else | 1439 | else |
1437 | max_sectors = q->max_sectors; | 1440 | max_sectors = q->max_sectors; |
1438 | 1441 | ||
1439 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { | 1442 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { |
1440 | req->cmd_flags |= REQ_NOMERGE; | 1443 | req->cmd_flags |= REQ_NOMERGE; |
1441 | if (req == q->last_merge) | 1444 | if (req == q->last_merge) |
1442 | q->last_merge = NULL; | 1445 | q->last_merge = NULL; |
1443 | return 0; | 1446 | return 0; |
1444 | } | 1447 | } |
1445 | if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID))) | 1448 | if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID))) |
1446 | blk_recount_segments(q, req->biotail); | 1449 | blk_recount_segments(q, req->biotail); |
1447 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | 1450 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) |
1448 | blk_recount_segments(q, bio); | 1451 | blk_recount_segments(q, bio); |
1449 | len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; | 1452 | len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; |
1450 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) && | 1453 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) && |
1451 | !BIOVEC_VIRT_OVERSIZE(len)) { | 1454 | !BIOVEC_VIRT_OVERSIZE(len)) { |
1452 | int mergeable = ll_new_mergeable(q, req, bio); | 1455 | int mergeable = ll_new_mergeable(q, req, bio); |
1453 | 1456 | ||
1454 | if (mergeable) { | 1457 | if (mergeable) { |
1455 | if (req->nr_hw_segments == 1) | 1458 | if (req->nr_hw_segments == 1) |
1456 | req->bio->bi_hw_front_size = len; | 1459 | req->bio->bi_hw_front_size = len; |
1457 | if (bio->bi_hw_segments == 1) | 1460 | if (bio->bi_hw_segments == 1) |
1458 | bio->bi_hw_back_size = len; | 1461 | bio->bi_hw_back_size = len; |
1459 | } | 1462 | } |
1460 | return mergeable; | 1463 | return mergeable; |
1461 | } | 1464 | } |
1462 | 1465 | ||
1463 | return ll_new_hw_segment(q, req, bio); | 1466 | return ll_new_hw_segment(q, req, bio); |
1464 | } | 1467 | } |
1465 | 1468 | ||
1466 | static int ll_front_merge_fn(struct request_queue *q, struct request *req, | 1469 | static int ll_front_merge_fn(struct request_queue *q, struct request *req, |
1467 | struct bio *bio) | 1470 | struct bio *bio) |
1468 | { | 1471 | { |
1469 | unsigned short max_sectors; | 1472 | unsigned short max_sectors; |
1470 | int len; | 1473 | int len; |
1471 | 1474 | ||
1472 | if (unlikely(blk_pc_request(req))) | 1475 | if (unlikely(blk_pc_request(req))) |
1473 | max_sectors = q->max_hw_sectors; | 1476 | max_sectors = q->max_hw_sectors; |
1474 | else | 1477 | else |
1475 | max_sectors = q->max_sectors; | 1478 | max_sectors = q->max_sectors; |
1476 | 1479 | ||
1477 | 1480 | ||
1478 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { | 1481 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { |
1479 | req->cmd_flags |= REQ_NOMERGE; | 1482 | req->cmd_flags |= REQ_NOMERGE; |
1480 | if (req == q->last_merge) | 1483 | if (req == q->last_merge) |
1481 | q->last_merge = NULL; | 1484 | q->last_merge = NULL; |
1482 | return 0; | 1485 | return 0; |
1483 | } | 1486 | } |
1484 | len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; | 1487 | len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; |
1485 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | 1488 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) |
1486 | blk_recount_segments(q, bio); | 1489 | blk_recount_segments(q, bio); |
1487 | if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID))) | 1490 | if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID))) |
1488 | blk_recount_segments(q, req->bio); | 1491 | blk_recount_segments(q, req->bio); |
1489 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && | 1492 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && |
1490 | !BIOVEC_VIRT_OVERSIZE(len)) { | 1493 | !BIOVEC_VIRT_OVERSIZE(len)) { |
1491 | int mergeable = ll_new_mergeable(q, req, bio); | 1494 | int mergeable = ll_new_mergeable(q, req, bio); |
1492 | 1495 | ||
1493 | if (mergeable) { | 1496 | if (mergeable) { |
1494 | if (bio->bi_hw_segments == 1) | 1497 | if (bio->bi_hw_segments == 1) |
1495 | bio->bi_hw_front_size = len; | 1498 | bio->bi_hw_front_size = len; |
1496 | if (req->nr_hw_segments == 1) | 1499 | if (req->nr_hw_segments == 1) |
1497 | req->biotail->bi_hw_back_size = len; | 1500 | req->biotail->bi_hw_back_size = len; |
1498 | } | 1501 | } |
1499 | return mergeable; | 1502 | return mergeable; |
1500 | } | 1503 | } |
1501 | 1504 | ||
1502 | return ll_new_hw_segment(q, req, bio); | 1505 | return ll_new_hw_segment(q, req, bio); |
1503 | } | 1506 | } |
1504 | 1507 | ||
1505 | static int ll_merge_requests_fn(struct request_queue *q, struct request *req, | 1508 | static int ll_merge_requests_fn(struct request_queue *q, struct request *req, |
1506 | struct request *next) | 1509 | struct request *next) |
1507 | { | 1510 | { |
1508 | int total_phys_segments; | 1511 | int total_phys_segments; |
1509 | int total_hw_segments; | 1512 | int total_hw_segments; |
1510 | 1513 | ||
1511 | /* | 1514 | /* |
1514 | * First check whether either of the requests is a re-queued | 1517 | * First check whether either of the requests is a re-queued |
1515 | * request. We can't merge them if so. | 1518 | * request. We can't merge them if so. |
1514 | */ | 1517 | */ |
1515 | if (req->special || next->special) | 1518 | if (req->special || next->special) |
1516 | return 0; | 1519 | return 0; |
1517 | 1520 | ||
1518 | /* | 1521 | /* |
1519 | * Will it become too large? | 1522 | * Will it become too large? |
1520 | */ | 1523 | */ |
1521 | if ((req->nr_sectors + next->nr_sectors) > q->max_sectors) | 1524 | if ((req->nr_sectors + next->nr_sectors) > q->max_sectors) |
1522 | return 0; | 1525 | return 0; |
1523 | 1526 | ||
1524 | total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; | 1527 | total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; |
1525 | if (blk_phys_contig_segment(q, req->biotail, next->bio)) | 1528 | if (blk_phys_contig_segment(q, req->biotail, next->bio)) |
1526 | total_phys_segments--; | 1529 | total_phys_segments--; |
1527 | 1530 | ||
1528 | if (total_phys_segments > q->max_phys_segments) | 1531 | if (total_phys_segments > q->max_phys_segments) |
1529 | return 0; | 1532 | return 0; |
1530 | 1533 | ||
1531 | total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; | 1534 | total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; |
1532 | if (blk_hw_contig_segment(q, req->biotail, next->bio)) { | 1535 | if (blk_hw_contig_segment(q, req->biotail, next->bio)) { |
1533 | int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size; | 1536 | int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size; |
1534 | /* | 1537 | /* |
1535 | * propagate the combined length to the end of the requests | 1538 | * propagate the combined length to the end of the requests |
1536 | */ | 1539 | */ |
1537 | if (req->nr_hw_segments == 1) | 1540 | if (req->nr_hw_segments == 1) |
1538 | req->bio->bi_hw_front_size = len; | 1541 | req->bio->bi_hw_front_size = len; |
1539 | if (next->nr_hw_segments == 1) | 1542 | if (next->nr_hw_segments == 1) |
1540 | next->biotail->bi_hw_back_size = len; | 1543 | next->biotail->bi_hw_back_size = len; |
1541 | total_hw_segments--; | 1544 | total_hw_segments--; |
1542 | } | 1545 | } |
1543 | 1546 | ||
1544 | if (total_hw_segments > q->max_hw_segments) | 1547 | if (total_hw_segments > q->max_hw_segments) |
1545 | return 0; | 1548 | return 0; |
1546 | 1549 | ||
1547 | /* Merge is OK... */ | 1550 | /* Merge is OK... */ |
1548 | req->nr_phys_segments = total_phys_segments; | 1551 | req->nr_phys_segments = total_phys_segments; |
1549 | req->nr_hw_segments = total_hw_segments; | 1552 | req->nr_hw_segments = total_hw_segments; |
1550 | return 1; | 1553 | return 1; |
1551 | } | 1554 | } |
1552 | 1555 | ||
1553 | /* | 1556 | /* |
1554 | * "plug" the device if there are no outstanding requests: this will | 1557 | * "plug" the device if there are no outstanding requests: this will |
1555 | * force the transfer to start only after we have put all the requests | 1558 | * force the transfer to start only after we have put all the requests |
1556 | * on the list. | 1559 | * on the list. |
1557 | * | 1560 | * |
1558 | * This is called with interrupts off and no requests on the queue and | 1561 | * This is called with interrupts off and no requests on the queue and |
1559 | * with the queue lock held. | 1562 | * with the queue lock held. |
1560 | */ | 1563 | */ |
1561 | void blk_plug_device(struct request_queue *q) | 1564 | void blk_plug_device(struct request_queue *q) |
1562 | { | 1565 | { |
1563 | WARN_ON(!irqs_disabled()); | 1566 | WARN_ON(!irqs_disabled()); |
1564 | 1567 | ||
1565 | /* | 1568 | /* |
1566 | * don't plug a stopped queue, it must be paired with blk_start_queue() | 1569 | * don't plug a stopped queue, it must be paired with blk_start_queue() |
1567 | * which will restart the queueing | 1570 | * which will restart the queueing |
1568 | */ | 1571 | */ |
1569 | if (blk_queue_stopped(q)) | 1572 | if (blk_queue_stopped(q)) |
1570 | return; | 1573 | return; |
1571 | 1574 | ||
1572 | if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { | 1575 | if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { |
1573 | mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); | 1576 | mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); |
1574 | blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); | 1577 | blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); |
1575 | } | 1578 | } |
1576 | } | 1579 | } |
1577 | 1580 | ||
1578 | EXPORT_SYMBOL(blk_plug_device); | 1581 | EXPORT_SYMBOL(blk_plug_device); |
1579 | 1582 | ||
1580 | /* | 1583 | /* |
1581 | * remove the queue from the plugged list, if present. called with | 1584 | * remove the queue from the plugged list, if present. called with |
1582 | * queue lock held and interrupts disabled. | 1585 | * queue lock held and interrupts disabled. |
1583 | */ | 1586 | */ |
1584 | int blk_remove_plug(struct request_queue *q) | 1587 | int blk_remove_plug(struct request_queue *q) |
1585 | { | 1588 | { |
1586 | WARN_ON(!irqs_disabled()); | 1589 | WARN_ON(!irqs_disabled()); |
1587 | 1590 | ||
1588 | if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) | 1591 | if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) |
1589 | return 0; | 1592 | return 0; |
1590 | 1593 | ||
1591 | del_timer(&q->unplug_timer); | 1594 | del_timer(&q->unplug_timer); |
1592 | return 1; | 1595 | return 1; |
1593 | } | 1596 | } |
1594 | 1597 | ||
1595 | EXPORT_SYMBOL(blk_remove_plug); | 1598 | EXPORT_SYMBOL(blk_remove_plug); |
1596 | 1599 | ||
1597 | /* | 1600 | /* |
1598 | * remove the plug and let it rip.. | 1601 | * remove the plug and let it rip.. |
1599 | */ | 1602 | */ |
1600 | void __generic_unplug_device(struct request_queue *q) | 1603 | void __generic_unplug_device(struct request_queue *q) |
1601 | { | 1604 | { |
1602 | if (unlikely(blk_queue_stopped(q))) | 1605 | if (unlikely(blk_queue_stopped(q))) |
1603 | return; | 1606 | return; |
1604 | 1607 | ||
1605 | if (!blk_remove_plug(q)) | 1608 | if (!blk_remove_plug(q)) |
1606 | return; | 1609 | return; |
1607 | 1610 | ||
1608 | q->request_fn(q); | 1611 | q->request_fn(q); |
1609 | } | 1612 | } |
1610 | EXPORT_SYMBOL(__generic_unplug_device); | 1613 | EXPORT_SYMBOL(__generic_unplug_device); |
1611 | 1614 | ||
1612 | /** | 1615 | /** |
1613 | * generic_unplug_device - fire a request queue | 1616 | * generic_unplug_device - fire a request queue |
1614 | * @q: The &struct request_queue in question | 1617 | * @q: The &struct request_queue in question |
1615 | * | 1618 | * |
1616 | * Description: | 1619 | * Description: |
1617 | * Linux uses plugging to build bigger request queues before letting | 1620 | * Linux uses plugging to build bigger request queues before letting |
1618 | * the device have at them. If a queue is plugged, the I/O scheduler | 1621 | * the device have at them. If a queue is plugged, the I/O scheduler |
1619 | * is still adding and merging requests on the queue. Once the queue | 1622 | * is still adding and merging requests on the queue. Once the queue |
1620 | * gets unplugged, the request_fn defined for the queue is invoked and | 1623 | * gets unplugged, the request_fn defined for the queue is invoked and |
1621 | * transfers started. | 1624 | * transfers started. |
1622 | **/ | 1625 | **/ |
1623 | void generic_unplug_device(struct request_queue *q) | 1626 | void generic_unplug_device(struct request_queue *q) |
1624 | { | 1627 | { |
1625 | spin_lock_irq(q->queue_lock); | 1628 | spin_lock_irq(q->queue_lock); |
1626 | __generic_unplug_device(q); | 1629 | __generic_unplug_device(q); |
1627 | spin_unlock_irq(q->queue_lock); | 1630 | spin_unlock_irq(q->queue_lock); |
1628 | } | 1631 | } |
1629 | EXPORT_SYMBOL(generic_unplug_device); | 1632 | EXPORT_SYMBOL(generic_unplug_device); |
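The plugging scheme described in the kernel-doc above is mostly driven by the block layer itself, but a driver may install its own ->unplug_fn and chain to generic_unplug_device(). A minimal sketch, assuming a hypothetical example_dev driver; example_unplug(), example_kick_hardware() and the queuedata layout are invented, not part of this patch:

/* sketch only: ->unplug_fn is invoked without q->queue_lock held */
static void example_unplug(struct request_queue *q)
{
	struct example_dev *edev = q->queuedata;

	/* driver-specific preparation before I/O starts flowing again */
	example_kick_hardware(edev);

	/* then do what the default would have done: remove the plug and
	 * invoke the request_fn */
	generic_unplug_device(q);
}

/* installed after blk_init_queue(), which defaults ->unplug_fn to
 * generic_unplug_device:
 *	edev->queue->unplug_fn = example_unplug;
 */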
1630 | 1633 | ||
1631 | static void blk_backing_dev_unplug(struct backing_dev_info *bdi, | 1634 | static void blk_backing_dev_unplug(struct backing_dev_info *bdi, |
1632 | struct page *page) | 1635 | struct page *page) |
1633 | { | 1636 | { |
1634 | struct request_queue *q = bdi->unplug_io_data; | 1637 | struct request_queue *q = bdi->unplug_io_data; |
1635 | 1638 | ||
1636 | /* | 1639 | /* |
1637 | * devices don't necessarily have an ->unplug_fn defined | 1640 | * devices don't necessarily have an ->unplug_fn defined |
1638 | */ | 1641 | */ |
1639 | if (q->unplug_fn) { | 1642 | if (q->unplug_fn) { |
1640 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, | 1643 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, |
1641 | q->rq.count[READ] + q->rq.count[WRITE]); | 1644 | q->rq.count[READ] + q->rq.count[WRITE]); |
1642 | 1645 | ||
1643 | q->unplug_fn(q); | 1646 | q->unplug_fn(q); |
1644 | } | 1647 | } |
1645 | } | 1648 | } |
1646 | 1649 | ||
1647 | static void blk_unplug_work(struct work_struct *work) | 1650 | static void blk_unplug_work(struct work_struct *work) |
1648 | { | 1651 | { |
1649 | struct request_queue *q = | 1652 | struct request_queue *q = |
1650 | container_of(work, struct request_queue, unplug_work); | 1653 | container_of(work, struct request_queue, unplug_work); |
1651 | 1654 | ||
1652 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, | 1655 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, |
1653 | q->rq.count[READ] + q->rq.count[WRITE]); | 1656 | q->rq.count[READ] + q->rq.count[WRITE]); |
1654 | 1657 | ||
1655 | q->unplug_fn(q); | 1658 | q->unplug_fn(q); |
1656 | } | 1659 | } |
1657 | 1660 | ||
1658 | static void blk_unplug_timeout(unsigned long data) | 1661 | static void blk_unplug_timeout(unsigned long data) |
1659 | { | 1662 | { |
1660 | struct request_queue *q = (struct request_queue *)data; | 1663 | struct request_queue *q = (struct request_queue *)data; |
1661 | 1664 | ||
1662 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, | 1665 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, |
1663 | q->rq.count[READ] + q->rq.count[WRITE]); | 1666 | q->rq.count[READ] + q->rq.count[WRITE]); |
1664 | 1667 | ||
1665 | kblockd_schedule_work(&q->unplug_work); | 1668 | kblockd_schedule_work(&q->unplug_work); |
1666 | } | 1669 | } |
1667 | 1670 | ||
1668 | /** | 1671 | /** |
1669 | * blk_start_queue - restart a previously stopped queue | 1672 | * blk_start_queue - restart a previously stopped queue |
1670 | * @q: The &struct request_queue in question | 1673 | * @q: The &struct request_queue in question |
1671 | * | 1674 | * |
1672 | * Description: | 1675 | * Description: |
1673 | * blk_start_queue() will clear the stop flag on the queue, and call | 1676 | * blk_start_queue() will clear the stop flag on the queue, and call |
1674 | * the request_fn for the queue if it was in a stopped state when | 1677 | * the request_fn for the queue if it was in a stopped state when |
1675 | * entered. Also see blk_stop_queue(). Queue lock must be held. | 1678 | * entered. Also see blk_stop_queue(). Queue lock must be held. |
1676 | **/ | 1679 | **/ |
1677 | void blk_start_queue(struct request_queue *q) | 1680 | void blk_start_queue(struct request_queue *q) |
1678 | { | 1681 | { |
1679 | WARN_ON(!irqs_disabled()); | 1682 | WARN_ON(!irqs_disabled()); |
1680 | 1683 | ||
1681 | clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); | 1684 | clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); |
1682 | 1685 | ||
1683 | /* | 1686 | /* |
1684 | * one level of recursion is ok and is much faster than kicking | 1687 | * one level of recursion is ok and is much faster than kicking |
1685 | * the unplug handling | 1688 | * the unplug handling |
1686 | */ | 1689 | */ |
1687 | if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { | 1690 | if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { |
1688 | q->request_fn(q); | 1691 | q->request_fn(q); |
1689 | clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); | 1692 | clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); |
1690 | } else { | 1693 | } else { |
1691 | blk_plug_device(q); | 1694 | blk_plug_device(q); |
1692 | kblockd_schedule_work(&q->unplug_work); | 1695 | kblockd_schedule_work(&q->unplug_work); |
1693 | } | 1696 | } |
1694 | } | 1697 | } |
1695 | 1698 | ||
1696 | EXPORT_SYMBOL(blk_start_queue); | 1699 | EXPORT_SYMBOL(blk_start_queue); |
1697 | 1700 | ||
1698 | /** | 1701 | /** |
1699 | * blk_stop_queue - stop a queue | 1702 | * blk_stop_queue - stop a queue |
1700 | * @q: The &struct request_queue in question | 1703 | * @q: The &struct request_queue in question |
1701 | * | 1704 | * |
1702 | * Description: | 1705 | * Description: |
1703 | * The Linux block layer assumes that a block driver will consume all | 1706 | * The Linux block layer assumes that a block driver will consume all |
1704 | * entries on the request queue when the request_fn strategy is called. | 1707 | * entries on the request queue when the request_fn strategy is called. |
1705 | * Often this will not happen, because of hardware limitations (queue | 1708 | * Often this will not happen, because of hardware limitations (queue |
1706 | * depth settings). If a device driver gets a 'queue full' response, | 1709 | * depth settings). If a device driver gets a 'queue full' response, |
1707 | * or if it simply chooses not to queue more I/O at one point, it can | 1710 | * or if it simply chooses not to queue more I/O at one point, it can |
1708 | * call this function to prevent the request_fn from being called until | 1711 | * call this function to prevent the request_fn from being called until |
1709 | * the driver has signalled it's ready to go again. This happens by calling | 1712 | * the driver has signalled it's ready to go again. This happens by calling |
1710 | * blk_start_queue() to restart queue operations. Queue lock must be held. | 1713 | * blk_start_queue() to restart queue operations. Queue lock must be held. |
1711 | **/ | 1714 | **/ |
1712 | void blk_stop_queue(struct request_queue *q) | 1715 | void blk_stop_queue(struct request_queue *q) |
1713 | { | 1716 | { |
1714 | blk_remove_plug(q); | 1717 | blk_remove_plug(q); |
1715 | set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); | 1718 | set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); |
1716 | } | 1719 | } |
1717 | EXPORT_SYMBOL(blk_stop_queue); | 1720 | EXPORT_SYMBOL(blk_stop_queue); |
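A hedged sketch of the stop/start protocol the two kernel-doc blocks above describe, using the request_fn-side helpers of this kernel generation (elv_next_request(), blkdev_dequeue_request()); every example_* identifier is invented:

/* called by the block layer with q->queue_lock held, interrupts off */
static void example_request_fn(struct request_queue *q)
{
	struct example_dev *edev = q->queuedata;
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		if (example_hw_full(edev)) {
			/* 'queue full': stop request_fn callbacks until the
			 * completion handler restarts us */
			blk_stop_queue(q);
			break;
		}
		blkdev_dequeue_request(rq);
		example_issue(edev, rq);
	}
}

/* completion interrupt path; the driver takes q->queue_lock itself,
 * interrupts are disabled */
static void example_irq_done(struct example_dev *edev)
{
	/* clears QUEUE_FLAG_STOPPED and re-invokes the request_fn */
	blk_start_queue(edev->queue);
}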
1718 | 1721 | ||
1719 | /** | 1722 | /** |
1720 | * blk_sync_queue - cancel any pending callbacks on a queue | 1723 | * blk_sync_queue - cancel any pending callbacks on a queue |
1721 | * @q: the queue | 1724 | * @q: the queue |
1722 | * | 1725 | * |
1723 | * Description: | 1726 | * Description: |
1724 | * The block layer may perform asynchronous callback activity | 1727 | * The block layer may perform asynchronous callback activity |
1725 | * on a queue, such as calling the unplug function after a timeout. | 1728 | * on a queue, such as calling the unplug function after a timeout. |
1726 | * A block device may call blk_sync_queue to ensure that any | 1729 | * A block device may call blk_sync_queue to ensure that any |
1727 | * such activity is cancelled, thus allowing it to release resources | 1730 | * such activity is cancelled, thus allowing it to release resources |
1728 | * that the callbacks might use. The caller must already have made sure | 1731 | * that the callbacks might use. The caller must already have made sure |
1729 | * that its ->make_request_fn will not re-add plugging prior to calling | 1732 | * that its ->make_request_fn will not re-add plugging prior to calling |
1730 | * this function. | 1733 | * this function. |
1731 | * | 1734 | * |
1732 | */ | 1735 | */ |
1733 | void blk_sync_queue(struct request_queue *q) | 1736 | void blk_sync_queue(struct request_queue *q) |
1734 | { | 1737 | { |
1735 | del_timer_sync(&q->unplug_timer); | 1738 | del_timer_sync(&q->unplug_timer); |
1736 | } | 1739 | } |
1737 | EXPORT_SYMBOL(blk_sync_queue); | 1740 | EXPORT_SYMBOL(blk_sync_queue); |
1738 | 1741 | ||
1739 | /** | 1742 | /** |
1740 | * blk_run_queue - run a single device queue | 1743 | * blk_run_queue - run a single device queue |
1741 | * @q: The queue to run | 1744 | * @q: The queue to run |
1742 | */ | 1745 | */ |
1743 | void blk_run_queue(struct request_queue *q) | 1746 | void blk_run_queue(struct request_queue *q) |
1744 | { | 1747 | { |
1745 | unsigned long flags; | 1748 | unsigned long flags; |
1746 | 1749 | ||
1747 | spin_lock_irqsave(q->queue_lock, flags); | 1750 | spin_lock_irqsave(q->queue_lock, flags); |
1748 | blk_remove_plug(q); | 1751 | blk_remove_plug(q); |
1749 | 1752 | ||
1750 | /* | 1753 | /* |
1751 | * Only recurse once to avoid overrunning the stack, let the unplug | 1754 | * Only recurse once to avoid overrunning the stack, let the unplug |
1752 | * handling reinvoke the handler shortly if we already got there. | 1755 | * handling reinvoke the handler shortly if we already got there. |
1753 | */ | 1756 | */ |
1754 | if (!elv_queue_empty(q)) { | 1757 | if (!elv_queue_empty(q)) { |
1755 | if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { | 1758 | if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { |
1756 | q->request_fn(q); | 1759 | q->request_fn(q); |
1757 | clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); | 1760 | clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); |
1758 | } else { | 1761 | } else { |
1759 | blk_plug_device(q); | 1762 | blk_plug_device(q); |
1760 | kblockd_schedule_work(&q->unplug_work); | 1763 | kblockd_schedule_work(&q->unplug_work); |
1761 | } | 1764 | } |
1762 | } | 1765 | } |
1763 | 1766 | ||
1764 | spin_unlock_irqrestore(q->queue_lock, flags); | 1767 | spin_unlock_irqrestore(q->queue_lock, flags); |
1765 | } | 1768 | } |
1766 | EXPORT_SYMBOL(blk_run_queue); | 1769 | EXPORT_SYMBOL(blk_run_queue); |
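blk_run_queue() takes the queue lock itself, so it is the convenient kick when a driver resource frees up outside the normal completion path; a small sketch with an invented callback:

/* hypothetical: called when a driver-private resource (e.g. a tag or a
 * DMA buffer) becomes available again and queued requests may proceed */
static void example_resource_freed(struct example_dev *edev)
{
	/*
	 * Safe without holding q->queue_lock: blk_run_queue() acquires it,
	 * removes any plug and recurses at most one level into request_fn.
	 */
	blk_run_queue(edev->queue);
}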
1767 | 1770 | ||
1768 | /** | 1771 | /** |
1769 | * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed | 1772 | * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed |
1770 | * @kobj: the kobj belonging to the request queue to be released | 1773 | * @kobj: the kobj belonging to the request queue to be released |
1771 | * | 1774 | * |
1772 | * Description: | 1775 | * Description: |
1773 | * blk_cleanup_queue is the pair to blk_init_queue() or | 1776 | * blk_cleanup_queue is the pair to blk_init_queue() or |
1774 | * blk_queue_make_request(). It should be called when a request queue is | 1777 | * blk_queue_make_request(). It should be called when a request queue is |
1775 | * being released; typically when a block device is being de-registered. | 1778 | * being released; typically when a block device is being de-registered. |
1776 | * Currently, its primary task is to free all the &struct request | 1779 | * Currently, its primary task is to free all the &struct request |
1777 | * structures that were allocated to the queue and the queue itself. | 1780 | * structures that were allocated to the queue and the queue itself. |
1778 | * | 1781 | * |
1779 | * Caveat: | 1782 | * Caveat: |
1780 | * Hopefully the low level driver will have finished any | 1783 | * Hopefully the low level driver will have finished any |
1781 | * outstanding requests first... | 1784 | * outstanding requests first... |
1782 | **/ | 1785 | **/ |
1783 | static void blk_release_queue(struct kobject *kobj) | 1786 | static void blk_release_queue(struct kobject *kobj) |
1784 | { | 1787 | { |
1785 | struct request_queue *q = | 1788 | struct request_queue *q = |
1786 | container_of(kobj, struct request_queue, kobj); | 1789 | container_of(kobj, struct request_queue, kobj); |
1787 | struct request_list *rl = &q->rq; | 1790 | struct request_list *rl = &q->rq; |
1788 | 1791 | ||
1789 | blk_sync_queue(q); | 1792 | blk_sync_queue(q); |
1790 | 1793 | ||
1791 | if (rl->rq_pool) | 1794 | if (rl->rq_pool) |
1792 | mempool_destroy(rl->rq_pool); | 1795 | mempool_destroy(rl->rq_pool); |
1793 | 1796 | ||
1794 | if (q->queue_tags) | 1797 | if (q->queue_tags) |
1795 | __blk_queue_free_tags(q); | 1798 | __blk_queue_free_tags(q); |
1796 | 1799 | ||
1797 | blk_trace_shutdown(q); | 1800 | blk_trace_shutdown(q); |
1798 | 1801 | ||
1799 | kmem_cache_free(requestq_cachep, q); | 1802 | kmem_cache_free(requestq_cachep, q); |
1800 | } | 1803 | } |
1801 | 1804 | ||
1802 | void blk_put_queue(struct request_queue *q) | 1805 | void blk_put_queue(struct request_queue *q) |
1803 | { | 1806 | { |
1804 | kobject_put(&q->kobj); | 1807 | kobject_put(&q->kobj); |
1805 | } | 1808 | } |
1806 | EXPORT_SYMBOL(blk_put_queue); | 1809 | EXPORT_SYMBOL(blk_put_queue); |
1807 | 1810 | ||
1808 | void blk_cleanup_queue(struct request_queue * q) | 1811 | void blk_cleanup_queue(struct request_queue * q) |
1809 | { | 1812 | { |
1810 | mutex_lock(&q->sysfs_lock); | 1813 | mutex_lock(&q->sysfs_lock); |
1811 | set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); | 1814 | set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); |
1812 | mutex_unlock(&q->sysfs_lock); | 1815 | mutex_unlock(&q->sysfs_lock); |
1813 | 1816 | ||
1814 | if (q->elevator) | 1817 | if (q->elevator) |
1815 | elevator_exit(q->elevator); | 1818 | elevator_exit(q->elevator); |
1816 | 1819 | ||
1817 | blk_put_queue(q); | 1820 | blk_put_queue(q); |
1818 | } | 1821 | } |
1819 | 1822 | ||
1820 | EXPORT_SYMBOL(blk_cleanup_queue); | 1823 | EXPORT_SYMBOL(blk_cleanup_queue); |
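The init/cleanup pairing requested in the comment block above, sketched for a hypothetical driver teardown path (example_dev and its fields are placeholders):

static void example_remove(struct example_dev *edev)
{
	/* make sure no unplug timer/work will fire into freed memory */
	blk_sync_queue(edev->queue);

	del_gendisk(edev->disk);	/* undoes add_disk() */
	put_disk(edev->disk);

	/* pairs with the blk_init_queue() call made at probe time */
	blk_cleanup_queue(edev->queue);
}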
1821 | 1824 | ||
1822 | static int blk_init_free_list(struct request_queue *q) | 1825 | static int blk_init_free_list(struct request_queue *q) |
1823 | { | 1826 | { |
1824 | struct request_list *rl = &q->rq; | 1827 | struct request_list *rl = &q->rq; |
1825 | 1828 | ||
1826 | rl->count[READ] = rl->count[WRITE] = 0; | 1829 | rl->count[READ] = rl->count[WRITE] = 0; |
1827 | rl->starved[READ] = rl->starved[WRITE] = 0; | 1830 | rl->starved[READ] = rl->starved[WRITE] = 0; |
1828 | rl->elvpriv = 0; | 1831 | rl->elvpriv = 0; |
1829 | init_waitqueue_head(&rl->wait[READ]); | 1832 | init_waitqueue_head(&rl->wait[READ]); |
1830 | init_waitqueue_head(&rl->wait[WRITE]); | 1833 | init_waitqueue_head(&rl->wait[WRITE]); |
1831 | 1834 | ||
1832 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, | 1835 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, |
1833 | mempool_free_slab, request_cachep, q->node); | 1836 | mempool_free_slab, request_cachep, q->node); |
1834 | 1837 | ||
1835 | if (!rl->rq_pool) | 1838 | if (!rl->rq_pool) |
1836 | return -ENOMEM; | 1839 | return -ENOMEM; |
1837 | 1840 | ||
1838 | return 0; | 1841 | return 0; |
1839 | } | 1842 | } |
1840 | 1843 | ||
1841 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) | 1844 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) |
1842 | { | 1845 | { |
1843 | return blk_alloc_queue_node(gfp_mask, -1); | 1846 | return blk_alloc_queue_node(gfp_mask, -1); |
1844 | } | 1847 | } |
1845 | EXPORT_SYMBOL(blk_alloc_queue); | 1848 | EXPORT_SYMBOL(blk_alloc_queue); |
1846 | 1849 | ||
1847 | static struct kobj_type queue_ktype; | 1850 | static struct kobj_type queue_ktype; |
1848 | 1851 | ||
1849 | struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | 1852 | struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) |
1850 | { | 1853 | { |
1851 | struct request_queue *q; | 1854 | struct request_queue *q; |
1852 | 1855 | ||
1853 | q = kmem_cache_alloc_node(requestq_cachep, | 1856 | q = kmem_cache_alloc_node(requestq_cachep, |
1854 | gfp_mask | __GFP_ZERO, node_id); | 1857 | gfp_mask | __GFP_ZERO, node_id); |
1855 | if (!q) | 1858 | if (!q) |
1856 | return NULL; | 1859 | return NULL; |
1857 | 1860 | ||
1858 | init_timer(&q->unplug_timer); | 1861 | init_timer(&q->unplug_timer); |
1859 | 1862 | ||
1860 | kobject_set_name(&q->kobj, "%s", "queue"); | 1863 | kobject_set_name(&q->kobj, "%s", "queue"); |
1861 | q->kobj.ktype = &queue_ktype; | 1864 | q->kobj.ktype = &queue_ktype; |
1862 | kobject_init(&q->kobj); | 1865 | kobject_init(&q->kobj); |
1863 | 1866 | ||
1864 | q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; | 1867 | q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; |
1865 | q->backing_dev_info.unplug_io_data = q; | 1868 | q->backing_dev_info.unplug_io_data = q; |
1866 | 1869 | ||
1867 | mutex_init(&q->sysfs_lock); | 1870 | mutex_init(&q->sysfs_lock); |
1868 | 1871 | ||
1869 | return q; | 1872 | return q; |
1870 | } | 1873 | } |
1871 | EXPORT_SYMBOL(blk_alloc_queue_node); | 1874 | EXPORT_SYMBOL(blk_alloc_queue_node); |
1872 | 1875 | ||
1873 | /** | 1876 | /** |
1874 | * blk_init_queue - prepare a request queue for use with a block device | 1877 | * blk_init_queue - prepare a request queue for use with a block device |
1875 | * @rfn: The function to be called to process requests that have been | 1878 | * @rfn: The function to be called to process requests that have been |
1876 | * placed on the queue. | 1879 | * placed on the queue. |
1877 | * @lock: Request queue spin lock | 1880 | * @lock: Request queue spin lock |
1878 | * | 1881 | * |
1879 | * Description: | 1882 | * Description: |
1880 | * If a block device wishes to use the standard request handling procedures, | 1883 | * If a block device wishes to use the standard request handling procedures, |
1881 | * which sorts requests and coalesces adjacent requests, then it must | 1884 | * which sorts requests and coalesces adjacent requests, then it must |
1882 | * call blk_init_queue(). The function @rfn will be called when there | 1885 | * call blk_init_queue(). The function @rfn will be called when there |
1883 | * are requests on the queue that need to be processed. If the device | 1886 | * are requests on the queue that need to be processed. If the device |
1884 | * supports plugging, then @rfn may not be called immediately when requests | 1887 | * supports plugging, then @rfn may not be called immediately when requests |
1885 | * are available on the queue, but may be called at some time later instead. | 1888 | * are available on the queue, but may be called at some time later instead. |
1886 | * Plugged queues are generally unplugged when a buffer belonging to one | 1889 | * Plugged queues are generally unplugged when a buffer belonging to one |
1887 | * of the requests on the queue is needed, or due to memory pressure. | 1890 | * of the requests on the queue is needed, or due to memory pressure. |
1888 | * | 1891 | * |
1889 | * @rfn is not required, or even expected, to remove all requests off the | 1892 | * @rfn is not required, or even expected, to remove all requests off the |
1890 | * queue, but only as many as it can handle at a time. If it does leave | 1893 | * queue, but only as many as it can handle at a time. If it does leave |
1891 | * requests on the queue, it is responsible for arranging that the requests | 1894 | * requests on the queue, it is responsible for arranging that the requests |
1892 | * get dealt with eventually. | 1895 | * get dealt with eventually. |
1893 | * | 1896 | * |
1894 | * The queue spin lock must be held while manipulating the requests on the | 1897 | * The queue spin lock must be held while manipulating the requests on the |
1895 | * request queue; this lock will also be taken from interrupt context, so irq | 1898 | * request queue; this lock will also be taken from interrupt context, so irq |
1896 | * disabling is needed for it. | 1899 | * disabling is needed for it. |
1897 | * | 1900 | * |
1898 | * Function returns a pointer to the initialized request queue, or NULL if | 1901 | * Function returns a pointer to the initialized request queue, or NULL if |
1899 | * it didn't succeed. | 1902 | * it didn't succeed. |
1900 | * | 1903 | * |
1901 | * Note: | 1904 | * Note: |
1902 | * blk_init_queue() must be paired with a blk_cleanup_queue() call | 1905 | * blk_init_queue() must be paired with a blk_cleanup_queue() call |
1903 | * when the block device is deactivated (such as at module unload). | 1906 | * when the block device is deactivated (such as at module unload). |
1904 | **/ | 1907 | **/ |
1905 | 1908 | ||
1906 | struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) | 1909 | struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) |
1907 | { | 1910 | { |
1908 | return blk_init_queue_node(rfn, lock, -1); | 1911 | return blk_init_queue_node(rfn, lock, -1); |
1909 | } | 1912 | } |
1910 | EXPORT_SYMBOL(blk_init_queue); | 1913 | EXPORT_SYMBOL(blk_init_queue); |
1911 | 1914 | ||
1912 | struct request_queue * | 1915 | struct request_queue * |
1913 | blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) | 1916 | blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) |
1914 | { | 1917 | { |
1915 | struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id); | 1918 | struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id); |
1916 | 1919 | ||
1917 | if (!q) | 1920 | if (!q) |
1918 | return NULL; | 1921 | return NULL; |
1919 | 1922 | ||
1920 | q->node = node_id; | 1923 | q->node = node_id; |
1921 | if (blk_init_free_list(q)) { | 1924 | if (blk_init_free_list(q)) { |
1922 | kmem_cache_free(requestq_cachep, q); | 1925 | kmem_cache_free(requestq_cachep, q); |
1923 | return NULL; | 1926 | return NULL; |
1924 | } | 1927 | } |
1925 | 1928 | ||
1926 | /* | 1929 | /* |
1927 | * if caller didn't supply a lock, they get per-queue locking with | 1930 | * if caller didn't supply a lock, they get per-queue locking with |
1928 | * our embedded lock | 1931 | * our embedded lock |
1929 | */ | 1932 | */ |
1930 | if (!lock) { | 1933 | if (!lock) { |
1931 | spin_lock_init(&q->__queue_lock); | 1934 | spin_lock_init(&q->__queue_lock); |
1932 | lock = &q->__queue_lock; | 1935 | lock = &q->__queue_lock; |
1933 | } | 1936 | } |
1934 | 1937 | ||
1935 | q->request_fn = rfn; | 1938 | q->request_fn = rfn; |
1936 | q->prep_rq_fn = NULL; | 1939 | q->prep_rq_fn = NULL; |
1937 | q->unplug_fn = generic_unplug_device; | 1940 | q->unplug_fn = generic_unplug_device; |
1938 | q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); | 1941 | q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); |
1939 | q->queue_lock = lock; | 1942 | q->queue_lock = lock; |
1940 | 1943 | ||
1941 | blk_queue_segment_boundary(q, 0xffffffff); | 1944 | blk_queue_segment_boundary(q, 0xffffffff); |
1942 | 1945 | ||
1943 | blk_queue_make_request(q, __make_request); | 1946 | blk_queue_make_request(q, __make_request); |
1944 | blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); | 1947 | blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); |
1945 | 1948 | ||
1946 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); | 1949 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); |
1947 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); | 1950 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); |
1948 | 1951 | ||
1949 | q->sg_reserved_size = INT_MAX; | 1952 | q->sg_reserved_size = INT_MAX; |
1950 | 1953 | ||
1951 | /* | 1954 | /* |
1952 | * all done | 1955 | * all done |
1953 | */ | 1956 | */ |
1954 | if (!elevator_init(q, NULL)) { | 1957 | if (!elevator_init(q, NULL)) { |
1955 | blk_queue_congestion_threshold(q); | 1958 | blk_queue_congestion_threshold(q); |
1956 | return q; | 1959 | return q; |
1957 | } | 1960 | } |
1958 | 1961 | ||
1959 | blk_put_queue(q); | 1962 | blk_put_queue(q); |
1960 | return NULL; | 1963 | return NULL; |
1961 | } | 1964 | } |
1962 | EXPORT_SYMBOL(blk_init_queue_node); | 1965 | EXPORT_SYMBOL(blk_init_queue_node); |
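A minimal initialisation sketch matching the blk_init_queue() kernel-doc above; every example_* identifier is invented, the request handler is a do-nothing stub, and error handling is trimmed to the essentials:

#include <linux/blkdev.h>
#include <linux/module.h>

/* hypothetical driver state; a real driver would also keep a gendisk here */
struct example_dev {
	struct request_queue *queue;
};

static struct example_dev edev;
static DEFINE_SPINLOCK(example_lock);

/* trivial request_fn: complete everything immediately, like a null device */
static void example_do_request(struct request_queue *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		if (!blk_fs_request(rq)) {
			end_request(rq, 0);	/* fail anything exotic */
			continue;
		}
		/* a real driver would transfer rq->buffer here */
		end_request(rq, 1);
	}
}

static int __init example_init(void)
{
	/* pass a per-driver lock; NULL would select the embedded lock */
	edev.queue = blk_init_queue(example_do_request, &example_lock);
	if (!edev.queue)
		return -ENOMEM;

	edev.queue->queuedata = &edev;
	blk_queue_max_sectors(edev.queue, 128);	/* arbitrary hardware limit */

	/* alloc_disk()/add_disk() setup omitted for brevity */
	return 0;
}

static void __exit example_exit(void)
{
	blk_cleanup_queue(edev.queue);	/* the required counterpart */
}

module_init(example_init);
module_exit(example_exit);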
1963 | 1966 | ||
1964 | int blk_get_queue(struct request_queue *q) | 1967 | int blk_get_queue(struct request_queue *q) |
1965 | { | 1968 | { |
1966 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | 1969 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { |
1967 | kobject_get(&q->kobj); | 1970 | kobject_get(&q->kobj); |
1968 | return 0; | 1971 | return 0; |
1969 | } | 1972 | } |
1970 | 1973 | ||
1971 | return 1; | 1974 | return 1; |
1972 | } | 1975 | } |
1973 | 1976 | ||
1974 | EXPORT_SYMBOL(blk_get_queue); | 1977 | EXPORT_SYMBOL(blk_get_queue); |
1975 | 1978 | ||
1976 | static inline void blk_free_request(struct request_queue *q, struct request *rq) | 1979 | static inline void blk_free_request(struct request_queue *q, struct request *rq) |
1977 | { | 1980 | { |
1978 | if (rq->cmd_flags & REQ_ELVPRIV) | 1981 | if (rq->cmd_flags & REQ_ELVPRIV) |
1979 | elv_put_request(q, rq); | 1982 | elv_put_request(q, rq); |
1980 | mempool_free(rq, q->rq.rq_pool); | 1983 | mempool_free(rq, q->rq.rq_pool); |
1981 | } | 1984 | } |
1982 | 1985 | ||
1983 | static struct request * | 1986 | static struct request * |
1984 | blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) | 1987 | blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) |
1985 | { | 1988 | { |
1986 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); | 1989 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); |
1987 | 1990 | ||
1988 | if (!rq) | 1991 | if (!rq) |
1989 | return NULL; | 1992 | return NULL; |
1990 | 1993 | ||
1991 | /* | 1994 | /* |
1992 | * first three bits are identical in rq->cmd_flags and bio->bi_rw, | 1995 | * first three bits are identical in rq->cmd_flags and bio->bi_rw, |
1993 | * see bio.h and blkdev.h | 1996 | * see bio.h and blkdev.h |
1994 | */ | 1997 | */ |
1995 | rq->cmd_flags = rw | REQ_ALLOCED; | 1998 | rq->cmd_flags = rw | REQ_ALLOCED; |
1996 | 1999 | ||
1997 | if (priv) { | 2000 | if (priv) { |
1998 | if (unlikely(elv_set_request(q, rq, gfp_mask))) { | 2001 | if (unlikely(elv_set_request(q, rq, gfp_mask))) { |
1999 | mempool_free(rq, q->rq.rq_pool); | 2002 | mempool_free(rq, q->rq.rq_pool); |
2000 | return NULL; | 2003 | return NULL; |
2001 | } | 2004 | } |
2002 | rq->cmd_flags |= REQ_ELVPRIV; | 2005 | rq->cmd_flags |= REQ_ELVPRIV; |
2003 | } | 2006 | } |
2004 | 2007 | ||
2005 | return rq; | 2008 | return rq; |
2006 | } | 2009 | } |
2007 | 2010 | ||
2008 | /* | 2011 | /* |
2009 | * ioc_batching returns true if the ioc is a valid batching request and | 2012 | * ioc_batching returns true if the ioc is a valid batching request and |
2010 | * should be given priority access to a request. | 2013 | * should be given priority access to a request. |
2011 | */ | 2014 | */ |
2012 | static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) | 2015 | static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) |
2013 | { | 2016 | { |
2014 | if (!ioc) | 2017 | if (!ioc) |
2015 | return 0; | 2018 | return 0; |
2016 | 2019 | ||
2017 | /* | 2020 | /* |
2018 | * Make sure the process is able to allocate at least 1 request | 2021 | * Make sure the process is able to allocate at least 1 request |
2019 | * even if the batch times out, otherwise we could theoretically | 2022 | * even if the batch times out, otherwise we could theoretically |
2020 | * lose wakeups. | 2023 | * lose wakeups. |
2021 | */ | 2024 | */ |
2022 | return ioc->nr_batch_requests == q->nr_batching || | 2025 | return ioc->nr_batch_requests == q->nr_batching || |
2023 | (ioc->nr_batch_requests > 0 | 2026 | (ioc->nr_batch_requests > 0 |
2024 | && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); | 2027 | && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); |
2025 | } | 2028 | } |
2026 | 2029 | ||
2027 | /* | 2030 | /* |
2028 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This | 2031 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This |
2029 | * will cause the process to be a "batcher" on all queues in the system. This | 2032 | * will cause the process to be a "batcher" on all queues in the system. This |
2030 | * is the behaviour we want though - once it gets a wakeup it should be given | 2033 | * is the behaviour we want though - once it gets a wakeup it should be given |
2031 | * a nice run. | 2034 | * a nice run. |
2032 | */ | 2035 | */ |
2033 | static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) | 2036 | static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) |
2034 | { | 2037 | { |
2035 | if (!ioc || ioc_batching(q, ioc)) | 2038 | if (!ioc || ioc_batching(q, ioc)) |
2036 | return; | 2039 | return; |
2037 | 2040 | ||
2038 | ioc->nr_batch_requests = q->nr_batching; | 2041 | ioc->nr_batch_requests = q->nr_batching; |
2039 | ioc->last_waited = jiffies; | 2042 | ioc->last_waited = jiffies; |
2040 | } | 2043 | } |
2041 | 2044 | ||
2042 | static void __freed_request(struct request_queue *q, int rw) | 2045 | static void __freed_request(struct request_queue *q, int rw) |
2043 | { | 2046 | { |
2044 | struct request_list *rl = &q->rq; | 2047 | struct request_list *rl = &q->rq; |
2045 | 2048 | ||
2046 | if (rl->count[rw] < queue_congestion_off_threshold(q)) | 2049 | if (rl->count[rw] < queue_congestion_off_threshold(q)) |
2047 | blk_clear_queue_congested(q, rw); | 2050 | blk_clear_queue_congested(q, rw); |
2048 | 2051 | ||
2049 | if (rl->count[rw] + 1 <= q->nr_requests) { | 2052 | if (rl->count[rw] + 1 <= q->nr_requests) { |
2050 | if (waitqueue_active(&rl->wait[rw])) | 2053 | if (waitqueue_active(&rl->wait[rw])) |
2051 | wake_up(&rl->wait[rw]); | 2054 | wake_up(&rl->wait[rw]); |
2052 | 2055 | ||
2053 | blk_clear_queue_full(q, rw); | 2056 | blk_clear_queue_full(q, rw); |
2054 | } | 2057 | } |
2055 | } | 2058 | } |
2056 | 2059 | ||
2057 | /* | 2060 | /* |
2058 | * A request has just been released. Account for it, update the full and | 2061 | * A request has just been released. Account for it, update the full and |
2059 | * congestion status, wake up any waiters. Called under q->queue_lock. | 2062 | * congestion status, wake up any waiters. Called under q->queue_lock. |
2060 | */ | 2063 | */ |
2061 | static void freed_request(struct request_queue *q, int rw, int priv) | 2064 | static void freed_request(struct request_queue *q, int rw, int priv) |
2062 | { | 2065 | { |
2063 | struct request_list *rl = &q->rq; | 2066 | struct request_list *rl = &q->rq; |
2064 | 2067 | ||
2065 | rl->count[rw]--; | 2068 | rl->count[rw]--; |
2066 | if (priv) | 2069 | if (priv) |
2067 | rl->elvpriv--; | 2070 | rl->elvpriv--; |
2068 | 2071 | ||
2069 | __freed_request(q, rw); | 2072 | __freed_request(q, rw); |
2070 | 2073 | ||
2071 | if (unlikely(rl->starved[rw ^ 1])) | 2074 | if (unlikely(rl->starved[rw ^ 1])) |
2072 | __freed_request(q, rw ^ 1); | 2075 | __freed_request(q, rw ^ 1); |
2073 | } | 2076 | } |
2074 | 2077 | ||
2075 | #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) | 2078 | #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) |
2076 | /* | 2079 | /* |
2077 | * Get a free request, queue_lock must be held. | 2080 | * Get a free request, queue_lock must be held. |
2078 | * Returns NULL on failure, with queue_lock held. | 2081 | * Returns NULL on failure, with queue_lock held. |
2079 | * Returns !NULL on success, with queue_lock *not held*. | 2082 | * Returns !NULL on success, with queue_lock *not held*. |
2080 | */ | 2083 | */ |
2081 | static struct request *get_request(struct request_queue *q, int rw_flags, | 2084 | static struct request *get_request(struct request_queue *q, int rw_flags, |
2082 | struct bio *bio, gfp_t gfp_mask) | 2085 | struct bio *bio, gfp_t gfp_mask) |
2083 | { | 2086 | { |
2084 | struct request *rq = NULL; | 2087 | struct request *rq = NULL; |
2085 | struct request_list *rl = &q->rq; | 2088 | struct request_list *rl = &q->rq; |
2086 | struct io_context *ioc = NULL; | 2089 | struct io_context *ioc = NULL; |
2087 | const int rw = rw_flags & 0x01; | 2090 | const int rw = rw_flags & 0x01; |
2088 | int may_queue, priv; | 2091 | int may_queue, priv; |
2089 | 2092 | ||
2090 | may_queue = elv_may_queue(q, rw_flags); | 2093 | may_queue = elv_may_queue(q, rw_flags); |
2091 | if (may_queue == ELV_MQUEUE_NO) | 2094 | if (may_queue == ELV_MQUEUE_NO) |
2092 | goto rq_starved; | 2095 | goto rq_starved; |
2093 | 2096 | ||
2094 | if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { | 2097 | if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { |
2095 | if (rl->count[rw]+1 >= q->nr_requests) { | 2098 | if (rl->count[rw]+1 >= q->nr_requests) { |
2096 | ioc = current_io_context(GFP_ATOMIC, q->node); | 2099 | ioc = current_io_context(GFP_ATOMIC, q->node); |
2097 | /* | 2100 | /* |
2098 | * The queue will fill after this allocation, so set | 2101 | * The queue will fill after this allocation, so set |
2099 | * it as full, and mark this process as "batching". | 2102 | * it as full, and mark this process as "batching". |
2100 | * This process will be allowed to complete a batch of | 2103 | * This process will be allowed to complete a batch of |
2101 | * requests, others will be blocked. | 2104 | * requests, others will be blocked. |
2102 | */ | 2105 | */ |
2103 | if (!blk_queue_full(q, rw)) { | 2106 | if (!blk_queue_full(q, rw)) { |
2104 | ioc_set_batching(q, ioc); | 2107 | ioc_set_batching(q, ioc); |
2105 | blk_set_queue_full(q, rw); | 2108 | blk_set_queue_full(q, rw); |
2106 | } else { | 2109 | } else { |
2107 | if (may_queue != ELV_MQUEUE_MUST | 2110 | if (may_queue != ELV_MQUEUE_MUST |
2108 | && !ioc_batching(q, ioc)) { | 2111 | && !ioc_batching(q, ioc)) { |
2109 | /* | 2112 | /* |
2110 | * The queue is full and the allocating | 2113 | * The queue is full and the allocating |
2111 | * process is not a "batcher", and not | 2114 | * process is not a "batcher", and not |
2112 | * exempted by the IO scheduler | 2115 | * exempted by the IO scheduler |
2113 | */ | 2116 | */ |
2114 | goto out; | 2117 | goto out; |
2115 | } | 2118 | } |
2116 | } | 2119 | } |
2117 | } | 2120 | } |
2118 | blk_set_queue_congested(q, rw); | 2121 | blk_set_queue_congested(q, rw); |
2119 | } | 2122 | } |
2120 | 2123 | ||
2121 | /* | 2124 | /* |
2122 | * Only allow batching queuers to allocate up to 50% over the defined | 2125 | * Only allow batching queuers to allocate up to 50% over the defined |
2123 | * limit of requests, otherwise we could have thousands of requests | 2126 | * limit of requests, otherwise we could have thousands of requests |
2124 | * allocated with any setting of ->nr_requests | 2127 | * allocated with any setting of ->nr_requests |
2125 | */ | 2128 | */ |
2126 | if (rl->count[rw] >= (3 * q->nr_requests / 2)) | 2129 | if (rl->count[rw] >= (3 * q->nr_requests / 2)) |
2127 | goto out; | 2130 | goto out; |
2128 | 2131 | ||
2129 | rl->count[rw]++; | 2132 | rl->count[rw]++; |
2130 | rl->starved[rw] = 0; | 2133 | rl->starved[rw] = 0; |
2131 | 2134 | ||
2132 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 2135 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
2133 | if (priv) | 2136 | if (priv) |
2134 | rl->elvpriv++; | 2137 | rl->elvpriv++; |
2135 | 2138 | ||
2136 | spin_unlock_irq(q->queue_lock); | 2139 | spin_unlock_irq(q->queue_lock); |
2137 | 2140 | ||
2138 | rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); | 2141 | rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); |
2139 | if (unlikely(!rq)) { | 2142 | if (unlikely(!rq)) { |
2140 | /* | 2143 | /* |
2141 | * Allocation failed presumably due to memory. Undo anything | 2144 | * Allocation failed presumably due to memory. Undo anything |
2142 | * we might have messed up. | 2145 | * we might have messed up. |
2143 | * | 2146 | * |
2144 | * Allocating task should really be put onto the front of the | 2147 | * Allocating task should really be put onto the front of the |
2145 | * wait queue, but this is pretty rare. | 2148 | * wait queue, but this is pretty rare. |
2146 | */ | 2149 | */ |
2147 | spin_lock_irq(q->queue_lock); | 2150 | spin_lock_irq(q->queue_lock); |
2148 | freed_request(q, rw, priv); | 2151 | freed_request(q, rw, priv); |
2149 | 2152 | ||
2150 | /* | 2153 | /* |
2151 | * in the very unlikely event that allocation failed and no | 2154 | * in the very unlikely event that allocation failed and no |
2152 | * requests for this direction were pending, mark us starved | 2155 | * requests for this direction were pending, mark us starved |
2153 | * so that freeing of a request in the other direction will | 2156 | * so that freeing of a request in the other direction will |
2154 | * notice us. another possible fix would be to split the | 2157 | * notice us. another possible fix would be to split the |
2155 | * rq mempool into READ and WRITE | 2158 | * rq mempool into READ and WRITE |
2156 | */ | 2159 | */ |
2157 | rq_starved: | 2160 | rq_starved: |
2158 | if (unlikely(rl->count[rw] == 0)) | 2161 | if (unlikely(rl->count[rw] == 0)) |
2159 | rl->starved[rw] = 1; | 2162 | rl->starved[rw] = 1; |
2160 | 2163 | ||
2161 | goto out; | 2164 | goto out; |
2162 | } | 2165 | } |
2163 | 2166 | ||
2164 | /* | 2167 | /* |
2165 | * ioc may be NULL here, and ioc_batching will be false. That's | 2168 | * ioc may be NULL here, and ioc_batching will be false. That's |
2166 | * OK, if the queue is under the request limit then requests need | 2169 | * OK, if the queue is under the request limit then requests need |
2167 | * not count toward the nr_batch_requests limit. There will always | 2170 | * not count toward the nr_batch_requests limit. There will always |
2168 | * be some limit enforced by BLK_BATCH_TIME. | 2171 | * be some limit enforced by BLK_BATCH_TIME. |
2169 | */ | 2172 | */ |
2170 | if (ioc_batching(q, ioc)) | 2173 | if (ioc_batching(q, ioc)) |
2171 | ioc->nr_batch_requests--; | 2174 | ioc->nr_batch_requests--; |
2172 | 2175 | ||
2173 | rq_init(q, rq); | 2176 | rq_init(q, rq); |
2174 | 2177 | ||
2175 | blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); | 2178 | blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); |
2176 | out: | 2179 | out: |
2177 | return rq; | 2180 | return rq; |
2178 | } | 2181 | } |
2179 | 2182 | ||
2180 | /* | 2183 | /* |
2181 | * No available requests for this queue, unplug the device and wait for some | 2184 | * No available requests for this queue, unplug the device and wait for some |
2182 | * requests to become available. | 2185 | * requests to become available. |
2183 | * | 2186 | * |
2184 | * Called with q->queue_lock held, and returns with it unlocked. | 2187 | * Called with q->queue_lock held, and returns with it unlocked. |
2185 | */ | 2188 | */ |
2186 | static struct request *get_request_wait(struct request_queue *q, int rw_flags, | 2189 | static struct request *get_request_wait(struct request_queue *q, int rw_flags, |
2187 | struct bio *bio) | 2190 | struct bio *bio) |
2188 | { | 2191 | { |
2189 | const int rw = rw_flags & 0x01; | 2192 | const int rw = rw_flags & 0x01; |
2190 | struct request *rq; | 2193 | struct request *rq; |
2191 | 2194 | ||
2192 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 2195 | rq = get_request(q, rw_flags, bio, GFP_NOIO); |
2193 | while (!rq) { | 2196 | while (!rq) { |
2194 | DEFINE_WAIT(wait); | 2197 | DEFINE_WAIT(wait); |
2195 | struct request_list *rl = &q->rq; | 2198 | struct request_list *rl = &q->rq; |
2196 | 2199 | ||
2197 | prepare_to_wait_exclusive(&rl->wait[rw], &wait, | 2200 | prepare_to_wait_exclusive(&rl->wait[rw], &wait, |
2198 | TASK_UNINTERRUPTIBLE); | 2201 | TASK_UNINTERRUPTIBLE); |
2199 | 2202 | ||
2200 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 2203 | rq = get_request(q, rw_flags, bio, GFP_NOIO); |
2201 | 2204 | ||
2202 | if (!rq) { | 2205 | if (!rq) { |
2203 | struct io_context *ioc; | 2206 | struct io_context *ioc; |
2204 | 2207 | ||
2205 | blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); | 2208 | blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); |
2206 | 2209 | ||
2207 | __generic_unplug_device(q); | 2210 | __generic_unplug_device(q); |
2208 | spin_unlock_irq(q->queue_lock); | 2211 | spin_unlock_irq(q->queue_lock); |
2209 | io_schedule(); | 2212 | io_schedule(); |
2210 | 2213 | ||
2211 | /* | 2214 | /* |
2212 | * After sleeping, we become a "batching" process and | 2215 | * After sleeping, we become a "batching" process and |
2213 | * will be able to allocate at least one request, and | 2216 | * will be able to allocate at least one request, and |
2214 | * up to a big batch of them for a small period of time. | 2217 | * up to a big batch of them for a small period of time. |
2215 | * See ioc_batching, ioc_set_batching | 2218 | * See ioc_batching, ioc_set_batching |
2216 | */ | 2219 | */ |
2217 | ioc = current_io_context(GFP_NOIO, q->node); | 2220 | ioc = current_io_context(GFP_NOIO, q->node); |
2218 | ioc_set_batching(q, ioc); | 2221 | ioc_set_batching(q, ioc); |
2219 | 2222 | ||
2220 | spin_lock_irq(q->queue_lock); | 2223 | spin_lock_irq(q->queue_lock); |
2221 | } | 2224 | } |
2222 | finish_wait(&rl->wait[rw], &wait); | 2225 | finish_wait(&rl->wait[rw], &wait); |
2223 | } | 2226 | } |
2224 | 2227 | ||
2225 | return rq; | 2228 | return rq; |
2226 | } | 2229 | } |
2227 | 2230 | ||
2228 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | 2231 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) |
2229 | { | 2232 | { |
2230 | struct request *rq; | 2233 | struct request *rq; |
2231 | 2234 | ||
2232 | BUG_ON(rw != READ && rw != WRITE); | 2235 | BUG_ON(rw != READ && rw != WRITE); |
2233 | 2236 | ||
2234 | spin_lock_irq(q->queue_lock); | 2237 | spin_lock_irq(q->queue_lock); |
2235 | if (gfp_mask & __GFP_WAIT) { | 2238 | if (gfp_mask & __GFP_WAIT) { |
2236 | rq = get_request_wait(q, rw, NULL); | 2239 | rq = get_request_wait(q, rw, NULL); |
2237 | } else { | 2240 | } else { |
2238 | rq = get_request(q, rw, NULL, gfp_mask); | 2241 | rq = get_request(q, rw, NULL, gfp_mask); |
2239 | if (!rq) | 2242 | if (!rq) |
2240 | spin_unlock_irq(q->queue_lock); | 2243 | spin_unlock_irq(q->queue_lock); |
2241 | } | 2244 | } |
2242 | /* q->queue_lock is unlocked at this point */ | 2245 | /* q->queue_lock is unlocked at this point */ |
2243 | 2246 | ||
2244 | return rq; | 2247 | return rq; |
2245 | } | 2248 | } |
2246 | EXPORT_SYMBOL(blk_get_request); | 2249 | EXPORT_SYMBOL(blk_get_request); |
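blk_get_request() is typically paired with blk_execute_rq() for pass-through commands; a hedged sketch follows (the SCSI TEST UNIT READY bytes and the 30 second timeout are arbitrary illustrative choices, not taken from this patch):

static int example_test_unit_ready(struct request_queue *q, struct gendisk *disk)
{
	struct request *rq;
	int err;

	/* __GFP_WAIT: sleep until a request is available, never NULL */
	rq = blk_get_request(q, READ, __GFP_WAIT);

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->cmd[0] = 0x00;		/* TEST UNIT READY, no data transfer */
	rq->cmd_len = 6;
	rq->timeout = 30 * HZ;

	err = blk_execute_rq(q, disk, rq, 0);	/* insert at tail and wait */
	blk_put_request(rq);
	return err;
}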
2247 | 2250 | ||
2248 | /** | 2251 | /** |
2249 | * blk_start_queueing - initiate dispatch of requests to device | 2252 | * blk_start_queueing - initiate dispatch of requests to device |
2250 | * @q: request queue to kick into gear | 2253 | * @q: request queue to kick into gear |
2251 | * | 2254 | * |
2252 | * This is basically a helper to remove the need to know whether a queue | 2255 | * This is basically a helper to remove the need to know whether a queue |
2253 | * is plugged or not if someone just wants to initiate dispatch of requests | 2256 | * is plugged or not if someone just wants to initiate dispatch of requests |
2254 | * for this queue. | 2257 | * for this queue. |
2255 | * | 2258 | * |
2256 | * The queue lock must be held with interrupts disabled. | 2259 | * The queue lock must be held with interrupts disabled. |
2257 | */ | 2260 | */ |
2258 | void blk_start_queueing(struct request_queue *q) | 2261 | void blk_start_queueing(struct request_queue *q) |
2259 | { | 2262 | { |
2260 | if (!blk_queue_plugged(q)) | 2263 | if (!blk_queue_plugged(q)) |
2261 | q->request_fn(q); | 2264 | q->request_fn(q); |
2262 | else | 2265 | else |
2263 | __generic_unplug_device(q); | 2266 | __generic_unplug_device(q); |
2264 | } | 2267 | } |
2265 | EXPORT_SYMBOL(blk_start_queueing); | 2268 | EXPORT_SYMBOL(blk_start_queueing); |
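blk_start_queueing() is meant for contexts that already hold q->queue_lock with interrupts off, for example right after queueing a request by hand; a small, hedged sketch:

/* caller holds q->queue_lock with interrupts disabled */
static void example_add_and_dispatch(struct request_queue *q, struct request *rq)
{
	__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);

	/* run request_fn now, or unplug if the queue is currently plugged */
	blk_start_queueing(q);
}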
2266 | 2269 | ||
2267 | /** | 2270 | /** |
2268 | * blk_requeue_request - put a request back on queue | 2271 | * blk_requeue_request - put a request back on queue |
2269 | * @q: request queue where request should be inserted | 2272 | * @q: request queue where request should be inserted |
2270 | * @rq: request to be inserted | 2273 | * @rq: request to be inserted |
2271 | * | 2274 | * |
2272 | * Description: | 2275 | * Description: |
2273 | * Drivers often keep queueing requests until the hardware cannot accept | 2276 | * Drivers often keep queueing requests until the hardware cannot accept |
2274 | * more. When that condition happens, we need to put the request back | 2277 | * more. When that condition happens, we need to put the request back |
2275 | * on the queue. Must be called with queue lock held. | 2278 | * on the queue. Must be called with queue lock held. |
2276 | */ | 2279 | */ |
2277 | void blk_requeue_request(struct request_queue *q, struct request *rq) | 2280 | void blk_requeue_request(struct request_queue *q, struct request *rq) |
2278 | { | 2281 | { |
2279 | blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); | 2282 | blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); |
2280 | 2283 | ||
2281 | if (blk_rq_tagged(rq)) | 2284 | if (blk_rq_tagged(rq)) |
2282 | blk_queue_end_tag(q, rq); | 2285 | blk_queue_end_tag(q, rq); |
2283 | 2286 | ||
2284 | elv_requeue_request(q, rq); | 2287 | elv_requeue_request(q, rq); |
2285 | } | 2288 | } |
2286 | 2289 | ||
2287 | EXPORT_SYMBOL(blk_requeue_request); | 2290 | EXPORT_SYMBOL(blk_requeue_request); |
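The requeue case described above usually shows up in a driver's request_fn when the hardware refuses a command after it has already been dequeued; a hedged sketch with invented example_* helpers:

/* called with q->queue_lock held */
static void example_dispatch(struct request_queue *q)
{
	struct example_dev *edev = q->queuedata;
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		blkdev_dequeue_request(rq);

		if (example_issue(edev, rq) != 0) {
			/* hardware rejected it: put it back on the queue and
			 * pause until a completion restarts us */
			blk_requeue_request(q, rq);
			blk_stop_queue(q);
			break;
		}
	}
}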
2288 | 2291 | ||
2289 | /** | 2292 | /** |
2290 | * blk_insert_request - insert a special request in to a request queue | 2293 | * blk_insert_request - insert a special request in to a request queue |
2291 | * @q: request queue where request should be inserted | 2294 | * @q: request queue where request should be inserted |
2292 | * @rq: request to be inserted | 2295 | * @rq: request to be inserted |
2293 | * @at_head: insert request at head or tail of queue | 2296 | * @at_head: insert request at head or tail of queue |
2294 | * @data: private data | 2297 | * @data: private data |
2295 | * | 2298 | * |
2296 | * Description: | 2299 | * Description: |
2297 | * Many block devices need to execute commands asynchronously, so they don't | 2300 | * Many block devices need to execute commands asynchronously, so they don't |
2298 | * block the whole kernel from preemption during request execution. This is | 2301 | * block the whole kernel from preemption during request execution. This is |
2299 | * normally accomplished by inserting artificial requests tagged as | 2302 | * normally accomplished by inserting artificial requests tagged as |
2300 | * REQ_SPECIAL in to the corresponding request queue, and letting them be | 2303 | * REQ_SPECIAL in to the corresponding request queue, and letting them be |
2301 | * scheduled for actual execution by the request queue. | 2304 | * scheduled for actual execution by the request queue. |
2302 | * | 2305 | * |
2303 | * We have the option of inserting at the head or the tail of the queue. | 2306 | * We have the option of inserting at the head or the tail of the queue. |
2304 | * Typically we use the tail for new ioctls and so forth. We use the head | 2307 | * Typically we use the tail for new ioctls and so forth. We use the head |
2305 | * of the queue for things like a QUEUE_FULL message from a device, or a | 2308 | * of the queue for things like a QUEUE_FULL message from a device, or a |
2306 | * host that is unable to accept a particular command. | 2309 | * host that is unable to accept a particular command. |
2307 | */ | 2310 | */ |
2308 | void blk_insert_request(struct request_queue *q, struct request *rq, | 2311 | void blk_insert_request(struct request_queue *q, struct request *rq, |
2309 | int at_head, void *data) | 2312 | int at_head, void *data) |
2310 | { | 2313 | { |
2311 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | 2314 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
2312 | unsigned long flags; | 2315 | unsigned long flags; |
2313 | 2316 | ||
2314 | /* | 2317 | /* |
2315 | * tell I/O scheduler that this isn't a regular read/write (ie it | 2318 | * tell I/O scheduler that this isn't a regular read/write (ie it |
2316 | * must not attempt merges on this) and that it acts as a soft | 2319 | * must not attempt merges on this) and that it acts as a soft |
2317 | * barrier | 2320 | * barrier |
2318 | */ | 2321 | */ |
2319 | rq->cmd_type = REQ_TYPE_SPECIAL; | 2322 | rq->cmd_type = REQ_TYPE_SPECIAL; |
2320 | rq->cmd_flags |= REQ_SOFTBARRIER; | 2323 | rq->cmd_flags |= REQ_SOFTBARRIER; |
2321 | 2324 | ||
2322 | rq->special = data; | 2325 | rq->special = data; |
2323 | 2326 | ||
2324 | spin_lock_irqsave(q->queue_lock, flags); | 2327 | spin_lock_irqsave(q->queue_lock, flags); |
2325 | 2328 | ||
2326 | /* | 2329 | /* |
2327 | * If command is tagged, release the tag | 2330 | * If command is tagged, release the tag |
2328 | */ | 2331 | */ |
2329 | if (blk_rq_tagged(rq)) | 2332 | if (blk_rq_tagged(rq)) |
2330 | blk_queue_end_tag(q, rq); | 2333 | blk_queue_end_tag(q, rq); |
2331 | 2334 | ||
2332 | drive_stat_acct(rq, rq->nr_sectors, 1); | 2335 | drive_stat_acct(rq, rq->nr_sectors, 1); |
2333 | __elv_add_request(q, rq, where, 0); | 2336 | __elv_add_request(q, rq, where, 0); |
2334 | blk_start_queueing(q); | 2337 | blk_start_queueing(q); |
2335 | spin_unlock_irqrestore(q->queue_lock, flags); | 2338 | spin_unlock_irqrestore(q->queue_lock, flags); |
2336 | } | 2339 | } |
2337 | 2340 | ||
2338 | EXPORT_SYMBOL(blk_insert_request); | 2341 | EXPORT_SYMBOL(blk_insert_request); |
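A hedged sketch of the usage pattern from the blk_insert_request() kernel-doc above, sending a driver-private command ahead of normal I/O; the command structure and completion handling are left out, and all example_* names are invented:

static void example_queue_drive_command(struct request_queue *q,
					struct example_cmd *cmd)
{
	struct request *rq;

	rq = blk_get_request(q, WRITE, __GFP_WAIT);

	/*
	 * blk_insert_request() itself marks the request REQ_TYPE_SPECIAL plus
	 * REQ_SOFTBARRIER and stores 'cmd' in rq->special; at_head=1 places
	 * it in front of the pending fs requests.
	 */
	blk_insert_request(q, rq, 1, cmd);
}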
2339 | 2342 | ||
2340 | static int __blk_rq_unmap_user(struct bio *bio) | 2343 | static int __blk_rq_unmap_user(struct bio *bio) |
2341 | { | 2344 | { |
2342 | int ret = 0; | 2345 | int ret = 0; |
2343 | 2346 | ||
2344 | if (bio) { | 2347 | if (bio) { |
2345 | if (bio_flagged(bio, BIO_USER_MAPPED)) | 2348 | if (bio_flagged(bio, BIO_USER_MAPPED)) |
2346 | bio_unmap_user(bio); | 2349 | bio_unmap_user(bio); |
2347 | else | 2350 | else |
2348 | ret = bio_uncopy_user(bio); | 2351 | ret = bio_uncopy_user(bio); |
2349 | } | 2352 | } |
2350 | 2353 | ||
2351 | return ret; | 2354 | return ret; |
2352 | } | 2355 | } |
2353 | 2356 | ||
2354 | int blk_rq_append_bio(struct request_queue *q, struct request *rq, | 2357 | int blk_rq_append_bio(struct request_queue *q, struct request *rq, |
2355 | struct bio *bio) | 2358 | struct bio *bio) |
2356 | { | 2359 | { |
2357 | if (!rq->bio) | 2360 | if (!rq->bio) |
2358 | blk_rq_bio_prep(q, rq, bio); | 2361 | blk_rq_bio_prep(q, rq, bio); |
2359 | else if (!ll_back_merge_fn(q, rq, bio)) | 2362 | else if (!ll_back_merge_fn(q, rq, bio)) |
2360 | return -EINVAL; | 2363 | return -EINVAL; |
2361 | else { | 2364 | else { |
2362 | rq->biotail->bi_next = bio; | 2365 | rq->biotail->bi_next = bio; |
2363 | rq->biotail = bio; | 2366 | rq->biotail = bio; |
2364 | 2367 | ||
2365 | rq->data_len += bio->bi_size; | 2368 | rq->data_len += bio->bi_size; |
2366 | } | 2369 | } |
2367 | return 0; | 2370 | return 0; |
2368 | } | 2371 | } |
2369 | EXPORT_SYMBOL(blk_rq_append_bio); | 2372 | EXPORT_SYMBOL(blk_rq_append_bio); |
2370 | 2373 | ||
2371 | static int __blk_rq_map_user(struct request_queue *q, struct request *rq, | 2374 | static int __blk_rq_map_user(struct request_queue *q, struct request *rq, |
2372 | void __user *ubuf, unsigned int len) | 2375 | void __user *ubuf, unsigned int len) |
2373 | { | 2376 | { |
2374 | unsigned long uaddr; | 2377 | unsigned long uaddr; |
2375 | struct bio *bio, *orig_bio; | 2378 | struct bio *bio, *orig_bio; |
2376 | int reading, ret; | 2379 | int reading, ret; |
2377 | 2380 | ||
2378 | reading = rq_data_dir(rq) == READ; | 2381 | reading = rq_data_dir(rq) == READ; |
2379 | 2382 | ||
2380 | /* | 2383 | /* |
2381 | * if alignment requirement is satisfied, map in user pages for | 2384 | * if alignment requirement is satisfied, map in user pages for |
2382 | * direct dma. else, set up kernel bounce buffers | 2385 | * direct dma. else, set up kernel bounce buffers |
2383 | */ | 2386 | */ |
2384 | uaddr = (unsigned long) ubuf; | 2387 | uaddr = (unsigned long) ubuf; |
2385 | if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q))) | 2388 | if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q))) |
2386 | bio = bio_map_user(q, NULL, uaddr, len, reading); | 2389 | bio = bio_map_user(q, NULL, uaddr, len, reading); |
2387 | else | 2390 | else |
2388 | bio = bio_copy_user(q, uaddr, len, reading); | 2391 | bio = bio_copy_user(q, uaddr, len, reading); |
2389 | 2392 | ||
2390 | if (IS_ERR(bio)) | 2393 | if (IS_ERR(bio)) |
2391 | return PTR_ERR(bio); | 2394 | return PTR_ERR(bio); |
2392 | 2395 | ||
2393 | orig_bio = bio; | 2396 | orig_bio = bio; |
2394 | blk_queue_bounce(q, &bio); | 2397 | blk_queue_bounce(q, &bio); |
2395 | 2398 | ||
2396 | /* | 2399 | /* |
2397 | * We link the bounce buffer in and could have to traverse it | 2400 | * We link the bounce buffer in and could have to traverse it |
2398 | * later so we have to get a ref to prevent it from being freed | 2401 | * later so we have to get a ref to prevent it from being freed |
2399 | */ | 2402 | */ |
2400 | bio_get(bio); | 2403 | bio_get(bio); |
2401 | 2404 | ||
2402 | ret = blk_rq_append_bio(q, rq, bio); | 2405 | ret = blk_rq_append_bio(q, rq, bio); |
2403 | if (!ret) | 2406 | if (!ret) |
2404 | return bio->bi_size; | 2407 | return bio->bi_size; |
2405 | 2408 | ||
2406 | /* if it was bounced we must call the end io function */ | 2409 | /* if it was bounced we must call the end io function */ |
2407 | bio_endio(bio, 0); | 2410 | bio_endio(bio, 0); |
2408 | __blk_rq_unmap_user(orig_bio); | 2411 | __blk_rq_unmap_user(orig_bio); |
2409 | bio_put(bio); | 2412 | bio_put(bio); |
2410 | return ret; | 2413 | return ret; |
2411 | } | 2414 | } |
2412 | 2415 | ||
2413 | /** | 2416 | /** |
2414 | * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage | 2417 | * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage |
2415 | * @q: request queue where request should be inserted | 2418 | * @q: request queue where request should be inserted |
2416 | * @rq: request structure to fill | 2419 | * @rq: request structure to fill |
2417 | * @ubuf: the user buffer | 2420 | * @ubuf: the user buffer |
2418 | * @len: length of user data | 2421 | * @len: length of user data |
2419 | * | 2422 | * |
2420 | * Description: | 2423 | * Description: |
2421 | * Data will be mapped directly for zero copy io, if possible. Otherwise | 2424 | * Data will be mapped directly for zero copy io, if possible. Otherwise |
2422 | * a kernel bounce buffer is used. | 2425 | * a kernel bounce buffer is used. |
2423 | * | 2426 | * |
2424 | * A matching blk_rq_unmap_user() must be issued at the end of io, while | 2427 | * A matching blk_rq_unmap_user() must be issued at the end of io, while |
2425 | * still in process context. | 2428 | * still in process context. |
2426 | * | 2429 | * |
2427 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() | 2430 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() |
2428 | * before being submitted to the device, as pages mapped may be out of | 2431 | * before being submitted to the device, as pages mapped may be out of |
2429 | * reach. It's the caller's responsibility to make sure this happens. The | 2432 | * reach. It's the caller's responsibility to make sure this happens. The |
2430 | * original bio must be passed back in to blk_rq_unmap_user() for proper | 2433 | * original bio must be passed back in to blk_rq_unmap_user() for proper |
2431 | * unmapping. | 2434 | * unmapping. |
2432 | */ | 2435 | */ |
2433 | int blk_rq_map_user(struct request_queue *q, struct request *rq, | 2436 | int blk_rq_map_user(struct request_queue *q, struct request *rq, |
2434 | void __user *ubuf, unsigned long len) | 2437 | void __user *ubuf, unsigned long len) |
2435 | { | 2438 | { |
2436 | unsigned long bytes_read = 0; | 2439 | unsigned long bytes_read = 0; |
2437 | struct bio *bio = NULL; | 2440 | struct bio *bio = NULL; |
2438 | int ret; | 2441 | int ret; |
2439 | 2442 | ||
2440 | if (len > (q->max_hw_sectors << 9)) | 2443 | if (len > (q->max_hw_sectors << 9)) |
2441 | return -EINVAL; | 2444 | return -EINVAL; |
2442 | if (!len || !ubuf) | 2445 | if (!len || !ubuf) |
2443 | return -EINVAL; | 2446 | return -EINVAL; |
2444 | 2447 | ||
2445 | while (bytes_read != len) { | 2448 | while (bytes_read != len) { |
2446 | unsigned long map_len, end, start; | 2449 | unsigned long map_len, end, start; |
2447 | 2450 | ||
2448 | map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE); | 2451 | map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE); |
2449 | end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1) | 2452 | end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1) |
2450 | >> PAGE_SHIFT; | 2453 | >> PAGE_SHIFT; |
2451 | start = (unsigned long)ubuf >> PAGE_SHIFT; | 2454 | start = (unsigned long)ubuf >> PAGE_SHIFT; |
2452 | 2455 | ||
2453 | /* | 2456 | /* |
2454 | * A bad offset could cause us to require BIO_MAX_PAGES + 1 | 2457 | * A bad offset could cause us to require BIO_MAX_PAGES + 1 |
2455 | * pages. If this happens we just lower the requested | 2458 | * pages. If this happens we just lower the requested |
2456 | * mapping len by a page so that we can fit | 2459 | * mapping len by a page so that we can fit |
2457 | */ | 2460 | */ |
2458 | if (end - start > BIO_MAX_PAGES) | 2461 | if (end - start > BIO_MAX_PAGES) |
2459 | map_len -= PAGE_SIZE; | 2462 | map_len -= PAGE_SIZE; |
2460 | 2463 | ||
2461 | ret = __blk_rq_map_user(q, rq, ubuf, map_len); | 2464 | ret = __blk_rq_map_user(q, rq, ubuf, map_len); |
2462 | if (ret < 0) | 2465 | if (ret < 0) |
2463 | goto unmap_rq; | 2466 | goto unmap_rq; |
2464 | if (!bio) | 2467 | if (!bio) |
2465 | bio = rq->bio; | 2468 | bio = rq->bio; |
2466 | bytes_read += ret; | 2469 | bytes_read += ret; |
2467 | ubuf += ret; | 2470 | ubuf += ret; |
2468 | } | 2471 | } |
2469 | 2472 | ||
2470 | rq->buffer = rq->data = NULL; | 2473 | rq->buffer = rq->data = NULL; |
2471 | return 0; | 2474 | return 0; |
2472 | unmap_rq: | 2475 | unmap_rq: |
2473 | blk_rq_unmap_user(bio); | 2476 | blk_rq_unmap_user(bio); |
2474 | return ret; | 2477 | return ret; |
2475 | } | 2478 | } |
2476 | 2479 | ||
2477 | EXPORT_SYMBOL(blk_rq_map_user); | 2480 | EXPORT_SYMBOL(blk_rq_map_user); |
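The usual consumer of blk_rq_map_user() is command passthrough: allocate a request, map the user buffer (zero-copy if it meets the queue's DMA alignment, bounced otherwise), execute, then unmap with the bio pointer saved before submission, since completion may advance rq->bio. A hedged sketch of that flow; the CDB, its length and the timeout are placeholders, not values taken from this file:

#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/string.h>

static int example_user_passthrough(struct request_queue *q, struct gendisk *disk,
				    unsigned char *cdb, unsigned int cdb_len,
				    void __user *ubuf, unsigned long len)
{
	struct request *rq;
	struct bio *bio;
	int ret;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	if (!rq)
		return -ENOMEM;

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	memcpy(rq->cmd, cdb, cdb_len);
	rq->cmd_len = cdb_len;
	rq->timeout = 60 * HZ;			/* placeholder timeout */

	ret = blk_rq_map_user(q, rq, ubuf, len);
	if (ret)
		goto out;

	bio = rq->bio;				/* save: completion may change rq->bio */

	ret = blk_execute_rq(q, disk, rq, 0);	/* synchronous, tail insertion */

	if (blk_rq_unmap_user(bio) && !ret)
		ret = -EFAULT;			/* copy-back of a bounced buffer failed */
out:
	blk_put_request(rq);
	return ret;
}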
2478 | 2481 | ||
2479 | /** | 2482 | /** |
2480 | * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage | 2483 | * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage |
2481 | * @q: request queue where request should be inserted | 2484 | * @q: request queue where request should be inserted |
2482 | * @rq: request to map data to | 2485 | * @rq: request to map data to |
2483 | * @iov: pointer to the iovec | 2486 | * @iov: pointer to the iovec |
2484 | * @iov_count: number of elements in the iovec | 2487 | * @iov_count: number of elements in the iovec |
2485 | * @len: I/O byte count | 2488 | * @len: I/O byte count |
2486 | * | 2489 | * |
2487 | * Description: | 2490 | * Description: |
2488 | * Data will be mapped directly for zero copy io, if possible. Otherwise | 2491 | * Data will be mapped directly for zero copy io, if possible. Otherwise |
2489 | * a kernel bounce buffer is used. | 2492 | * a kernel bounce buffer is used. |
2490 | * | 2493 | * |
2491 | * A matching blk_rq_unmap_user() must be issued at the end of io, while | 2494 | * A matching blk_rq_unmap_user() must be issued at the end of io, while |
2492 | * still in process context. | 2495 | * still in process context. |
2493 | * | 2496 | * |
2494 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() | 2497 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() |
2495 | * before being submitted to the device, as pages mapped may be out of | 2498 | * before being submitted to the device, as pages mapped may be out of |
2499 | * reach. It's the caller's responsibility to make sure this happens. The | 2502 | * reach. It's the caller's responsibility to make sure this happens. The |
2497 | * original bio must be passed back in to blk_rq_unmap_user() for proper | 2500 | * original bio must be passed back in to blk_rq_unmap_user() for proper |
2498 | * unmapping. | 2501 | * unmapping. |
2499 | */ | 2502 | */ |
2500 | int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, | 2503 | int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, |
2501 | struct sg_iovec *iov, int iov_count, unsigned int len) | 2504 | struct sg_iovec *iov, int iov_count, unsigned int len) |
2502 | { | 2505 | { |
2503 | struct bio *bio; | 2506 | struct bio *bio; |
2504 | 2507 | ||
2505 | if (!iov || iov_count <= 0) | 2508 | if (!iov || iov_count <= 0) |
2506 | return -EINVAL; | 2509 | return -EINVAL; |
2507 | 2510 | ||
2508 | /* we don't allow misaligned data like bio_map_user() does. If the | 2511 | /* we don't allow misaligned data like bio_map_user() does. If the |
2509 | * user is using sg, they're expected to know the alignment constraints | 2512 | * user is using sg, they're expected to know the alignment constraints |
2510 | * and respect them accordingly */ | 2513 | * and respect them accordingly */ |
2511 | bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); | 2514 | bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); |
2512 | if (IS_ERR(bio)) | 2515 | if (IS_ERR(bio)) |
2513 | return PTR_ERR(bio); | 2516 | return PTR_ERR(bio); |
2514 | 2517 | ||
2515 | if (bio->bi_size != len) { | 2518 | if (bio->bi_size != len) { |
2516 | bio_endio(bio, 0); | 2519 | bio_endio(bio, 0); |
2517 | bio_unmap_user(bio); | 2520 | bio_unmap_user(bio); |
2518 | return -EINVAL; | 2521 | return -EINVAL; |
2519 | } | 2522 | } |
2520 | 2523 | ||
2521 | bio_get(bio); | 2524 | bio_get(bio); |
2522 | blk_rq_bio_prep(q, rq, bio); | 2525 | blk_rq_bio_prep(q, rq, bio); |
2523 | rq->buffer = rq->data = NULL; | 2526 | rq->buffer = rq->data = NULL; |
2524 | return 0; | 2527 | return 0; |
2525 | } | 2528 | } |
2526 | 2529 | ||
2527 | EXPORT_SYMBOL(blk_rq_map_user_iov); | 2530 | EXPORT_SYMBOL(blk_rq_map_user_iov); |
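blk_rq_map_user_iov() maps an already-validated sg_iovec array in one pass and, unlike blk_rq_map_user(), offers no bounce fallback, so every segment must respect the queue's DMA alignment. A short sketch of the call itself; the two user segments are assumed to have been copied in from something like an SG_IO header:

#include <linux/blkdev.h>
#include <scsi/sg.h>			/* struct sg_iovec */

static int example_map_iov(struct request_queue *q, struct request *rq,
			   void __user *buf0, size_t len0,
			   void __user *buf1, size_t len1)
{
	struct sg_iovec iov[2];

	iov[0].iov_base = buf0;
	iov[0].iov_len  = len0;
	iov[1].iov_base = buf1;
	iov[1].iov_len  = len1;

	/* len must equal the size of the bio built from the iovec */
	return blk_rq_map_user_iov(q, rq, iov, 2, len0 + len1);
}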
2528 | 2531 | ||
2529 | /** | 2532 | /** |
2530 | * blk_rq_unmap_user - unmap a request with user data | 2533 | * blk_rq_unmap_user - unmap a request with user data |
2531 | * @bio: start of bio list | 2534 | * @bio: start of bio list |
2532 | * | 2535 | * |
2533 | * Description: | 2536 | * Description: |
2534 | * Unmap a rq previously mapped by blk_rq_map_user(). The caller must | 2537 | * Unmap a rq previously mapped by blk_rq_map_user(). The caller must |
2535 | * supply the original rq->bio from the blk_rq_map_user() return, since | 2538 | * supply the original rq->bio from the blk_rq_map_user() return, since |
2536 | * the io completion may have changed rq->bio. | 2539 | * the io completion may have changed rq->bio. |
2537 | */ | 2540 | */ |
2538 | int blk_rq_unmap_user(struct bio *bio) | 2541 | int blk_rq_unmap_user(struct bio *bio) |
2539 | { | 2542 | { |
2540 | struct bio *mapped_bio; | 2543 | struct bio *mapped_bio; |
2541 | int ret = 0, ret2; | 2544 | int ret = 0, ret2; |
2542 | 2545 | ||
2543 | while (bio) { | 2546 | while (bio) { |
2544 | mapped_bio = bio; | 2547 | mapped_bio = bio; |
2545 | if (unlikely(bio_flagged(bio, BIO_BOUNCED))) | 2548 | if (unlikely(bio_flagged(bio, BIO_BOUNCED))) |
2546 | mapped_bio = bio->bi_private; | 2549 | mapped_bio = bio->bi_private; |
2547 | 2550 | ||
2548 | ret2 = __blk_rq_unmap_user(mapped_bio); | 2551 | ret2 = __blk_rq_unmap_user(mapped_bio); |
2549 | if (ret2 && !ret) | 2552 | if (ret2 && !ret) |
2550 | ret = ret2; | 2553 | ret = ret2; |
2551 | 2554 | ||
2552 | mapped_bio = bio; | 2555 | mapped_bio = bio; |
2553 | bio = bio->bi_next; | 2556 | bio = bio->bi_next; |
2554 | bio_put(mapped_bio); | 2557 | bio_put(mapped_bio); |
2555 | } | 2558 | } |
2556 | 2559 | ||
2557 | return ret; | 2560 | return ret; |
2558 | } | 2561 | } |
2559 | 2562 | ||
2560 | EXPORT_SYMBOL(blk_rq_unmap_user); | 2563 | EXPORT_SYMBOL(blk_rq_unmap_user); |
2561 | 2564 | ||
2562 | /** | 2565 | /** |
2563 | * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage | 2566 | * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage |
2564 | * @q: request queue where request should be inserted | 2567 | * @q: request queue where request should be inserted |
2565 | * @rq: request to fill | 2568 | * @rq: request to fill |
2566 | * @kbuf: the kernel buffer | 2569 | * @kbuf: the kernel buffer |
2567 | * @len: length of kernel data | 2570 | * @len: length of kernel data |
2568 | * @gfp_mask: memory allocation flags | 2571 | * @gfp_mask: memory allocation flags |
2569 | */ | 2572 | */ |
2570 | int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, | 2573 | int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, |
2571 | unsigned int len, gfp_t gfp_mask) | 2574 | unsigned int len, gfp_t gfp_mask) |
2572 | { | 2575 | { |
2573 | struct bio *bio; | 2576 | struct bio *bio; |
2574 | 2577 | ||
2575 | if (len > (q->max_hw_sectors << 9)) | 2578 | if (len > (q->max_hw_sectors << 9)) |
2576 | return -EINVAL; | 2579 | return -EINVAL; |
2577 | if (!len || !kbuf) | 2580 | if (!len || !kbuf) |
2578 | return -EINVAL; | 2581 | return -EINVAL; |
2579 | 2582 | ||
2580 | bio = bio_map_kern(q, kbuf, len, gfp_mask); | 2583 | bio = bio_map_kern(q, kbuf, len, gfp_mask); |
2581 | if (IS_ERR(bio)) | 2584 | if (IS_ERR(bio)) |
2582 | return PTR_ERR(bio); | 2585 | return PTR_ERR(bio); |
2583 | 2586 | ||
2584 | if (rq_data_dir(rq) == WRITE) | 2587 | if (rq_data_dir(rq) == WRITE) |
2585 | bio->bi_rw |= (1 << BIO_RW); | 2588 | bio->bi_rw |= (1 << BIO_RW); |
2586 | 2589 | ||
2587 | blk_rq_bio_prep(q, rq, bio); | 2590 | blk_rq_bio_prep(q, rq, bio); |
2588 | blk_queue_bounce(q, &rq->bio); | 2591 | blk_queue_bounce(q, &rq->bio); |
2589 | rq->buffer = rq->data = NULL; | 2592 | rq->buffer = rq->data = NULL; |
2590 | return 0; | 2593 | return 0; |
2591 | } | 2594 | } |
2592 | 2595 | ||
2593 | EXPORT_SYMBOL(blk_rq_map_kern); | 2596 | EXPORT_SYMBOL(blk_rq_map_kern); |
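For a kernel-resident buffer the flow is shorter: blk_rq_map_kern() builds the bio with bio_map_kern(), bounces it if the queue requires, and the caller just frees the buffer after execution. A minimal sketch, again with a placeholder CDB and timeout:

#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/string.h>

static int example_kern_passthrough(struct request_queue *q, struct gendisk *disk,
				    unsigned char *cdb, unsigned int cdb_len,
				    unsigned int len)
{
	struct request *rq;
	void *buf;
	int ret;

	buf = kzalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	if (!rq) {
		kfree(buf);
		return -ENOMEM;
	}

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	memcpy(rq->cmd, cdb, cdb_len);
	rq->cmd_len = cdb_len;
	rq->timeout = 30 * HZ;			/* placeholder timeout */

	ret = blk_rq_map_kern(q, rq, buf, len, GFP_KERNEL);
	if (!ret)
		ret = blk_execute_rq(q, disk, rq, 0);

	/* a real caller would inspect buf here before freeing it */
	blk_put_request(rq);
	kfree(buf);
	return ret;
}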
2594 | 2597 | ||
2595 | /** | 2598 | /** |
2596 | * blk_execute_rq_nowait - insert a request into queue for execution | 2599 | * blk_execute_rq_nowait - insert a request into queue for execution |
2597 | * @q: queue to insert the request in | 2600 | * @q: queue to insert the request in |
2598 | * @bd_disk: matching gendisk | 2601 | * @bd_disk: matching gendisk |
2599 | * @rq: request to insert | 2602 | * @rq: request to insert |
2600 | * @at_head: insert request at head or tail of queue | 2603 | * @at_head: insert request at head or tail of queue |
2601 | * @done: I/O completion handler | 2604 | * @done: I/O completion handler |
2602 | * | 2605 | * |
2603 | * Description: | 2606 | * Description: |
2604 | * Insert a fully prepared request at the back of the io scheduler queue | 2607 | * Insert a fully prepared request at the back of the io scheduler queue |
2605 | * for execution. Don't wait for completion. | 2608 | * for execution. Don't wait for completion. |
2606 | */ | 2609 | */ |
2607 | void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, | 2610 | void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, |
2608 | struct request *rq, int at_head, | 2611 | struct request *rq, int at_head, |
2609 | rq_end_io_fn *done) | 2612 | rq_end_io_fn *done) |
2610 | { | 2613 | { |
2611 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | 2614 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
2612 | 2615 | ||
2613 | rq->rq_disk = bd_disk; | 2616 | rq->rq_disk = bd_disk; |
2614 | rq->cmd_flags |= REQ_NOMERGE; | 2617 | rq->cmd_flags |= REQ_NOMERGE; |
2615 | rq->end_io = done; | 2618 | rq->end_io = done; |
2616 | WARN_ON(irqs_disabled()); | 2619 | WARN_ON(irqs_disabled()); |
2617 | spin_lock_irq(q->queue_lock); | 2620 | spin_lock_irq(q->queue_lock); |
2618 | __elv_add_request(q, rq, where, 1); | 2621 | __elv_add_request(q, rq, where, 1); |
2619 | __generic_unplug_device(q); | 2622 | __generic_unplug_device(q); |
2620 | spin_unlock_irq(q->queue_lock); | 2623 | spin_unlock_irq(q->queue_lock); |
2621 | } | 2624 | } |
2622 | EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); | 2625 | EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); |
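blk_execute_rq_nowait() returns as soon as the request has been queued and the device unplugged; completion is delivered through the rq_end_io_fn, exactly as blk_execute_rq() below does with blk_end_sync_rq(). A hedged sketch of an asynchronous caller with its own hook; the names are illustrative and the request is assumed to be fully prepared:

#include <linux/blkdev.h>
#include <linux/kernel.h>

/* runs from end_that_request_last() with the queue lock held */
static void example_end_io(struct request *rq, int error)
{
	if (error)
		printk(KERN_WARNING "example: request failed, errors=%d\n",
		       rq->errors);
	__blk_put_request(rq->q, rq);
}

static void example_submit_async(struct request_queue *q, struct gendisk *disk,
				 struct request *rq)
{
	rq->end_io_data = NULL;			/* nothing to carry to completion */
	blk_execute_rq_nowait(q, disk, rq, 1, example_end_io);
	/* returns immediately; example_end_io() fires when the request completes */
}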
2623 | 2626 | ||
2624 | /** | 2627 | /** |
2625 | * blk_execute_rq - insert a request into queue for execution | 2628 | * blk_execute_rq - insert a request into queue for execution |
2626 | * @q: queue to insert the request in | 2629 | * @q: queue to insert the request in |
2627 | * @bd_disk: matching gendisk | 2630 | * @bd_disk: matching gendisk |
2628 | * @rq: request to insert | 2631 | * @rq: request to insert |
2629 | * @at_head: insert request at head or tail of queue | 2632 | * @at_head: insert request at head or tail of queue |
2630 | * | 2633 | * |
2631 | * Description: | 2634 | * Description: |
2632 | * Insert a fully prepared request at the back of the io scheduler queue | 2635 | * Insert a fully prepared request at the back of the io scheduler queue |
2633 | * for execution and wait for completion. | 2636 | * for execution and wait for completion. |
2634 | */ | 2637 | */ |
2635 | int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, | 2638 | int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, |
2636 | struct request *rq, int at_head) | 2639 | struct request *rq, int at_head) |
2637 | { | 2640 | { |
2638 | DECLARE_COMPLETION_ONSTACK(wait); | 2641 | DECLARE_COMPLETION_ONSTACK(wait); |
2639 | char sense[SCSI_SENSE_BUFFERSIZE]; | 2642 | char sense[SCSI_SENSE_BUFFERSIZE]; |
2640 | int err = 0; | 2643 | int err = 0; |
2641 | 2644 | ||
2642 | /* | 2645 | /* |
2643 | * we need an extra reference to the request, so we can look at | 2646 | * we need an extra reference to the request, so we can look at |
2644 | * it after io completion | 2647 | * it after io completion |
2645 | */ | 2648 | */ |
2646 | rq->ref_count++; | 2649 | rq->ref_count++; |
2647 | 2650 | ||
2648 | if (!rq->sense) { | 2651 | if (!rq->sense) { |
2649 | memset(sense, 0, sizeof(sense)); | 2652 | memset(sense, 0, sizeof(sense)); |
2650 | rq->sense = sense; | 2653 | rq->sense = sense; |
2651 | rq->sense_len = 0; | 2654 | rq->sense_len = 0; |
2652 | } | 2655 | } |
2653 | 2656 | ||
2654 | rq->end_io_data = &wait; | 2657 | rq->end_io_data = &wait; |
2655 | blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); | 2658 | blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); |
2656 | wait_for_completion(&wait); | 2659 | wait_for_completion(&wait); |
2657 | 2660 | ||
2658 | if (rq->errors) | 2661 | if (rq->errors) |
2659 | err = -EIO; | 2662 | err = -EIO; |
2660 | 2663 | ||
2661 | return err; | 2664 | return err; |
2662 | } | 2665 | } |
2663 | 2666 | ||
2664 | EXPORT_SYMBOL(blk_execute_rq); | 2667 | EXPORT_SYMBOL(blk_execute_rq); |
2665 | 2668 | ||
2666 | /** | 2669 | /** |
2667 | * blkdev_issue_flush - queue a flush | 2670 | * blkdev_issue_flush - queue a flush |
2668 | * @bdev: blockdev to issue flush for | 2671 | * @bdev: blockdev to issue flush for |
2669 | * @error_sector: error sector | 2672 | * @error_sector: error sector |
2670 | * | 2673 | * |
2671 | * Description: | 2674 | * Description: |
2672 | * Issue a flush for the block device in question. Caller can supply | 2675 | * Issue a flush for the block device in question. Caller can supply |
2673 | * room for storing the error offset in case of a flush error, if they | 2676 | * room for storing the error offset in case of a flush error, if they |
2674 | * wish to. Caller must run wait_for_completion() on its own. | 2677 | * wish to. Caller must run wait_for_completion() on its own. |
2675 | */ | 2678 | */ |
2676 | int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) | 2679 | int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) |
2677 | { | 2680 | { |
2678 | struct request_queue *q; | 2681 | struct request_queue *q; |
2679 | 2682 | ||
2680 | if (bdev->bd_disk == NULL) | 2683 | if (bdev->bd_disk == NULL) |
2681 | return -ENXIO; | 2684 | return -ENXIO; |
2682 | 2685 | ||
2683 | q = bdev_get_queue(bdev); | 2686 | q = bdev_get_queue(bdev); |
2684 | if (!q) | 2687 | if (!q) |
2685 | return -ENXIO; | 2688 | return -ENXIO; |
2686 | if (!q->issue_flush_fn) | 2689 | if (!q->issue_flush_fn) |
2687 | return -EOPNOTSUPP; | 2690 | return -EOPNOTSUPP; |
2688 | 2691 | ||
2689 | return q->issue_flush_fn(q, bdev->bd_disk, error_sector); | 2692 | return q->issue_flush_fn(q, bdev->bd_disk, error_sector); |
2690 | } | 2693 | } |
2691 | 2694 | ||
2692 | EXPORT_SYMBOL(blkdev_issue_flush); | 2695 | EXPORT_SYMBOL(blkdev_issue_flush); |
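A filesystem typically calls blkdev_issue_flush() after a journal commit to push the drive's volatile write cache to media; -EOPNOTSUPP just means the queue has no ->issue_flush_fn and can usually be ignored. A short sketch, assuming the caller already holds a reference on the block_device:

#include <linux/blkdev.h>
#include <linux/fs.h>

static int example_flush_cache(struct block_device *bdev)
{
	sector_t error_sector;
	int ret;

	ret = blkdev_issue_flush(bdev, &error_sector);
	if (ret == -EOPNOTSUPP)
		ret = 0;		/* no flush method on this queue: nothing to do */

	return ret;
}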
2693 | 2696 | ||
2694 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) | 2697 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) |
2695 | { | 2698 | { |
2696 | int rw = rq_data_dir(rq); | 2699 | int rw = rq_data_dir(rq); |
2697 | 2700 | ||
2698 | if (!blk_fs_request(rq) || !rq->rq_disk) | 2701 | if (!blk_fs_request(rq) || !rq->rq_disk) |
2699 | return; | 2702 | return; |
2700 | 2703 | ||
2701 | if (!new_io) { | 2704 | if (!new_io) { |
2702 | __disk_stat_inc(rq->rq_disk, merges[rw]); | 2705 | __disk_stat_inc(rq->rq_disk, merges[rw]); |
2703 | } else { | 2706 | } else { |
2704 | disk_round_stats(rq->rq_disk); | 2707 | disk_round_stats(rq->rq_disk); |
2705 | rq->rq_disk->in_flight++; | 2708 | rq->rq_disk->in_flight++; |
2706 | } | 2709 | } |
2707 | } | 2710 | } |
2708 | 2711 | ||
2709 | /* | 2712 | /* |
2710 | * add-request adds a request to the linked list. | 2713 | * add-request adds a request to the linked list. |
2711 | * queue lock is held and interrupts disabled, as we muck with the | 2714 | * queue lock is held and interrupts disabled, as we muck with the |
2712 | * request queue list. | 2715 | * request queue list. |
2713 | */ | 2716 | */ |
2714 | static inline void add_request(struct request_queue * q, struct request * req) | 2717 | static inline void add_request(struct request_queue * q, struct request * req) |
2715 | { | 2718 | { |
2716 | drive_stat_acct(req, req->nr_sectors, 1); | 2719 | drive_stat_acct(req, req->nr_sectors, 1); |
2717 | 2720 | ||
2718 | /* | 2721 | /* |
2719 | * elevator indicated where it wants this request to be | 2722 | * elevator indicated where it wants this request to be |
2720 | * inserted at elevator_merge time | 2723 | * inserted at elevator_merge time |
2721 | */ | 2724 | */ |
2722 | __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); | 2725 | __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); |
2723 | } | 2726 | } |
2724 | 2727 | ||
2725 | /* | 2728 | /* |
2726 | * disk_round_stats() - Round off the performance stats on a struct | 2729 | * disk_round_stats() - Round off the performance stats on a struct |
2727 | * disk_stats. | 2730 | * disk_stats. |
2728 | * | 2731 | * |
2729 | * The average IO queue length and utilisation statistics are maintained | 2732 | * The average IO queue length and utilisation statistics are maintained |
2730 | * by observing the current state of the queue length and the amount of | 2733 | * by observing the current state of the queue length and the amount of |
2731 | * time it has been in this state for. | 2734 | * time it has been in this state for. |
2732 | * | 2735 | * |
2733 | * Normally, that accounting is done on IO completion, but that can result | 2736 | * Normally, that accounting is done on IO completion, but that can result |
2734 | * in more than a second's worth of IO being accounted for within any one | 2737 | * in more than a second's worth of IO being accounted for within any one |
2735 | * second, leading to >100% utilisation. To deal with that, we call this | 2738 | * second, leading to >100% utilisation. To deal with that, we call this |
2736 | * function to do a round-off before returning the results when reading | 2739 | * function to do a round-off before returning the results when reading |
2737 | * /proc/diskstats. This accounts immediately for all queue usage up to | 2740 | * /proc/diskstats. This accounts immediately for all queue usage up to |
2738 | * the current jiffies and restarts the counters again. | 2741 | * the current jiffies and restarts the counters again. |
2739 | */ | 2742 | */ |
2740 | void disk_round_stats(struct gendisk *disk) | 2743 | void disk_round_stats(struct gendisk *disk) |
2741 | { | 2744 | { |
2742 | unsigned long now = jiffies; | 2745 | unsigned long now = jiffies; |
2743 | 2746 | ||
2744 | if (now == disk->stamp) | 2747 | if (now == disk->stamp) |
2745 | return; | 2748 | return; |
2746 | 2749 | ||
2747 | if (disk->in_flight) { | 2750 | if (disk->in_flight) { |
2748 | __disk_stat_add(disk, time_in_queue, | 2751 | __disk_stat_add(disk, time_in_queue, |
2749 | disk->in_flight * (now - disk->stamp)); | 2752 | disk->in_flight * (now - disk->stamp)); |
2750 | __disk_stat_add(disk, io_ticks, (now - disk->stamp)); | 2753 | __disk_stat_add(disk, io_ticks, (now - disk->stamp)); |
2751 | } | 2754 | } |
2752 | disk->stamp = now; | 2755 | disk->stamp = now; |
2753 | } | 2756 | } |
2754 | 2757 | ||
2755 | EXPORT_SYMBOL_GPL(disk_round_stats); | 2758 | EXPORT_SYMBOL_GPL(disk_round_stats); |
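To make the rounding concrete with hypothetical numbers: with 4 requests in flight and 250 jiffies elapsed since disk->stamp, the call above adds 4 * 250 = 1000 jiffies to time_in_queue and 250 jiffies to io_ticks, then moves the stamp forward, so the completions that follow are only charged for the time after this point.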
2756 | 2759 | ||
2757 | /* | 2760 | /* |
2758 | * queue lock must be held | 2761 | * queue lock must be held |
2759 | */ | 2762 | */ |
2760 | void __blk_put_request(struct request_queue *q, struct request *req) | 2763 | void __blk_put_request(struct request_queue *q, struct request *req) |
2761 | { | 2764 | { |
2762 | if (unlikely(!q)) | 2765 | if (unlikely(!q)) |
2763 | return; | 2766 | return; |
2764 | if (unlikely(--req->ref_count)) | 2767 | if (unlikely(--req->ref_count)) |
2765 | return; | 2768 | return; |
2766 | 2769 | ||
2767 | elv_completed_request(q, req); | 2770 | elv_completed_request(q, req); |
2768 | 2771 | ||
2769 | /* | 2772 | /* |
2770 | * Request may not have originated from ll_rw_blk. If not, | 2773 | * Request may not have originated from ll_rw_blk. If not, |
2771 | * it didn't come out of our reserved rq pools | 2774 | * it didn't come out of our reserved rq pools |
2772 | */ | 2775 | */ |
2773 | if (req->cmd_flags & REQ_ALLOCED) { | 2776 | if (req->cmd_flags & REQ_ALLOCED) { |
2774 | int rw = rq_data_dir(req); | 2777 | int rw = rq_data_dir(req); |
2775 | int priv = req->cmd_flags & REQ_ELVPRIV; | 2778 | int priv = req->cmd_flags & REQ_ELVPRIV; |
2776 | 2779 | ||
2777 | BUG_ON(!list_empty(&req->queuelist)); | 2780 | BUG_ON(!list_empty(&req->queuelist)); |
2778 | BUG_ON(!hlist_unhashed(&req->hash)); | 2781 | BUG_ON(!hlist_unhashed(&req->hash)); |
2779 | 2782 | ||
2780 | blk_free_request(q, req); | 2783 | blk_free_request(q, req); |
2781 | freed_request(q, rw, priv); | 2784 | freed_request(q, rw, priv); |
2782 | } | 2785 | } |
2783 | } | 2786 | } |
2784 | 2787 | ||
2785 | EXPORT_SYMBOL_GPL(__blk_put_request); | 2788 | EXPORT_SYMBOL_GPL(__blk_put_request); |
2786 | 2789 | ||
2787 | void blk_put_request(struct request *req) | 2790 | void blk_put_request(struct request *req) |
2788 | { | 2791 | { |
2789 | unsigned long flags; | 2792 | unsigned long flags; |
2790 | struct request_queue *q = req->q; | 2793 | struct request_queue *q = req->q; |
2791 | 2794 | ||
2792 | /* | 2795 | /* |
2793 | * Gee, IDE calls in w/ NULL q. Fix IDE and remove the | 2796 | * Gee, IDE calls in w/ NULL q. Fix IDE and remove the |
2794 | * following if (q) test. | 2797 | * following if (q) test. |
2795 | */ | 2798 | */ |
2796 | if (q) { | 2799 | if (q) { |
2797 | spin_lock_irqsave(q->queue_lock, flags); | 2800 | spin_lock_irqsave(q->queue_lock, flags); |
2798 | __blk_put_request(q, req); | 2801 | __blk_put_request(q, req); |
2799 | spin_unlock_irqrestore(q->queue_lock, flags); | 2802 | spin_unlock_irqrestore(q->queue_lock, flags); |
2800 | } | 2803 | } |
2801 | } | 2804 | } |
2802 | 2805 | ||
2803 | EXPORT_SYMBOL(blk_put_request); | 2806 | EXPORT_SYMBOL(blk_put_request); |
2804 | 2807 | ||
2805 | /** | 2808 | /** |
2806 | * blk_end_sync_rq - executes a completion event on a request | 2809 | * blk_end_sync_rq - executes a completion event on a request |
2807 | * @rq: request to complete | 2810 | * @rq: request to complete |
2808 | * @error: end io status of the request | 2811 | * @error: end io status of the request |
2809 | */ | 2812 | */ |
2810 | void blk_end_sync_rq(struct request *rq, int error) | 2813 | void blk_end_sync_rq(struct request *rq, int error) |
2811 | { | 2814 | { |
2812 | struct completion *waiting = rq->end_io_data; | 2815 | struct completion *waiting = rq->end_io_data; |
2813 | 2816 | ||
2814 | rq->end_io_data = NULL; | 2817 | rq->end_io_data = NULL; |
2815 | __blk_put_request(rq->q, rq); | 2818 | __blk_put_request(rq->q, rq); |
2816 | 2819 | ||
2817 | /* | 2820 | /* |
2818 | * complete last, if this is a stack request the process (and thus | 2821 | * complete last, if this is a stack request the process (and thus |
2819 | * the rq pointer) could be invalid right after this complete() | 2822 | * the rq pointer) could be invalid right after this complete() |
2820 | */ | 2823 | */ |
2821 | complete(waiting); | 2824 | complete(waiting); |
2822 | } | 2825 | } |
2823 | EXPORT_SYMBOL(blk_end_sync_rq); | 2826 | EXPORT_SYMBOL(blk_end_sync_rq); |
2824 | 2827 | ||
2825 | /* | 2828 | /* |
2826 | * Has to be called with the request spinlock acquired | 2829 | * Has to be called with the request spinlock acquired |
2827 | */ | 2830 | */ |
2828 | static int attempt_merge(struct request_queue *q, struct request *req, | 2831 | static int attempt_merge(struct request_queue *q, struct request *req, |
2829 | struct request *next) | 2832 | struct request *next) |
2830 | { | 2833 | { |
2831 | if (!rq_mergeable(req) || !rq_mergeable(next)) | 2834 | if (!rq_mergeable(req) || !rq_mergeable(next)) |
2832 | return 0; | 2835 | return 0; |
2833 | 2836 | ||
2834 | /* | 2837 | /* |
2835 | * not contiguous | 2838 | * not contiguous |
2836 | */ | 2839 | */ |
2837 | if (req->sector + req->nr_sectors != next->sector) | 2840 | if (req->sector + req->nr_sectors != next->sector) |
2838 | return 0; | 2841 | return 0; |
2839 | 2842 | ||
2840 | if (rq_data_dir(req) != rq_data_dir(next) | 2843 | if (rq_data_dir(req) != rq_data_dir(next) |
2841 | || req->rq_disk != next->rq_disk | 2844 | || req->rq_disk != next->rq_disk |
2842 | || next->special) | 2845 | || next->special) |
2843 | return 0; | 2846 | return 0; |
2844 | 2847 | ||
2845 | /* | 2848 | /* |
2846 | * If we are allowed to merge, then append bio list | 2849 | * If we are allowed to merge, then append bio list |
2847 | * from next to rq and release next. merge_requests_fn | 2850 | * from next to rq and release next. merge_requests_fn |
2848 | * will have updated segment counts, update sector | 2851 | * will have updated segment counts, update sector |
2849 | * counts here. | 2852 | * counts here. |
2850 | */ | 2853 | */ |
2851 | if (!ll_merge_requests_fn(q, req, next)) | 2854 | if (!ll_merge_requests_fn(q, req, next)) |
2852 | return 0; | 2855 | return 0; |
2853 | 2856 | ||
2854 | /* | 2857 | /* |
2855 | * At this point we have either done a back merge | 2858 | * At this point we have either done a back merge |
2856 | * or front merge. We need the smaller start_time of | 2859 | * or front merge. We need the smaller start_time of |
2857 | * the merged requests to be the current request | 2860 | * the merged requests to be the current request |
2858 | * for accounting purposes. | 2861 | * for accounting purposes. |
2859 | */ | 2862 | */ |
2860 | if (time_after(req->start_time, next->start_time)) | 2863 | if (time_after(req->start_time, next->start_time)) |
2861 | req->start_time = next->start_time; | 2864 | req->start_time = next->start_time; |
2862 | 2865 | ||
2863 | req->biotail->bi_next = next->bio; | 2866 | req->biotail->bi_next = next->bio; |
2864 | req->biotail = next->biotail; | 2867 | req->biotail = next->biotail; |
2865 | 2868 | ||
2866 | req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; | 2869 | req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; |
2867 | 2870 | ||
2868 | elv_merge_requests(q, req, next); | 2871 | elv_merge_requests(q, req, next); |
2869 | 2872 | ||
2870 | if (req->rq_disk) { | 2873 | if (req->rq_disk) { |
2871 | disk_round_stats(req->rq_disk); | 2874 | disk_round_stats(req->rq_disk); |
2872 | req->rq_disk->in_flight--; | 2875 | req->rq_disk->in_flight--; |
2873 | } | 2876 | } |
2874 | 2877 | ||
2875 | req->ioprio = ioprio_best(req->ioprio, next->ioprio); | 2878 | req->ioprio = ioprio_best(req->ioprio, next->ioprio); |
2876 | 2879 | ||
2877 | __blk_put_request(q, next); | 2880 | __blk_put_request(q, next); |
2878 | return 1; | 2881 | return 1; |
2879 | } | 2882 | } |
2880 | 2883 | ||
2881 | static inline int attempt_back_merge(struct request_queue *q, | 2884 | static inline int attempt_back_merge(struct request_queue *q, |
2882 | struct request *rq) | 2885 | struct request *rq) |
2883 | { | 2886 | { |
2884 | struct request *next = elv_latter_request(q, rq); | 2887 | struct request *next = elv_latter_request(q, rq); |
2885 | 2888 | ||
2886 | if (next) | 2889 | if (next) |
2887 | return attempt_merge(q, rq, next); | 2890 | return attempt_merge(q, rq, next); |
2888 | 2891 | ||
2889 | return 0; | 2892 | return 0; |
2890 | } | 2893 | } |
2891 | 2894 | ||
2892 | static inline int attempt_front_merge(struct request_queue *q, | 2895 | static inline int attempt_front_merge(struct request_queue *q, |
2893 | struct request *rq) | 2896 | struct request *rq) |
2894 | { | 2897 | { |
2895 | struct request *prev = elv_former_request(q, rq); | 2898 | struct request *prev = elv_former_request(q, rq); |
2896 | 2899 | ||
2897 | if (prev) | 2900 | if (prev) |
2898 | return attempt_merge(q, prev, rq); | 2901 | return attempt_merge(q, prev, rq); |
2899 | 2902 | ||
2900 | return 0; | 2903 | return 0; |
2901 | } | 2904 | } |
2902 | 2905 | ||
2903 | static void init_request_from_bio(struct request *req, struct bio *bio) | 2906 | static void init_request_from_bio(struct request *req, struct bio *bio) |
2904 | { | 2907 | { |
2905 | req->cmd_type = REQ_TYPE_FS; | 2908 | req->cmd_type = REQ_TYPE_FS; |
2906 | 2909 | ||
2907 | /* | 2910 | /* |
2908 | * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) | 2911 | * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) |
2909 | */ | 2912 | */ |
2910 | if (bio_rw_ahead(bio) || bio_failfast(bio)) | 2913 | if (bio_rw_ahead(bio) || bio_failfast(bio)) |
2911 | req->cmd_flags |= REQ_FAILFAST; | 2914 | req->cmd_flags |= REQ_FAILFAST; |
2912 | 2915 | ||
2913 | /* | 2916 | /* |
2914 | * REQ_BARRIER implies no merging, but lets make it explicit | 2917 | * REQ_BARRIER implies no merging, but lets make it explicit |
2915 | */ | 2918 | */ |
2916 | if (unlikely(bio_barrier(bio))) | 2919 | if (unlikely(bio_barrier(bio))) |
2917 | req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); | 2920 | req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); |
2918 | 2921 | ||
2919 | if (bio_sync(bio)) | 2922 | if (bio_sync(bio)) |
2920 | req->cmd_flags |= REQ_RW_SYNC; | 2923 | req->cmd_flags |= REQ_RW_SYNC; |
2921 | if (bio_rw_meta(bio)) | 2924 | if (bio_rw_meta(bio)) |
2922 | req->cmd_flags |= REQ_RW_META; | 2925 | req->cmd_flags |= REQ_RW_META; |
2923 | 2926 | ||
2924 | req->errors = 0; | 2927 | req->errors = 0; |
2925 | req->hard_sector = req->sector = bio->bi_sector; | 2928 | req->hard_sector = req->sector = bio->bi_sector; |
2926 | req->ioprio = bio_prio(bio); | 2929 | req->ioprio = bio_prio(bio); |
2927 | req->start_time = jiffies; | 2930 | req->start_time = jiffies; |
2928 | blk_rq_bio_prep(req->q, req, bio); | 2931 | blk_rq_bio_prep(req->q, req, bio); |
2929 | } | 2932 | } |
2930 | 2933 | ||
2931 | static int __make_request(struct request_queue *q, struct bio *bio) | 2934 | static int __make_request(struct request_queue *q, struct bio *bio) |
2932 | { | 2935 | { |
2933 | struct request *req; | 2936 | struct request *req; |
2934 | int el_ret, nr_sectors, barrier, err; | 2937 | int el_ret, nr_sectors, barrier, err; |
2935 | const unsigned short prio = bio_prio(bio); | 2938 | const unsigned short prio = bio_prio(bio); |
2936 | const int sync = bio_sync(bio); | 2939 | const int sync = bio_sync(bio); |
2937 | int rw_flags; | 2940 | int rw_flags; |
2938 | 2941 | ||
2939 | nr_sectors = bio_sectors(bio); | 2942 | nr_sectors = bio_sectors(bio); |
2940 | 2943 | ||
2941 | /* | 2944 | /* |
2942 | * low level driver can indicate that it wants pages above a | 2945 | * low level driver can indicate that it wants pages above a |
2943 | * certain limit bounced to low memory (ie for highmem, or even | 2946 | * certain limit bounced to low memory (ie for highmem, or even |
2944 | * ISA dma in theory) | 2947 | * ISA dma in theory) |
2945 | */ | 2948 | */ |
2946 | blk_queue_bounce(q, &bio); | 2949 | blk_queue_bounce(q, &bio); |
2947 | 2950 | ||
2948 | barrier = bio_barrier(bio); | 2951 | barrier = bio_barrier(bio); |
2949 | if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { | 2952 | if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { |
2950 | err = -EOPNOTSUPP; | 2953 | err = -EOPNOTSUPP; |
2951 | goto end_io; | 2954 | goto end_io; |
2952 | } | 2955 | } |
2953 | 2956 | ||
2954 | spin_lock_irq(q->queue_lock); | 2957 | spin_lock_irq(q->queue_lock); |
2955 | 2958 | ||
2956 | if (unlikely(barrier) || elv_queue_empty(q)) | 2959 | if (unlikely(barrier) || elv_queue_empty(q)) |
2957 | goto get_rq; | 2960 | goto get_rq; |
2958 | 2961 | ||
2959 | el_ret = elv_merge(q, &req, bio); | 2962 | el_ret = elv_merge(q, &req, bio); |
2960 | switch (el_ret) { | 2963 | switch (el_ret) { |
2961 | case ELEVATOR_BACK_MERGE: | 2964 | case ELEVATOR_BACK_MERGE: |
2962 | BUG_ON(!rq_mergeable(req)); | 2965 | BUG_ON(!rq_mergeable(req)); |
2963 | 2966 | ||
2964 | if (!ll_back_merge_fn(q, req, bio)) | 2967 | if (!ll_back_merge_fn(q, req, bio)) |
2965 | break; | 2968 | break; |
2966 | 2969 | ||
2967 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); | 2970 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); |
2968 | 2971 | ||
2969 | req->biotail->bi_next = bio; | 2972 | req->biotail->bi_next = bio; |
2970 | req->biotail = bio; | 2973 | req->biotail = bio; |
2971 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; | 2974 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; |
2972 | req->ioprio = ioprio_best(req->ioprio, prio); | 2975 | req->ioprio = ioprio_best(req->ioprio, prio); |
2973 | drive_stat_acct(req, nr_sectors, 0); | 2976 | drive_stat_acct(req, nr_sectors, 0); |
2974 | if (!attempt_back_merge(q, req)) | 2977 | if (!attempt_back_merge(q, req)) |
2975 | elv_merged_request(q, req, el_ret); | 2978 | elv_merged_request(q, req, el_ret); |
2976 | goto out; | 2979 | goto out; |
2977 | 2980 | ||
2978 | case ELEVATOR_FRONT_MERGE: | 2981 | case ELEVATOR_FRONT_MERGE: |
2979 | BUG_ON(!rq_mergeable(req)); | 2982 | BUG_ON(!rq_mergeable(req)); |
2980 | 2983 | ||
2981 | if (!ll_front_merge_fn(q, req, bio)) | 2984 | if (!ll_front_merge_fn(q, req, bio)) |
2982 | break; | 2985 | break; |
2983 | 2986 | ||
2984 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); | 2987 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); |
2985 | 2988 | ||
2986 | bio->bi_next = req->bio; | 2989 | bio->bi_next = req->bio; |
2987 | req->bio = bio; | 2990 | req->bio = bio; |
2988 | 2991 | ||
2989 | /* | 2992 | /* |
2990 | * may not be valid. If the low level driver said | 2993 | * may not be valid. If the low level driver said |
2991 | * it didn't need a bounce buffer then it better | 2994 | * it didn't need a bounce buffer then it better |
2992 | * not touch req->buffer either... | 2995 | * not touch req->buffer either... |
2993 | */ | 2996 | */ |
2994 | req->buffer = bio_data(bio); | 2997 | req->buffer = bio_data(bio); |
2995 | req->current_nr_sectors = bio_cur_sectors(bio); | 2998 | req->current_nr_sectors = bio_cur_sectors(bio); |
2996 | req->hard_cur_sectors = req->current_nr_sectors; | 2999 | req->hard_cur_sectors = req->current_nr_sectors; |
2997 | req->sector = req->hard_sector = bio->bi_sector; | 3000 | req->sector = req->hard_sector = bio->bi_sector; |
2998 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; | 3001 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; |
2999 | req->ioprio = ioprio_best(req->ioprio, prio); | 3002 | req->ioprio = ioprio_best(req->ioprio, prio); |
3000 | drive_stat_acct(req, nr_sectors, 0); | 3003 | drive_stat_acct(req, nr_sectors, 0); |
3001 | if (!attempt_front_merge(q, req)) | 3004 | if (!attempt_front_merge(q, req)) |
3002 | elv_merged_request(q, req, el_ret); | 3005 | elv_merged_request(q, req, el_ret); |
3003 | goto out; | 3006 | goto out; |
3004 | 3007 | ||
3005 | /* ELV_NO_MERGE: elevator says don't/can't merge. */ | 3008 | /* ELV_NO_MERGE: elevator says don't/can't merge. */ |
3006 | default: | 3009 | default: |
3007 | ; | 3010 | ; |
3008 | } | 3011 | } |
3009 | 3012 | ||
3010 | get_rq: | 3013 | get_rq: |
3011 | /* | 3014 | /* |
3012 | * This sync check and mask will be re-done in init_request_from_bio(), | 3015 | * This sync check and mask will be re-done in init_request_from_bio(), |
3013 | * but we need to set it earlier to expose the sync flag to the | 3016 | * but we need to set it earlier to expose the sync flag to the |
3014 | * rq allocator and io schedulers. | 3017 | * rq allocator and io schedulers. |
3015 | */ | 3018 | */ |
3016 | rw_flags = bio_data_dir(bio); | 3019 | rw_flags = bio_data_dir(bio); |
3017 | if (sync) | 3020 | if (sync) |
3018 | rw_flags |= REQ_RW_SYNC; | 3021 | rw_flags |= REQ_RW_SYNC; |
3019 | 3022 | ||
3020 | /* | 3023 | /* |
3021 | * Grab a free request. This might sleep but cannot fail. | 3024 | * Grab a free request. This might sleep but cannot fail. |
3022 | * Returns with the queue unlocked. | 3025 | * Returns with the queue unlocked. |
3023 | */ | 3026 | */ |
3024 | req = get_request_wait(q, rw_flags, bio); | 3027 | req = get_request_wait(q, rw_flags, bio); |
3025 | 3028 | ||
3026 | /* | 3029 | /* |
3027 | * After dropping the lock and possibly sleeping here, our request | 3030 | * After dropping the lock and possibly sleeping here, our request |
3028 | * may now be mergeable after it had proven unmergeable (above). | 3031 | * may now be mergeable after it had proven unmergeable (above). |
3029 | * We don't worry about that case for efficiency. It won't happen | 3032 | * We don't worry about that case for efficiency. It won't happen |
3030 | * often, and the elevators are able to handle it. | 3033 | * often, and the elevators are able to handle it. |
3031 | */ | 3034 | */ |
3032 | init_request_from_bio(req, bio); | 3035 | init_request_from_bio(req, bio); |
3033 | 3036 | ||
3034 | spin_lock_irq(q->queue_lock); | 3037 | spin_lock_irq(q->queue_lock); |
3035 | if (elv_queue_empty(q)) | 3038 | if (elv_queue_empty(q)) |
3036 | blk_plug_device(q); | 3039 | blk_plug_device(q); |
3037 | add_request(q, req); | 3040 | add_request(q, req); |
3038 | out: | 3041 | out: |
3039 | if (sync) | 3042 | if (sync) |
3040 | __generic_unplug_device(q); | 3043 | __generic_unplug_device(q); |
3041 | 3044 | ||
3042 | spin_unlock_irq(q->queue_lock); | 3045 | spin_unlock_irq(q->queue_lock); |
3043 | return 0; | 3046 | return 0; |
3044 | 3047 | ||
3045 | end_io: | 3048 | end_io: |
3046 | bio_endio(bio, err); | 3049 | bio_endio(bio, err); |
3047 | return 0; | 3050 | return 0; |
3048 | } | 3051 | } |
3049 | 3052 | ||
3050 | /* | 3053 | /* |
3051 | * If bio->bi_dev is a partition, remap the location | 3054 | * If bio->bi_dev is a partition, remap the location |
3052 | */ | 3055 | */ |
3053 | static inline void blk_partition_remap(struct bio *bio) | 3056 | static inline void blk_partition_remap(struct bio *bio) |
3054 | { | 3057 | { |
3055 | struct block_device *bdev = bio->bi_bdev; | 3058 | struct block_device *bdev = bio->bi_bdev; |
3056 | 3059 | ||
3057 | if (bdev != bdev->bd_contains) { | 3060 | if (bio_sectors(bio) && bdev != bdev->bd_contains) { |
3058 | struct hd_struct *p = bdev->bd_part; | 3061 | struct hd_struct *p = bdev->bd_part; |
3059 | const int rw = bio_data_dir(bio); | 3062 | const int rw = bio_data_dir(bio); |
3060 | 3063 | ||
3061 | p->sectors[rw] += bio_sectors(bio); | 3064 | p->sectors[rw] += bio_sectors(bio); |
3062 | p->ios[rw]++; | 3065 | p->ios[rw]++; |
3063 | 3066 | ||
3064 | bio->bi_sector += p->start_sect; | 3067 | bio->bi_sector += p->start_sect; |
3065 | bio->bi_bdev = bdev->bd_contains; | 3068 | bio->bi_bdev = bdev->bd_contains; |
3066 | 3069 | ||
3067 | blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, | 3070 | blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, |
3068 | bdev->bd_dev, bio->bi_sector, | 3071 | bdev->bd_dev, bio->bi_sector, |
3069 | bio->bi_sector - p->start_sect); | 3072 | bio->bi_sector - p->start_sect); |
3070 | } | 3073 | } |
3071 | } | 3074 | } |
3072 | 3075 | ||
3073 | static void handle_bad_sector(struct bio *bio) | 3076 | static void handle_bad_sector(struct bio *bio) |
3074 | { | 3077 | { |
3075 | char b[BDEVNAME_SIZE]; | 3078 | char b[BDEVNAME_SIZE]; |
3076 | 3079 | ||
3077 | printk(KERN_INFO "attempt to access beyond end of device\n"); | 3080 | printk(KERN_INFO "attempt to access beyond end of device\n"); |
3078 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", | 3081 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", |
3079 | bdevname(bio->bi_bdev, b), | 3082 | bdevname(bio->bi_bdev, b), |
3080 | bio->bi_rw, | 3083 | bio->bi_rw, |
3081 | (unsigned long long)bio->bi_sector + bio_sectors(bio), | 3084 | (unsigned long long)bio->bi_sector + bio_sectors(bio), |
3082 | (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); | 3085 | (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); |
3083 | 3086 | ||
3084 | set_bit(BIO_EOF, &bio->bi_flags); | 3087 | set_bit(BIO_EOF, &bio->bi_flags); |
3085 | } | 3088 | } |
3086 | 3089 | ||
3087 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 3090 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
3088 | 3091 | ||
3089 | static DECLARE_FAULT_ATTR(fail_make_request); | 3092 | static DECLARE_FAULT_ATTR(fail_make_request); |
3090 | 3093 | ||
3091 | static int __init setup_fail_make_request(char *str) | 3094 | static int __init setup_fail_make_request(char *str) |
3092 | { | 3095 | { |
3093 | return setup_fault_attr(&fail_make_request, str); | 3096 | return setup_fault_attr(&fail_make_request, str); |
3094 | } | 3097 | } |
3095 | __setup("fail_make_request=", setup_fail_make_request); | 3098 | __setup("fail_make_request=", setup_fail_make_request); |
3096 | 3099 | ||
3097 | static int should_fail_request(struct bio *bio) | 3100 | static int should_fail_request(struct bio *bio) |
3098 | { | 3101 | { |
3099 | if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) || | 3102 | if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) || |
3100 | (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail)) | 3103 | (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail)) |
3101 | return should_fail(&fail_make_request, bio->bi_size); | 3104 | return should_fail(&fail_make_request, bio->bi_size); |
3102 | 3105 | ||
3103 | return 0; | 3106 | return 0; |
3104 | } | 3107 | } |
3105 | 3108 | ||
3106 | static int __init fail_make_request_debugfs(void) | 3109 | static int __init fail_make_request_debugfs(void) |
3107 | { | 3110 | { |
3108 | return init_fault_attr_dentries(&fail_make_request, | 3111 | return init_fault_attr_dentries(&fail_make_request, |
3109 | "fail_make_request"); | 3112 | "fail_make_request"); |
3110 | } | 3113 | } |
3111 | 3114 | ||
3112 | late_initcall(fail_make_request_debugfs); | 3115 | late_initcall(fail_make_request_debugfs); |
3113 | 3116 | ||
3114 | #else /* CONFIG_FAIL_MAKE_REQUEST */ | 3117 | #else /* CONFIG_FAIL_MAKE_REQUEST */ |
3115 | 3118 | ||
3116 | static inline int should_fail_request(struct bio *bio) | 3119 | static inline int should_fail_request(struct bio *bio) |
3117 | { | 3120 | { |
3118 | return 0; | 3121 | return 0; |
3119 | } | 3122 | } |
3120 | 3123 | ||
3121 | #endif /* CONFIG_FAIL_MAKE_REQUEST */ | 3124 | #endif /* CONFIG_FAIL_MAKE_REQUEST */ |
3122 | 3125 | ||
3123 | /* | 3126 | /* |
3124 | * Check whether this bio extends beyond the end of the device. | 3127 | * Check whether this bio extends beyond the end of the device. |
3125 | */ | 3128 | */ |
3126 | static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) | 3129 | static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) |
3127 | { | 3130 | { |
3128 | sector_t maxsector; | 3131 | sector_t maxsector; |
3129 | 3132 | ||
3130 | if (!nr_sectors) | 3133 | if (!nr_sectors) |
3131 | return 0; | 3134 | return 0; |
3132 | 3135 | ||
3133 | /* Test device or partition size, when known. */ | 3136 | /* Test device or partition size, when known. */ |
3134 | maxsector = bio->bi_bdev->bd_inode->i_size >> 9; | 3137 | maxsector = bio->bi_bdev->bd_inode->i_size >> 9; |
3135 | if (maxsector) { | 3138 | if (maxsector) { |
3136 | sector_t sector = bio->bi_sector; | 3139 | sector_t sector = bio->bi_sector; |
3137 | 3140 | ||
3138 | if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { | 3141 | if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { |
3139 | /* | 3142 | /* |
3140 | * This may well happen - the kernel calls bread() | 3143 | * This may well happen - the kernel calls bread() |
3141 | * without checking the size of the device, e.g., when | 3144 | * without checking the size of the device, e.g., when |
3142 | * mounting a device. | 3145 | * mounting a device. |
3143 | */ | 3146 | */ |
3144 | handle_bad_sector(bio); | 3147 | handle_bad_sector(bio); |
3145 | return 1; | 3148 | return 1; |
3146 | } | 3149 | } |
3147 | } | 3150 | } |
3148 | 3151 | ||
3149 | return 0; | 3152 | return 0; |
3150 | } | 3153 | } |
3151 | 3154 | ||
3152 | /** | 3155 | /** |
3153 | * generic_make_request: hand a buffer to its device driver for I/O | 3156 | * generic_make_request: hand a buffer to its device driver for I/O |
3154 | * @bio: The bio describing the location in memory and on the device. | 3157 | * @bio: The bio describing the location in memory and on the device. |
3155 | * | 3158 | * |
3156 | * generic_make_request() is used to make I/O requests of block | 3159 | * generic_make_request() is used to make I/O requests of block |
3157 | * devices. It is passed a &struct bio, which describes the I/O that needs | 3160 | * devices. It is passed a &struct bio, which describes the I/O that needs |
3158 | * to be done. | 3161 | * to be done. |
3159 | * | 3162 | * |
3160 | * generic_make_request() does not return any status. The | 3163 | * generic_make_request() does not return any status. The |
3161 | * success/failure status of the request, along with notification of | 3164 | * success/failure status of the request, along with notification of |
3162 | * completion, is delivered asynchronously through the bio->bi_end_io | 3165 | * completion, is delivered asynchronously through the bio->bi_end_io |
3163 | * function described (one day) elsewhere. | 3166 | * function described (one day) elsewhere. |
3164 | * | 3167 | * |
3165 | * The caller of generic_make_request must make sure that bi_io_vec | 3168 | * The caller of generic_make_request must make sure that bi_io_vec |
3166 | * are set to describe the memory buffer, and that bi_dev and bi_sector are | 3169 | * are set to describe the memory buffer, and that bi_dev and bi_sector are |
3167 | * set to describe the device address, and the | 3170 | * set to describe the device address, and the |
3168 | * bi_end_io and optionally bi_private are set to describe how | 3171 | * bi_end_io and optionally bi_private are set to describe how |
3169 | * completion notification should be signaled. | 3172 | * completion notification should be signaled. |
3170 | * | 3173 | * |
3171 | * generic_make_request and the drivers it calls may use bi_next if this | 3174 | * generic_make_request and the drivers it calls may use bi_next if this |
3172 | * bio happens to be merged with someone else, and may change bi_dev and | 3175 | * bio happens to be merged with someone else, and may change bi_dev and |
3173 | * bi_sector for remaps as it sees fit. So the values of these fields | 3176 | * bi_sector for remaps as it sees fit. So the values of these fields |
3174 | * should NOT be depended on after the call to generic_make_request. | 3177 | * should NOT be depended on after the call to generic_make_request. |
3175 | */ | 3178 | */ |
3176 | static inline void __generic_make_request(struct bio *bio) | 3179 | static inline void __generic_make_request(struct bio *bio) |
3177 | { | 3180 | { |
3178 | struct request_queue *q; | 3181 | struct request_queue *q; |
3179 | sector_t old_sector; | 3182 | sector_t old_sector; |
3180 | int ret, nr_sectors = bio_sectors(bio); | 3183 | int ret, nr_sectors = bio_sectors(bio); |
3181 | dev_t old_dev; | 3184 | dev_t old_dev; |
3182 | 3185 | ||
3183 | might_sleep(); | 3186 | might_sleep(); |
3184 | 3187 | ||
3185 | if (bio_check_eod(bio, nr_sectors)) | 3188 | if (bio_check_eod(bio, nr_sectors)) |
3186 | goto end_io; | 3189 | goto end_io; |
3187 | 3190 | ||
3188 | /* | 3191 | /* |
3189 | * Resolve the mapping until finished. (drivers are | 3192 | * Resolve the mapping until finished. (drivers are |
3190 | * still free to implement/resolve their own stacking | 3193 | * still free to implement/resolve their own stacking |
3191 | * by explicitly returning 0) | 3194 | * by explicitly returning 0) |
3192 | * | 3195 | * |
3193 | * NOTE: we don't repeat the blk_size check for each new device. | 3196 | * NOTE: we don't repeat the blk_size check for each new device. |
3194 | * Stacking drivers are expected to know what they are doing. | 3197 | * Stacking drivers are expected to know what they are doing. |
3195 | */ | 3198 | */ |
3196 | old_sector = -1; | 3199 | old_sector = -1; |
3197 | old_dev = 0; | 3200 | old_dev = 0; |
3198 | do { | 3201 | do { |
3199 | char b[BDEVNAME_SIZE]; | 3202 | char b[BDEVNAME_SIZE]; |
3200 | 3203 | ||
3201 | q = bdev_get_queue(bio->bi_bdev); | 3204 | q = bdev_get_queue(bio->bi_bdev); |
3202 | if (!q) { | 3205 | if (!q) { |
3203 | printk(KERN_ERR | 3206 | printk(KERN_ERR |
3204 | "generic_make_request: Trying to access " | 3207 | "generic_make_request: Trying to access " |
3205 | "nonexistent block-device %s (%Lu)\n", | 3208 | "nonexistent block-device %s (%Lu)\n", |
3206 | bdevname(bio->bi_bdev, b), | 3209 | bdevname(bio->bi_bdev, b), |
3207 | (long long) bio->bi_sector); | 3210 | (long long) bio->bi_sector); |
3208 | end_io: | 3211 | end_io: |
3209 | bio_endio(bio, -EIO); | 3212 | bio_endio(bio, -EIO); |
3210 | break; | 3213 | break; |
3211 | } | 3214 | } |
3212 | 3215 | ||
3213 | if (unlikely(nr_sectors > q->max_hw_sectors)) { | 3216 | if (unlikely(nr_sectors > q->max_hw_sectors)) { |
3214 | printk("bio too big device %s (%u > %u)\n", | 3217 | printk("bio too big device %s (%u > %u)\n", |
3215 | bdevname(bio->bi_bdev, b), | 3218 | bdevname(bio->bi_bdev, b), |
3216 | bio_sectors(bio), | 3219 | bio_sectors(bio), |
3217 | q->max_hw_sectors); | 3220 | q->max_hw_sectors); |
3218 | goto end_io; | 3221 | goto end_io; |
3219 | } | 3222 | } |
3220 | 3223 | ||
3221 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | 3224 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) |
3222 | goto end_io; | 3225 | goto end_io; |
3223 | 3226 | ||
3224 | if (should_fail_request(bio)) | 3227 | if (should_fail_request(bio)) |
3225 | goto end_io; | 3228 | goto end_io; |
3226 | 3229 | ||
3227 | /* | 3230 | /* |
3228 | * If this device has partitions, remap block n | 3231 | * If this device has partitions, remap block n |
3229 | * of partition p to block n+start(p) of the disk. | 3232 | * of partition p to block n+start(p) of the disk. |
3230 | */ | 3233 | */ |
3231 | blk_partition_remap(bio); | 3234 | blk_partition_remap(bio); |
3232 | 3235 | ||
3233 | if (old_sector != -1) | 3236 | if (old_sector != -1) |
3234 | blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, | 3237 | blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, |
3235 | old_sector); | 3238 | old_sector); |
3236 | 3239 | ||
3237 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE); | 3240 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE); |
3238 | 3241 | ||
3239 | old_sector = bio->bi_sector; | 3242 | old_sector = bio->bi_sector; |
3240 | old_dev = bio->bi_bdev->bd_dev; | 3243 | old_dev = bio->bi_bdev->bd_dev; |
3241 | 3244 | ||
3242 | if (bio_check_eod(bio, nr_sectors)) | 3245 | if (bio_check_eod(bio, nr_sectors)) |
3243 | goto end_io; | 3246 | goto end_io; |
3244 | 3247 | ||
3245 | ret = q->make_request_fn(q, bio); | 3248 | ret = q->make_request_fn(q, bio); |
3246 | } while (ret); | 3249 | } while (ret); |
3247 | } | 3250 | } |
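Most submitters reach this path through submit_bio(), which sets bi_rw and then calls generic_make_request() below. A hedged sketch of building and firing a single-page read bio; the completion handler, page and error handling are deliberately minimal and illustrative only:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/fs.h>

/* called once the whole bio has completed (two-argument bi_end_io prototype) */
static void example_bio_end_io(struct bio *bio, int error)
{
	/* a real caller would record 'error' and wake a waiter via bi_private */
	bio_put(bio);
}

static int example_read_page(struct block_device *bdev, sector_t sector,
			     struct page *page)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	if (!bio)
		return -ENOMEM;

	bio->bi_bdev = bdev;
	bio->bi_sector = sector;
	bio->bi_end_io = example_bio_end_io;

	if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
		bio_put(bio);
		return -EIO;
	}

	submit_bio(READ, bio);		/* sets bi_rw and ends up in generic_make_request() */
	return 0;			/* returns before the read actually completes */
}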
3248 | 3251 | ||
3249 | /* | 3252 | /* |
3250 | * We only want one ->make_request_fn to be active at a time, | 3253 | * We only want one ->make_request_fn to be active at a time, |
3251 | * else stack usage with stacked devices could be a problem. | 3254 | * else stack usage with stacked devices could be a problem. |
3252 | * So use current->bio_{list,tail} to keep a list of requests | 3255 | * So use current->bio_{list,tail} to keep a list of requests |
3253 | * submitted by a make_request_fn function. | 3256 | * submitted by a make_request_fn function. |
3254 | * current->bio_tail is also used as a flag to say if | 3257 | * current->bio_tail is also used as a flag to say if |
3255 | * generic_make_request is currently active in this task or not. | 3258 | * generic_make_request is currently active in this task or not. |
3256 | * If it is NULL, then no make_request is active. If it is non-NULL, | 3259 | * If it is NULL, then no make_request is active. If it is non-NULL, |
3257 | * then a make_request is active, and new requests should be added | 3260 | * then a make_request is active, and new requests should be added |
3258 | * at the tail | 3261 | * at the tail |
3259 | */ | 3262 | */ |
3260 | void generic_make_request(struct bio *bio) | 3263 | void generic_make_request(struct bio *bio) |
3261 | { | 3264 | { |
3262 | if (current->bio_tail) { | 3265 | if (current->bio_tail) { |
3263 | /* make_request is active */ | 3266 | /* make_request is active */ |
3264 | *(current->bio_tail) = bio; | 3267 | *(current->bio_tail) = bio; |
3265 | bio->bi_next = NULL; | 3268 | bio->bi_next = NULL; |
3266 | current->bio_tail = &bio->bi_next; | 3269 | current->bio_tail = &bio->bi_next; |
3267 | return; | 3270 | return; |
3268 | } | 3271 | } |
3269 | /* following loop may be a bit non-obvious, and so deserves some | 3272 | /* following loop may be a bit non-obvious, and so deserves some |
3270 | * explanation. | 3273 | * explanation. |
3271 | * Before entering the loop, bio->bi_next is NULL (as all callers | 3274 | * Before entering the loop, bio->bi_next is NULL (as all callers |
3272 | * ensure that) so we have a list with a single bio. | 3275 | * ensure that) so we have a list with a single bio. |
3273 | * We pretend that we have just taken it off a longer list, so | 3276 | * We pretend that we have just taken it off a longer list, so |
3274 | * we assign bio_list to the next (which is NULL) and bio_tail | 3277 | * we assign bio_list to the next (which is NULL) and bio_tail |
3275 | * to &bio_list, thus initialising the bio_list of new bios to be | 3278 | * to &bio_list, thus initialising the bio_list of new bios to be |
3276 | * added. __generic_make_request may indeed add some more bios | 3279 | * added. __generic_make_request may indeed add some more bios |
3277 | * through a recursive call to generic_make_request. If it | 3280 | * through a recursive call to generic_make_request. If it |
3278 | * did, we find a non-NULL value in bio_list and re-enter the loop | 3281 | * did, we find a non-NULL value in bio_list and re-enter the loop |
3279 | * from the top. In this case we really did just take the bio | 3282 | * from the top. In this case we really did just take the bio |
3280 | * off the top of the list (no pretending) and so fix up bio_list and | 3283 | * off the top of the list (no pretending) and so fix up bio_list and |
3281 | * bio_tail or bi_next, and call into __generic_make_request again. | 3284 | * bio_tail or bi_next, and call into __generic_make_request again. |
3282 | * | 3285 | * |
3283 | * The loop was structured like this to make only one call to | 3286 | * The loop was structured like this to make only one call to |
3284 | * __generic_make_request (which is important as it is large and | 3287 | * __generic_make_request (which is important as it is large and |
3285 | * inlined) and to keep the structure simple. | 3288 | * inlined) and to keep the structure simple. |
3286 | */ | 3289 | */ |
3287 | BUG_ON(bio->bi_next); | 3290 | BUG_ON(bio->bi_next); |
3288 | do { | 3291 | do { |
3289 | current->bio_list = bio->bi_next; | 3292 | current->bio_list = bio->bi_next; |
3290 | if (bio->bi_next == NULL) | 3293 | if (bio->bi_next == NULL) |
3291 | current->bio_tail = ¤t->bio_list; | 3294 | current->bio_tail = ¤t->bio_list; |
3292 | else | 3295 | else |
3293 | bio->bi_next = NULL; | 3296 | bio->bi_next = NULL; |
3294 | __generic_make_request(bio); | 3297 | __generic_make_request(bio); |
3295 | bio = current->bio_list; | 3298 | bio = current->bio_list; |
3296 | } while (bio); | 3299 | } while (bio); |
3297 | current->bio_tail = NULL; /* deactivate */ | 3300 | current->bio_tail = NULL; /* deactivate */ |
3298 | } | 3301 | } |
3299 | 3302 | ||
3300 | EXPORT_SYMBOL(generic_make_request); | 3303 | EXPORT_SYMBOL(generic_make_request); |
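/*
 * Illustrative sketch only (plain user-space C, not kernel code): the same
 * "flatten recursion into iteration" trick that generic_make_request() uses
 * with current->bio_list and current->bio_tail, shown with a made-up 'node'
 * type. All names here (node, submit, handle_one) are assumptions for the
 * example; only the list/tail mechanics mirror the function above.
 */
#include <stdio.h>

struct node {
	struct node *next;
	int id;
};

static struct node *list;	/* plays the role of current->bio_list */
static struct node **tail;	/* plays current->bio_tail; NULL => inactive */

static void handle_one(struct node *n);	/* may call submit() again */

static void submit(struct node *n)	/* n->next must be NULL, as for bios */
{
	if (tail) {			/* already active: just append at tail */
		*tail = n;
		n->next = NULL;
		tail = &n->next;
		return;
	}
	do {				/* top-level caller: drain iteratively */
		list = n->next;
		if (n->next == NULL)
			tail = &list;
		else
			n->next = NULL;
		handle_one(n);		/* nested submit() calls land on 'list' */
		n = list;
	} while (n);
	tail = NULL;			/* deactivate */
}

static void handle_one(struct node *n)
{
	printf("handling node %d\n", n->id);
}

int main(void)
{
	struct node a = { .next = NULL, .id = 1 };

	submit(&a);
	return 0;
}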
3301 | 3304 | ||
3302 | /** | 3305 | /** |
3303 | * submit_bio: submit a bio to the block device layer for I/O | 3306 | * submit_bio: submit a bio to the block device layer for I/O |
3304 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) | 3307 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) |
3305 | * @bio: The &struct bio which describes the I/O | 3308 | * @bio: The &struct bio which describes the I/O |
3306 | * | 3309 | * |
3307 | * submit_bio() is very similar in purpose to generic_make_request(), and | 3310 | * submit_bio() is very similar in purpose to generic_make_request(), and |
3308 | * uses that function to do most of the work. Both are fairly rough | 3311 | * uses that function to do most of the work. Both are fairly rough |
3309 | * interfaces; @bio must be set up and ready for I/O. | 3312 | * interfaces; @bio must be set up and ready for I/O. |
3310 | * | 3313 | * |
3311 | */ | 3314 | */ |
3312 | void submit_bio(int rw, struct bio *bio) | 3315 | void submit_bio(int rw, struct bio *bio) |
3313 | { | 3316 | { |
3314 | int count = bio_sectors(bio); | 3317 | int count = bio_sectors(bio); |
3315 | 3318 | ||
3316 | BIO_BUG_ON(!bio->bi_size); | ||
3317 | BIO_BUG_ON(!bio->bi_io_vec); | ||
3318 | bio->bi_rw |= rw; | 3319 | bio->bi_rw |= rw; |
3319 | if (rw & WRITE) { | ||
3320 | count_vm_events(PGPGOUT, count); | ||
3321 | } else { | ||
3322 | task_io_account_read(bio->bi_size); | ||
3323 | count_vm_events(PGPGIN, count); | ||
3324 | } | ||
3325 | 3320 | ||
3326 | if (unlikely(block_dump)) { | 3321 | /* |
3327 | char b[BDEVNAME_SIZE]; | 3322 | * If it's a regular read/write or a barrier with data attached, |
3328 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", | 3323 | * go through the normal accounting stuff before submission. |
3329 | current->comm, current->pid, | 3324 | */ |
3330 | (rw & WRITE) ? "WRITE" : "READ", | 3325 | if (!bio_empty_barrier(bio)) { |
3331 | (unsigned long long)bio->bi_sector, | 3326 | |
3332 | bdevname(bio->bi_bdev,b)); | 3327 | BIO_BUG_ON(!bio->bi_size); |
3328 | BIO_BUG_ON(!bio->bi_io_vec); | ||
3329 | |||
3330 | if (rw & WRITE) { | ||
3331 | count_vm_events(PGPGOUT, count); | ||
3332 | } else { | ||
3333 | task_io_account_read(bio->bi_size); | ||
3334 | count_vm_events(PGPGIN, count); | ||
3335 | } | ||
3336 | |||
3337 | if (unlikely(block_dump)) { | ||
3338 | char b[BDEVNAME_SIZE]; | ||
3339 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", | ||
3340 | current->comm, current->pid, | ||
3341 | (rw & WRITE) ? "WRITE" : "READ", | ||
3342 | (unsigned long long)bio->bi_sector, | ||
3343 | bdevname(bio->bi_bdev,b)); | ||
3344 | } | ||
3333 | } | 3345 | } |
3334 | 3346 | ||
3335 | generic_make_request(bio); | 3347 | generic_make_request(bio); |
3336 | } | 3348 | } |
3337 | 3349 | ||
3338 | EXPORT_SYMBOL(submit_bio); | 3350 | EXPORT_SYMBOL(submit_bio); |
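/*
 * Hedged sketch only: one way a caller might use the data-less barrier
 * support that submit_bio() now tolerates. It assumes the WRITE_BARRIER rw
 * flag and the two-argument bio_endio()/bi_end_io convention seen elsewhere
 * in this file; the example_* names are made up for illustration.
 */
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/completion.h>

static void example_barrier_end_io(struct bio *bio, int err)
{
	complete((struct completion *) bio->bi_private);
}

static int example_issue_empty_barrier(struct block_device *bdev)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct bio *bio;
	int ret = 0;

	bio = bio_alloc(GFP_KERNEL, 0);		/* zero bio_vecs: no data */
	if (!bio)
		return -ENOMEM;

	bio->bi_bdev = bdev;
	bio->bi_end_io = example_barrier_end_io;
	bio->bi_private = &wait;

	submit_bio(WRITE_BARRIER, bio);		/* barrier write, zero bytes */
	wait_for_completion(&wait);

	if (!bio_flagged(bio, BIO_UPTODATE))
		ret = -EIO;

	bio_put(bio);
	return ret;
}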
3339 | 3351 | ||
3340 | static void blk_recalc_rq_sectors(struct request *rq, int nsect) | 3352 | static void blk_recalc_rq_sectors(struct request *rq, int nsect) |
3341 | { | 3353 | { |
3342 | if (blk_fs_request(rq)) { | 3354 | if (blk_fs_request(rq)) { |
3343 | rq->hard_sector += nsect; | 3355 | rq->hard_sector += nsect; |
3344 | rq->hard_nr_sectors -= nsect; | 3356 | rq->hard_nr_sectors -= nsect; |
3345 | 3357 | ||
3346 | /* | 3358 | /* |
3347 | * Move the I/O submission pointers ahead if required. | 3359 | * Move the I/O submission pointers ahead if required. |
3348 | */ | 3360 | */ |
3349 | if ((rq->nr_sectors >= rq->hard_nr_sectors) && | 3361 | if ((rq->nr_sectors >= rq->hard_nr_sectors) && |
3350 | (rq->sector <= rq->hard_sector)) { | 3362 | (rq->sector <= rq->hard_sector)) { |
3351 | rq->sector = rq->hard_sector; | 3363 | rq->sector = rq->hard_sector; |
3352 | rq->nr_sectors = rq->hard_nr_sectors; | 3364 | rq->nr_sectors = rq->hard_nr_sectors; |
3353 | rq->hard_cur_sectors = bio_cur_sectors(rq->bio); | 3365 | rq->hard_cur_sectors = bio_cur_sectors(rq->bio); |
3354 | rq->current_nr_sectors = rq->hard_cur_sectors; | 3366 | rq->current_nr_sectors = rq->hard_cur_sectors; |
3355 | rq->buffer = bio_data(rq->bio); | 3367 | rq->buffer = bio_data(rq->bio); |
3356 | } | 3368 | } |
3357 | 3369 | ||
3358 | /* | 3370 | /* |
3359 | * if total number of sectors is less than the first segment | 3371 | * if total number of sectors is less than the first segment |
3360 | * size, something has gone terribly wrong | 3372 | * size, something has gone terribly wrong |
3361 | */ | 3373 | */ |
3362 | if (rq->nr_sectors < rq->current_nr_sectors) { | 3374 | if (rq->nr_sectors < rq->current_nr_sectors) { |
3363 | printk("blk: request botched\n"); | 3375 | printk("blk: request botched\n"); |
3364 | rq->nr_sectors = rq->current_nr_sectors; | 3376 | rq->nr_sectors = rq->current_nr_sectors; |
3365 | } | 3377 | } |
3366 | } | 3378 | } |
3367 | } | 3379 | } |
3368 | 3380 | ||
3369 | static int __end_that_request_first(struct request *req, int uptodate, | 3381 | static int __end_that_request_first(struct request *req, int uptodate, |
3370 | int nr_bytes) | 3382 | int nr_bytes) |
3371 | { | 3383 | { |
3372 | int total_bytes, bio_nbytes, error, next_idx = 0; | 3384 | int total_bytes, bio_nbytes, error, next_idx = 0; |
3373 | struct bio *bio; | 3385 | struct bio *bio; |
3374 | 3386 | ||
3375 | blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); | 3387 | blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); |
3376 | 3388 | ||
3377 | /* | 3389 | /* |
3378 | * extend uptodate bool to allow < 0 value to be direct io error | 3390 | * extend uptodate bool to allow < 0 value to be direct io error |
3379 | */ | 3391 | */ |
3380 | error = 0; | 3392 | error = 0; |
3381 | if (end_io_error(uptodate)) | 3393 | if (end_io_error(uptodate)) |
3382 | error = !uptodate ? -EIO : uptodate; | 3394 | error = !uptodate ? -EIO : uptodate; |
3383 | 3395 | ||
3384 | /* | 3396 | /* |
3385 | * for a REQ_BLOCK_PC request, we want to carry any eventual | 3397 | * for a REQ_BLOCK_PC request, we want to carry any eventual |
3386 | * sense key with us all the way through | 3398 | * sense key with us all the way through |
3387 | */ | 3399 | */ |
3388 | if (!blk_pc_request(req)) | 3400 | if (!blk_pc_request(req)) |
3389 | req->errors = 0; | 3401 | req->errors = 0; |
3390 | 3402 | ||
3391 | if (!uptodate) { | 3403 | if (!uptodate) { |
3392 | if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET)) | 3404 | if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET)) |
3393 | printk("end_request: I/O error, dev %s, sector %llu\n", | 3405 | printk("end_request: I/O error, dev %s, sector %llu\n", |
3394 | req->rq_disk ? req->rq_disk->disk_name : "?", | 3406 | req->rq_disk ? req->rq_disk->disk_name : "?", |
3395 | (unsigned long long)req->sector); | 3407 | (unsigned long long)req->sector); |
3396 | } | 3408 | } |
3397 | 3409 | ||
3398 | if (blk_fs_request(req) && req->rq_disk) { | 3410 | if (blk_fs_request(req) && req->rq_disk) { |
3399 | const int rw = rq_data_dir(req); | 3411 | const int rw = rq_data_dir(req); |
3400 | 3412 | ||
3401 | disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9); | 3413 | disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9); |
3402 | } | 3414 | } |
3403 | 3415 | ||
3404 | total_bytes = bio_nbytes = 0; | 3416 | total_bytes = bio_nbytes = 0; |
3405 | while ((bio = req->bio) != NULL) { | 3417 | while ((bio = req->bio) != NULL) { |
3406 | int nbytes; | 3418 | int nbytes; |
3419 | |||
3420 | /* | ||
3421 | * For an empty barrier request, the low level driver must | ||
3422 | * store a potential error location in ->sector. We pass | ||
3423 | * that back up in ->bi_sector. | ||
3424 | */ | ||
3425 | if (blk_empty_barrier(req)) | ||
3426 | bio->bi_sector = req->sector; | ||
3407 | 3427 | ||
3408 | if (nr_bytes >= bio->bi_size) { | 3428 | if (nr_bytes >= bio->bi_size) { |
3409 | req->bio = bio->bi_next; | 3429 | req->bio = bio->bi_next; |
3410 | nbytes = bio->bi_size; | 3430 | nbytes = bio->bi_size; |
3411 | req_bio_endio(req, bio, nbytes, error); | 3431 | req_bio_endio(req, bio, nbytes, error); |
3412 | next_idx = 0; | 3432 | next_idx = 0; |
3413 | bio_nbytes = 0; | 3433 | bio_nbytes = 0; |
3414 | } else { | 3434 | } else { |
3415 | int idx = bio->bi_idx + next_idx; | 3435 | int idx = bio->bi_idx + next_idx; |
3416 | 3436 | ||
3417 | if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { | 3437 | if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { |
3418 | blk_dump_rq_flags(req, "__end_that"); | 3438 | blk_dump_rq_flags(req, "__end_that"); |
3419 | printk("%s: bio idx %d >= vcnt %d\n", | 3439 | printk("%s: bio idx %d >= vcnt %d\n", |
3420 | __FUNCTION__, | 3440 | __FUNCTION__, |
3421 | bio->bi_idx, bio->bi_vcnt); | 3441 | bio->bi_idx, bio->bi_vcnt); |
3422 | break; | 3442 | break; |
3423 | } | 3443 | } |
3424 | 3444 | ||
3425 | nbytes = bio_iovec_idx(bio, idx)->bv_len; | 3445 | nbytes = bio_iovec_idx(bio, idx)->bv_len; |
3426 | BIO_BUG_ON(nbytes > bio->bi_size); | 3446 | BIO_BUG_ON(nbytes > bio->bi_size); |
3427 | 3447 | ||
3428 | /* | 3448 | /* |
3429 | * not a complete bvec done | 3449 | * not a complete bvec done |
3430 | */ | 3450 | */ |
3431 | if (unlikely(nbytes > nr_bytes)) { | 3451 | if (unlikely(nbytes > nr_bytes)) { |
3432 | bio_nbytes += nr_bytes; | 3452 | bio_nbytes += nr_bytes; |
3433 | total_bytes += nr_bytes; | 3453 | total_bytes += nr_bytes; |
3434 | break; | 3454 | break; |
3435 | } | 3455 | } |
3436 | 3456 | ||
3437 | /* | 3457 | /* |
3438 | * advance to the next vector | 3458 | * advance to the next vector |
3439 | */ | 3459 | */ |
3440 | next_idx++; | 3460 | next_idx++; |
3441 | bio_nbytes += nbytes; | 3461 | bio_nbytes += nbytes; |
3442 | } | 3462 | } |
3443 | 3463 | ||
3444 | total_bytes += nbytes; | 3464 | total_bytes += nbytes; |
3445 | nr_bytes -= nbytes; | 3465 | nr_bytes -= nbytes; |
3446 | 3466 | ||
3447 | if ((bio = req->bio)) { | 3467 | if ((bio = req->bio)) { |
3448 | /* | 3468 | /* |
3449 | * end more in this run, or just return 'not-done' | 3469 | * end more in this run, or just return 'not-done' |
3450 | */ | 3470 | */ |
3451 | if (unlikely(nr_bytes <= 0)) | 3471 | if (unlikely(nr_bytes <= 0)) |
3452 | break; | 3472 | break; |
3453 | } | 3473 | } |
3454 | } | 3474 | } |
3455 | 3475 | ||
3456 | /* | 3476 | /* |
3457 | * completely done | 3477 | * completely done |
3458 | */ | 3478 | */ |
3459 | if (!req->bio) | 3479 | if (!req->bio) |
3460 | return 0; | 3480 | return 0; |
3461 | 3481 | ||
3462 | /* | 3482 | /* |
3463 | * if the request wasn't completed, update state | 3483 | * if the request wasn't completed, update state |
3464 | */ | 3484 | */ |
3465 | if (bio_nbytes) { | 3485 | if (bio_nbytes) { |
3466 | req_bio_endio(req, bio, bio_nbytes, error); | 3486 | req_bio_endio(req, bio, bio_nbytes, error); |
3467 | bio->bi_idx += next_idx; | 3487 | bio->bi_idx += next_idx; |
3468 | bio_iovec(bio)->bv_offset += nr_bytes; | 3488 | bio_iovec(bio)->bv_offset += nr_bytes; |
3469 | bio_iovec(bio)->bv_len -= nr_bytes; | 3489 | bio_iovec(bio)->bv_len -= nr_bytes; |
3470 | } | 3490 | } |
3471 | 3491 | ||
3472 | blk_recalc_rq_sectors(req, total_bytes >> 9); | 3492 | blk_recalc_rq_sectors(req, total_bytes >> 9); |
3473 | blk_recalc_rq_segments(req); | 3493 | blk_recalc_rq_segments(req); |
3474 | return 1; | 3494 | return 1; |
3475 | } | 3495 | } |
3476 | 3496 | ||
3477 | /** | 3497 | /** |
3478 | * end_that_request_first - end I/O on a request | 3498 | * end_that_request_first - end I/O on a request |
3479 | * @req: the request being processed | 3499 | * @req: the request being processed |
3480 | * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error | 3500 | * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error |
3481 | * @nr_sectors: number of sectors to end I/O on | 3501 | * @nr_sectors: number of sectors to end I/O on |
3482 | * | 3502 | * |
3483 | * Description: | 3503 | * Description: |
3484 | * Ends I/O on a number of sectors attached to @req, and sets it up | 3504 | * Ends I/O on a number of sectors attached to @req, and sets it up |
3485 | * for the next range of segments (if any) in the cluster. | 3505 | * for the next range of segments (if any) in the cluster. |
3486 | * | 3506 | * |
3487 | * Return: | 3507 | * Return: |
3488 | * 0 - we are done with this request, call end_that_request_last() | 3508 | * 0 - we are done with this request, call end_that_request_last() |
3489 | * 1 - still buffers pending for this request | 3509 | * 1 - still buffers pending for this request |
3490 | **/ | 3510 | **/ |
3491 | int end_that_request_first(struct request *req, int uptodate, int nr_sectors) | 3511 | int end_that_request_first(struct request *req, int uptodate, int nr_sectors) |
3492 | { | 3512 | { |
3493 | return __end_that_request_first(req, uptodate, nr_sectors << 9); | 3513 | return __end_that_request_first(req, uptodate, nr_sectors << 9); |
3494 | } | 3514 | } |
3495 | 3515 | ||
3496 | EXPORT_SYMBOL(end_that_request_first); | 3516 | EXPORT_SYMBOL(end_that_request_first); |
3497 | 3517 | ||
3498 | /** | 3518 | /** |
3499 | * end_that_request_chunk - end I/O on a request | 3519 | * end_that_request_chunk - end I/O on a request |
3500 | * @req: the request being processed | 3520 | * @req: the request being processed |
3501 | * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error | 3521 | * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error |
3502 | * @nr_bytes: number of bytes to complete | 3522 | * @nr_bytes: number of bytes to complete |
3503 | * | 3523 | * |
3504 | * Description: | 3524 | * Description: |
3505 | * Ends I/O on a number of bytes attached to @req, and sets it up | 3525 | * Ends I/O on a number of bytes attached to @req, and sets it up |
3506 | * for the next range of segments (if any). Like end_that_request_first(), | 3526 | * for the next range of segments (if any). Like end_that_request_first(), |
3507 | * but deals with bytes instead of sectors. | 3527 | * but deals with bytes instead of sectors. |
3508 | * | 3528 | * |
3509 | * Return: | 3529 | * Return: |
3510 | * 0 - we are done with this request, call end_that_request_last() | 3530 | * 0 - we are done with this request, call end_that_request_last() |
3511 | * 1 - still buffers pending for this request | 3531 | * 1 - still buffers pending for this request |
3512 | **/ | 3532 | **/ |
3513 | int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) | 3533 | int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) |
3514 | { | 3534 | { |
3515 | return __end_that_request_first(req, uptodate, nr_bytes); | 3535 | return __end_that_request_first(req, uptodate, nr_bytes); |
3516 | } | 3536 | } |
3517 | 3537 | ||
3518 | EXPORT_SYMBOL(end_that_request_chunk); | 3538 | EXPORT_SYMBOL(end_that_request_chunk); |
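/*
 * Sketch of the two-step completion the kernel-doc above describes, roughly
 * as a driver completion path might spell it out. Assumptions: the request
 * is still queued, nr_bytes comes from the driver, and the caller can take
 * q->queue_lock, since end_that_request_last() below documents that the
 * queue lock must be held. The example_* name is illustrative.
 */
#include <linux/blkdev.h>

static void example_finish_request(struct request_queue *q,
				   struct request *rq, int uptodate,
				   unsigned int nr_bytes)
{
	unsigned long flags;

	/* step 1: complete nr_bytes of the data attached to the request */
	if (end_that_request_chunk(rq, uptodate, nr_bytes))
		return;		/* partial completion, more buffers pending */

	/* step 2: everything done; dequeue and run the final accounting
	 * with the queue lock held */
	spin_lock_irqsave(q->queue_lock, flags);
	blkdev_dequeue_request(rq);
	add_disk_randomness(rq->rq_disk);
	end_that_request_last(rq, uptodate);
	spin_unlock_irqrestore(q->queue_lock, flags);
}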
3519 | 3539 | ||
3520 | /* | 3540 | /* |
3521 | * splice the completion data to a local structure and hand off to | 3541 | * splice the completion data to a local structure and hand off to |
3522 | * process_completion_queue() to complete the requests | 3542 | * process_completion_queue() to complete the requests |
3523 | */ | 3543 | */ |
3524 | static void blk_done_softirq(struct softirq_action *h) | 3544 | static void blk_done_softirq(struct softirq_action *h) |
3525 | { | 3545 | { |
3526 | struct list_head *cpu_list, local_list; | 3546 | struct list_head *cpu_list, local_list; |
3527 | 3547 | ||
3528 | local_irq_disable(); | 3548 | local_irq_disable(); |
3529 | cpu_list = &__get_cpu_var(blk_cpu_done); | 3549 | cpu_list = &__get_cpu_var(blk_cpu_done); |
3530 | list_replace_init(cpu_list, &local_list); | 3550 | list_replace_init(cpu_list, &local_list); |
3531 | local_irq_enable(); | 3551 | local_irq_enable(); |
3532 | 3552 | ||
3533 | while (!list_empty(&local_list)) { | 3553 | while (!list_empty(&local_list)) { |
3534 | struct request *rq = list_entry(local_list.next, struct request, donelist); | 3554 | struct request *rq = list_entry(local_list.next, struct request, donelist); |
3535 | 3555 | ||
3536 | list_del_init(&rq->donelist); | 3556 | list_del_init(&rq->donelist); |
3537 | rq->q->softirq_done_fn(rq); | 3557 | rq->q->softirq_done_fn(rq); |
3538 | } | 3558 | } |
3539 | } | 3559 | } |
3540 | 3560 | ||
3541 | static int __cpuinit blk_cpu_notify(struct notifier_block *self, unsigned long action, | 3561 | static int __cpuinit blk_cpu_notify(struct notifier_block *self, unsigned long action, |
3542 | void *hcpu) | 3562 | void *hcpu) |
3543 | { | 3563 | { |
3544 | /* | 3564 | /* |
3545 | * If a CPU goes away, splice its entries to the current CPU | 3565 | * If a CPU goes away, splice its entries to the current CPU |
3546 | * and trigger a run of the softirq | 3566 | * and trigger a run of the softirq |
3547 | */ | 3567 | */ |
3548 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | 3568 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { |
3549 | int cpu = (unsigned long) hcpu; | 3569 | int cpu = (unsigned long) hcpu; |
3550 | 3570 | ||
3551 | local_irq_disable(); | 3571 | local_irq_disable(); |
3552 | list_splice_init(&per_cpu(blk_cpu_done, cpu), | 3572 | list_splice_init(&per_cpu(blk_cpu_done, cpu), |
3553 | &__get_cpu_var(blk_cpu_done)); | 3573 | &__get_cpu_var(blk_cpu_done)); |
3554 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | 3574 | raise_softirq_irqoff(BLOCK_SOFTIRQ); |
3555 | local_irq_enable(); | 3575 | local_irq_enable(); |
3556 | } | 3576 | } |
3557 | 3577 | ||
3558 | return NOTIFY_OK; | 3578 | return NOTIFY_OK; |
3559 | } | 3579 | } |
3560 | 3580 | ||
3561 | 3581 | ||
3562 | static struct notifier_block blk_cpu_notifier __cpuinitdata = { | 3582 | static struct notifier_block blk_cpu_notifier __cpuinitdata = { |
3563 | .notifier_call = blk_cpu_notify, | 3583 | .notifier_call = blk_cpu_notify, |
3564 | }; | 3584 | }; |
3565 | 3585 | ||
3566 | /** | 3586 | /** |
3567 | * blk_complete_request - end I/O on a request | 3587 | * blk_complete_request - end I/O on a request |
3568 | * @req: the request being processed | 3588 | * @req: the request being processed |
3569 | * | 3589 | * |
3570 | * Description: | 3590 | * Description: |
3571 | * Ends all I/O on a request. It does not handle partial completions, | 3591 | * Ends all I/O on a request. It does not handle partial completions, |
3572 | * unless the driver actually implements this in its completion callback | 3592 | * unless the driver actually implements this in its completion callback |
3573 | * through requeueing. The actual completion happens out-of-order, | 3593 | * through requeueing. The actual completion happens out-of-order, |
3574 | * through a softirq handler. The user must have registered a completion | 3594 | * through a softirq handler. The user must have registered a completion |
3575 | * callback through blk_queue_softirq_done(). | 3595 | * callback through blk_queue_softirq_done(). |
3576 | **/ | 3596 | **/ |
3577 | 3597 | ||
3578 | void blk_complete_request(struct request *req) | 3598 | void blk_complete_request(struct request *req) |
3579 | { | 3599 | { |
3580 | struct list_head *cpu_list; | 3600 | struct list_head *cpu_list; |
3581 | unsigned long flags; | 3601 | unsigned long flags; |
3582 | 3602 | ||
3583 | BUG_ON(!req->q->softirq_done_fn); | 3603 | BUG_ON(!req->q->softirq_done_fn); |
3584 | 3604 | ||
3585 | local_irq_save(flags); | 3605 | local_irq_save(flags); |
3586 | 3606 | ||
3587 | cpu_list = &__get_cpu_var(blk_cpu_done); | 3607 | cpu_list = &__get_cpu_var(blk_cpu_done); |
3588 | list_add_tail(&req->donelist, cpu_list); | 3608 | list_add_tail(&req->donelist, cpu_list); |
3589 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | 3609 | raise_softirq_irqoff(BLOCK_SOFTIRQ); |
3590 | 3610 | ||
3591 | local_irq_restore(flags); | 3611 | local_irq_restore(flags); |
3592 | } | 3612 | } |
3593 | 3613 | ||
3594 | EXPORT_SYMBOL(blk_complete_request); | 3614 | EXPORT_SYMBOL(blk_complete_request); |
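/*
 * Sketch of the usage pattern the kernel-doc above requires (illustrative
 * only): register a softirq completion callback once at queue setup, then
 * hand finished requests to blk_complete_request() from the hard IRQ path.
 * The example_* names and empty callback body are assumptions.
 */
#include <linux/blkdev.h>

static void example_softirq_done(struct request *rq)
{
	/* runs later in BLOCK_SOFTIRQ context; finish the request here,
	 * e.g. via end_that_request_chunk()/end_that_request_last() */
}

static void example_setup_queue(struct request_queue *q)
{
	blk_queue_softirq_done(q, example_softirq_done);
}

static void example_hardirq_completion(struct request *rq)
{
	/* keep the hard IRQ handler cheap: just defer to the softirq */
	blk_complete_request(rq);
}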
3595 | 3615 | ||
3596 | /* | 3616 | /* |
3597 | * queue lock must be held | 3617 | * queue lock must be held |
3598 | */ | 3618 | */ |
3599 | void end_that_request_last(struct request *req, int uptodate) | 3619 | void end_that_request_last(struct request *req, int uptodate) |
3600 | { | 3620 | { |
3601 | struct gendisk *disk = req->rq_disk; | 3621 | struct gendisk *disk = req->rq_disk; |
3602 | int error; | 3622 | int error; |
3603 | 3623 | ||
3604 | /* | 3624 | /* |
3605 | * extend uptodate bool to allow < 0 value to be direct io error | 3625 | * extend uptodate bool to allow < 0 value to be direct io error |
3606 | */ | 3626 | */ |
3607 | error = 0; | 3627 | error = 0; |
3608 | if (end_io_error(uptodate)) | 3628 | if (end_io_error(uptodate)) |
3609 | error = !uptodate ? -EIO : uptodate; | 3629 | error = !uptodate ? -EIO : uptodate; |
3610 | 3630 | ||
3611 | if (unlikely(laptop_mode) && blk_fs_request(req)) | 3631 | if (unlikely(laptop_mode) && blk_fs_request(req)) |
3612 | laptop_io_completion(); | 3632 | laptop_io_completion(); |
3613 | 3633 | ||
3614 | /* | 3634 | /* |
3615 | * Account IO completion. bar_rq isn't accounted as a normal | 3635 | * Account IO completion. bar_rq isn't accounted as a normal |
3616 | * IO on queueing nor completion. Accounting the containing | 3636 | * IO on queueing nor completion. Accounting the containing |
3617 | * request is enough. | 3637 | * request is enough. |
3618 | */ | 3638 | */ |
3619 | if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { | 3639 | if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { |
3620 | unsigned long duration = jiffies - req->start_time; | 3640 | unsigned long duration = jiffies - req->start_time; |
3621 | const int rw = rq_data_dir(req); | 3641 | const int rw = rq_data_dir(req); |
3622 | 3642 | ||
3623 | __disk_stat_inc(disk, ios[rw]); | 3643 | __disk_stat_inc(disk, ios[rw]); |
3624 | __disk_stat_add(disk, ticks[rw], duration); | 3644 | __disk_stat_add(disk, ticks[rw], duration); |
3625 | disk_round_stats(disk); | 3645 | disk_round_stats(disk); |
3626 | disk->in_flight--; | 3646 | disk->in_flight--; |
3627 | } | 3647 | } |
3628 | if (req->end_io) | 3648 | if (req->end_io) |
3629 | req->end_io(req, error); | 3649 | req->end_io(req, error); |
3630 | else | 3650 | else |
3631 | __blk_put_request(req->q, req); | 3651 | __blk_put_request(req->q, req); |
3632 | } | 3652 | } |
3633 | 3653 | ||
3634 | EXPORT_SYMBOL(end_that_request_last); | 3654 | EXPORT_SYMBOL(end_that_request_last); |
3635 | 3655 | ||
3636 | static inline void __end_request(struct request *rq, int uptodate, | 3656 | static inline void __end_request(struct request *rq, int uptodate, |
3637 | unsigned int nr_bytes, int dequeue) | 3657 | unsigned int nr_bytes, int dequeue) |
3638 | { | 3658 | { |
3639 | if (!end_that_request_chunk(rq, uptodate, nr_bytes)) { | 3659 | if (!end_that_request_chunk(rq, uptodate, nr_bytes)) { |
3640 | if (dequeue) | 3660 | if (dequeue) |
3641 | blkdev_dequeue_request(rq); | 3661 | blkdev_dequeue_request(rq); |
3642 | add_disk_randomness(rq->rq_disk); | 3662 | add_disk_randomness(rq->rq_disk); |
3643 | end_that_request_last(rq, uptodate); | 3663 | end_that_request_last(rq, uptodate); |
3644 | } | 3664 | } |
3645 | } | 3665 | } |
3646 | 3666 | ||
3647 | static unsigned int rq_byte_size(struct request *rq) | 3667 | static unsigned int rq_byte_size(struct request *rq) |
3648 | { | 3668 | { |
3649 | if (blk_fs_request(rq)) | 3669 | if (blk_fs_request(rq)) |
3650 | return rq->hard_nr_sectors << 9; | 3670 | return rq->hard_nr_sectors << 9; |
3651 | 3671 | ||
3652 | return rq->data_len; | 3672 | return rq->data_len; |
3653 | } | 3673 | } |
3654 | 3674 | ||
3655 | /** | 3675 | /** |
3656 | * end_queued_request - end all I/O on a queued request | 3676 | * end_queued_request - end all I/O on a queued request |
3657 | * @rq: the request being processed | 3677 | * @rq: the request being processed |
3658 | * @uptodate: error value or 0/1 uptodate flag | 3678 | * @uptodate: error value or 0/1 uptodate flag |
3659 | * | 3679 | * |
3660 | * Description: | 3680 | * Description: |
3661 | * Ends all I/O on a request, and removes it from the block layer queues. | 3681 | * Ends all I/O on a request, and removes it from the block layer queues. |
3662 | * Not suitable for normal IO completion, unless the driver still has | 3682 | * Not suitable for normal IO completion, unless the driver still has |
3663 | * the request attached to the block layer. | 3683 | * the request attached to the block layer. |
3664 | * | 3684 | * |
3665 | **/ | 3685 | **/ |
3666 | void end_queued_request(struct request *rq, int uptodate) | 3686 | void end_queued_request(struct request *rq, int uptodate) |
3667 | { | 3687 | { |
3668 | __end_request(rq, uptodate, rq_byte_size(rq), 1); | 3688 | __end_request(rq, uptodate, rq_byte_size(rq), 1); |
3669 | } | 3689 | } |
3670 | EXPORT_SYMBOL(end_queued_request); | 3690 | EXPORT_SYMBOL(end_queued_request); |
3671 | 3691 | ||
3672 | /** | 3692 | /** |
3673 | * end_dequeued_request - end all I/O on a dequeued request | 3693 | * end_dequeued_request - end all I/O on a dequeued request |
3674 | * @rq: the request being processed | 3694 | * @rq: the request being processed |
3675 | * @uptodate: error value or 0/1 uptodate flag | 3695 | * @uptodate: error value or 0/1 uptodate flag |
3676 | * | 3696 | * |
3677 | * Description: | 3697 | * Description: |
3678 | * Ends all I/O on a request. The request must already have been | 3698 | * Ends all I/O on a request. The request must already have been |
3679 | * dequeued using blkdev_dequeue_request(), as is normally the case | 3699 | * dequeued using blkdev_dequeue_request(), as is normally the case |
3680 | * for most drivers. | 3700 | * for most drivers. |
3681 | * | 3701 | * |
3682 | **/ | 3702 | **/ |
3683 | void end_dequeued_request(struct request *rq, int uptodate) | 3703 | void end_dequeued_request(struct request *rq, int uptodate) |
3684 | { | 3704 | { |
3685 | __end_request(rq, uptodate, rq_byte_size(rq), 0); | 3705 | __end_request(rq, uptodate, rq_byte_size(rq), 0); |
3686 | } | 3706 | } |
3687 | EXPORT_SYMBOL(end_dequeued_request); | 3707 | EXPORT_SYMBOL(end_dequeued_request); |
3688 | 3708 | ||
3689 | 3709 | ||
3690 | /** | 3710 | /** |
3691 | * end_request - end I/O on the current segment of the request | 3711 | * end_request - end I/O on the current segment of the request |
3692 | * @rq: the request being processed | 3712 | * @rq: the request being processed |
3693 | * @uptodate: error value or 0/1 uptodate flag | 3713 | * @uptodate: error value or 0/1 uptodate flag |
3694 | * | 3714 | * |
3695 | * Description: | 3715 | * Description: |
3696 | * Ends I/O on the current segment of a request. If that is the only | 3716 | * Ends I/O on the current segment of a request. If that is the only |
3697 | * remaining segment, the request is also completed and freed. | 3717 | * remaining segment, the request is also completed and freed. |
3698 | * | 3718 | * |
3699 | * This is a remnant of how older block drivers handled IO completions. | 3719 | * This is a remnant of how older block drivers handled IO completions. |
3700 | * Modern drivers typically end IO on the full request in one go, unless | 3720 | * Modern drivers typically end IO on the full request in one go, unless |
3701 | * they have a residual value to account for. For that case this function | 3721 | * they have a residual value to account for. For that case this function |
3702 | * isn't really useful, unless the residual just happens to be the | 3722 | * isn't really useful, unless the residual just happens to be the |
3703 | * full current segment. In other words, don't use this function in new | 3723 | * full current segment. In other words, don't use this function in new |
3704 | * code. Either use end_request_completely(), or the | 3724 | * code. Either use end_request_completely(), or the |
3705 | * end_that_request_chunk() (along with end_that_request_last()) for | 3725 | * end_that_request_chunk() (along with end_that_request_last()) for |
3706 | * partial completions. | 3726 | * partial completions. |
3707 | * | 3727 | * |
3708 | **/ | 3728 | **/ |
3709 | void end_request(struct request *req, int uptodate) | 3729 | void end_request(struct request *req, int uptodate) |
3710 | { | 3730 | { |
3711 | __end_request(req, uptodate, req->hard_cur_sectors << 9, 1); | 3731 | __end_request(req, uptodate, req->hard_cur_sectors << 9, 1); |
3712 | } | 3732 | } |
3713 | EXPORT_SYMBOL(end_request); | 3733 | EXPORT_SYMBOL(end_request); |
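/*
 * Minimal sketch of the advice in the comment above (assumptions: 'rq' has
 * already been dequeued by the driver and 'uptodate' follows the usual
 * 0/1/<0 convention): new code should complete the whole request at once
 * rather than looping over end_request() one segment at a time.
 */
static void example_complete_whole_request(struct request *rq, int uptodate)
{
	/* finishes all remaining I/O on an already dequeued request */
	end_dequeued_request(rq, uptodate);
}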
3714 | 3734 | ||
3715 | static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 3735 | static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
3716 | struct bio *bio) | 3736 | struct bio *bio) |
3717 | { | 3737 | { |
3718 | /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ | 3738 | /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ |
3719 | rq->cmd_flags |= (bio->bi_rw & 3); | 3739 | rq->cmd_flags |= (bio->bi_rw & 3); |
3720 | 3740 | ||
3721 | rq->nr_phys_segments = bio_phys_segments(q, bio); | 3741 | rq->nr_phys_segments = bio_phys_segments(q, bio); |
3722 | rq->nr_hw_segments = bio_hw_segments(q, bio); | 3742 | rq->nr_hw_segments = bio_hw_segments(q, bio); |
3723 | rq->current_nr_sectors = bio_cur_sectors(bio); | 3743 | rq->current_nr_sectors = bio_cur_sectors(bio); |
3724 | rq->hard_cur_sectors = rq->current_nr_sectors; | 3744 | rq->hard_cur_sectors = rq->current_nr_sectors; |
3725 | rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); | 3745 | rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); |
3726 | rq->buffer = bio_data(bio); | 3746 | rq->buffer = bio_data(bio); |
3727 | rq->data_len = bio->bi_size; | 3747 | rq->data_len = bio->bi_size; |
3728 | 3748 | ||
3729 | rq->bio = rq->biotail = bio; | 3749 | rq->bio = rq->biotail = bio; |
3730 | 3750 | ||
3731 | if (bio->bi_bdev) | 3751 | if (bio->bi_bdev) |
3732 | rq->rq_disk = bio->bi_bdev->bd_disk; | 3752 | rq->rq_disk = bio->bi_bdev->bd_disk; |
3733 | } | 3753 | } |
3734 | 3754 | ||
3735 | int kblockd_schedule_work(struct work_struct *work) | 3755 | int kblockd_schedule_work(struct work_struct *work) |
3736 | { | 3756 | { |
3737 | return queue_work(kblockd_workqueue, work); | 3757 | return queue_work(kblockd_workqueue, work); |
3738 | } | 3758 | } |
3739 | 3759 | ||
3740 | EXPORT_SYMBOL(kblockd_schedule_work); | 3760 | EXPORT_SYMBOL(kblockd_schedule_work); |
3741 | 3761 | ||
3742 | void kblockd_flush_work(struct work_struct *work) | 3762 | void kblockd_flush_work(struct work_struct *work) |
3743 | { | 3763 | { |
3744 | cancel_work_sync(work); | 3764 | cancel_work_sync(work); |
3745 | } | 3765 | } |
3746 | EXPORT_SYMBOL(kblockd_flush_work); | 3766 | EXPORT_SYMBOL(kblockd_flush_work); |
3747 | 3767 | ||
3748 | int __init blk_dev_init(void) | 3768 | int __init blk_dev_init(void) |
3749 | { | 3769 | { |
3750 | int i; | 3770 | int i; |
3751 | 3771 | ||
3752 | kblockd_workqueue = create_workqueue("kblockd"); | 3772 | kblockd_workqueue = create_workqueue("kblockd"); |
3753 | if (!kblockd_workqueue) | 3773 | if (!kblockd_workqueue) |
3754 | panic("Failed to create kblockd\n"); | 3774 | panic("Failed to create kblockd\n"); |
3755 | 3775 | ||
3756 | request_cachep = kmem_cache_create("blkdev_requests", | 3776 | request_cachep = kmem_cache_create("blkdev_requests", |
3757 | sizeof(struct request), 0, SLAB_PANIC, NULL); | 3777 | sizeof(struct request), 0, SLAB_PANIC, NULL); |
3758 | 3778 | ||
3759 | requestq_cachep = kmem_cache_create("blkdev_queue", | 3779 | requestq_cachep = kmem_cache_create("blkdev_queue", |
3760 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); | 3780 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); |
3761 | 3781 | ||
3762 | iocontext_cachep = kmem_cache_create("blkdev_ioc", | 3782 | iocontext_cachep = kmem_cache_create("blkdev_ioc", |
3763 | sizeof(struct io_context), 0, SLAB_PANIC, NULL); | 3783 | sizeof(struct io_context), 0, SLAB_PANIC, NULL); |
3764 | 3784 | ||
3765 | for_each_possible_cpu(i) | 3785 | for_each_possible_cpu(i) |
3766 | INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); | 3786 | INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); |
3767 | 3787 | ||
3768 | open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); | 3788 | open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); |
3769 | register_hotcpu_notifier(&blk_cpu_notifier); | 3789 | register_hotcpu_notifier(&blk_cpu_notifier); |
3770 | 3790 | ||
3771 | blk_max_low_pfn = max_low_pfn - 1; | 3791 | blk_max_low_pfn = max_low_pfn - 1; |
3772 | blk_max_pfn = max_pfn - 1; | 3792 | blk_max_pfn = max_pfn - 1; |
3773 | 3793 | ||
3774 | return 0; | 3794 | return 0; |
3775 | } | 3795 | } |
3776 | 3796 | ||
3777 | /* | 3797 | /* |
3778 | * IO Context helper functions | 3798 | * IO Context helper functions |
3779 | */ | 3799 | */ |
3780 | void put_io_context(struct io_context *ioc) | 3800 | void put_io_context(struct io_context *ioc) |
3781 | { | 3801 | { |
3782 | if (ioc == NULL) | 3802 | if (ioc == NULL) |
3783 | return; | 3803 | return; |
3784 | 3804 | ||
3785 | BUG_ON(atomic_read(&ioc->refcount) == 0); | 3805 | BUG_ON(atomic_read(&ioc->refcount) == 0); |
3786 | 3806 | ||
3787 | if (atomic_dec_and_test(&ioc->refcount)) { | 3807 | if (atomic_dec_and_test(&ioc->refcount)) { |
3788 | struct cfq_io_context *cic; | 3808 | struct cfq_io_context *cic; |
3789 | 3809 | ||
3790 | rcu_read_lock(); | 3810 | rcu_read_lock(); |
3791 | if (ioc->aic && ioc->aic->dtor) | 3811 | if (ioc->aic && ioc->aic->dtor) |
3792 | ioc->aic->dtor(ioc->aic); | 3812 | ioc->aic->dtor(ioc->aic); |
3793 | if (ioc->cic_root.rb_node != NULL) { | 3813 | if (ioc->cic_root.rb_node != NULL) { |
3794 | struct rb_node *n = rb_first(&ioc->cic_root); | 3814 | struct rb_node *n = rb_first(&ioc->cic_root); |
3795 | 3815 | ||
3796 | cic = rb_entry(n, struct cfq_io_context, rb_node); | 3816 | cic = rb_entry(n, struct cfq_io_context, rb_node); |
3797 | cic->dtor(ioc); | 3817 | cic->dtor(ioc); |
3798 | } | 3818 | } |
3799 | rcu_read_unlock(); | 3819 | rcu_read_unlock(); |
3800 | 3820 | ||
3801 | kmem_cache_free(iocontext_cachep, ioc); | 3821 | kmem_cache_free(iocontext_cachep, ioc); |
3802 | } | 3822 | } |
3803 | } | 3823 | } |
3804 | EXPORT_SYMBOL(put_io_context); | 3824 | EXPORT_SYMBOL(put_io_context); |
3805 | 3825 | ||
3806 | /* Called by the exiting task */ | 3826 | /* Called by the exiting task */ |
3807 | void exit_io_context(void) | 3827 | void exit_io_context(void) |
3808 | { | 3828 | { |
3809 | struct io_context *ioc; | 3829 | struct io_context *ioc; |
3810 | struct cfq_io_context *cic; | 3830 | struct cfq_io_context *cic; |
3811 | 3831 | ||
3812 | task_lock(current); | 3832 | task_lock(current); |
3813 | ioc = current->io_context; | 3833 | ioc = current->io_context; |
3814 | current->io_context = NULL; | 3834 | current->io_context = NULL; |
3815 | task_unlock(current); | 3835 | task_unlock(current); |
3816 | 3836 | ||
3817 | ioc->task = NULL; | 3837 | ioc->task = NULL; |
3818 | if (ioc->aic && ioc->aic->exit) | 3838 | if (ioc->aic && ioc->aic->exit) |
3819 | ioc->aic->exit(ioc->aic); | 3839 | ioc->aic->exit(ioc->aic); |
3820 | if (ioc->cic_root.rb_node != NULL) { | 3840 | if (ioc->cic_root.rb_node != NULL) { |
3821 | cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); | 3841 | cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); |
3822 | cic->exit(ioc); | 3842 | cic->exit(ioc); |
3823 | } | 3843 | } |
3824 | 3844 | ||
3825 | put_io_context(ioc); | 3845 | put_io_context(ioc); |
3826 | } | 3846 | } |
3827 | 3847 | ||
3828 | /* | 3848 | /* |
3829 | * If the current task has no IO context then create one and initialise it. | 3849 | * If the current task has no IO context then create one and initialise it. |
3830 | * Otherwise, return its existing IO context. | 3850 | * Otherwise, return its existing IO context. |
3831 | * | 3851 | * |
3832 | * This returned IO context doesn't have a specifically elevated refcount, | 3852 | * This returned IO context doesn't have a specifically elevated refcount, |
3833 | * but since the current task itself holds a reference, the context can be | 3853 | * but since the current task itself holds a reference, the context can be |
3834 | * used in general code, so long as it stays within `current` context. | 3854 | * used in general code, so long as it stays within `current` context. |
3835 | */ | 3855 | */ |
3836 | static struct io_context *current_io_context(gfp_t gfp_flags, int node) | 3856 | static struct io_context *current_io_context(gfp_t gfp_flags, int node) |
3837 | { | 3857 | { |
3838 | struct task_struct *tsk = current; | 3858 | struct task_struct *tsk = current; |
3839 | struct io_context *ret; | 3859 | struct io_context *ret; |
3840 | 3860 | ||
3841 | ret = tsk->io_context; | 3861 | ret = tsk->io_context; |
3842 | if (likely(ret)) | 3862 | if (likely(ret)) |
3843 | return ret; | 3863 | return ret; |
3844 | 3864 | ||
3845 | ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); | 3865 | ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); |
3846 | if (ret) { | 3866 | if (ret) { |
3847 | atomic_set(&ret->refcount, 1); | 3867 | atomic_set(&ret->refcount, 1); |
3848 | ret->task = current; | 3868 | ret->task = current; |
3849 | ret->ioprio_changed = 0; | 3869 | ret->ioprio_changed = 0; |
3850 | ret->last_waited = jiffies; /* doesn't matter... */ | 3870 | ret->last_waited = jiffies; /* doesn't matter... */ |
3851 | ret->nr_batch_requests = 0; /* because this is 0 */ | 3871 | ret->nr_batch_requests = 0; /* because this is 0 */ |
3852 | ret->aic = NULL; | 3872 | ret->aic = NULL; |
3853 | ret->cic_root.rb_node = NULL; | 3873 | ret->cic_root.rb_node = NULL; |
3854 | ret->ioc_data = NULL; | 3874 | ret->ioc_data = NULL; |
3855 | /* make sure set_task_ioprio() sees the settings above */ | 3875 | /* make sure set_task_ioprio() sees the settings above */ |
3856 | smp_wmb(); | 3876 | smp_wmb(); |
3857 | tsk->io_context = ret; | 3877 | tsk->io_context = ret; |
3858 | } | 3878 | } |
3859 | 3879 | ||
3860 | return ret; | 3880 | return ret; |
3861 | } | 3881 | } |
3862 | 3882 | ||
3863 | /* | 3883 | /* |
3864 | * If the current task has no IO context then create one and initialise it. | 3884 | * If the current task has no IO context then create one and initialise it. |
3865 | * If it does have a context, take a ref on it. | 3885 | * If it does have a context, take a ref on it. |
3866 | * | 3886 | * |
3867 | * This is always called in the context of the task which submitted the I/O. | 3887 | * This is always called in the context of the task which submitted the I/O. |
3868 | */ | 3888 | */ |
3869 | struct io_context *get_io_context(gfp_t gfp_flags, int node) | 3889 | struct io_context *get_io_context(gfp_t gfp_flags, int node) |
3870 | { | 3890 | { |
3871 | struct io_context *ret; | 3891 | struct io_context *ret; |
3872 | ret = current_io_context(gfp_flags, node); | 3892 | ret = current_io_context(gfp_flags, node); |
3873 | if (likely(ret)) | 3893 | if (likely(ret)) |
3874 | atomic_inc(&ret->refcount); | 3894 | atomic_inc(&ret->refcount); |
3875 | return ret; | 3895 | return ret; |
3876 | } | 3896 | } |
3877 | EXPORT_SYMBOL(get_io_context); | 3897 | EXPORT_SYMBOL(get_io_context); |
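/*
 * Sketch of the reference pairing described above (illustrative only; the
 * GFP flags and NUMA node value are assumptions): a caller that takes a
 * reference with get_io_context() must balance it with put_io_context(),
 * whereas current_io_context() alone is only safe within 'current' context.
 */
#include <linux/blkdev.h>

static void example_peek_io_context(void)
{
	struct io_context *ioc;

	ioc = get_io_context(GFP_KERNEL, -1);	/* -1: any NUMA node */
	if (!ioc)
		return;

	/* ... read or update per-task IO scheduling state here ... */

	put_io_context(ioc);			/* drop the extra reference */
}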
3878 | 3898 | ||
3879 | void copy_io_context(struct io_context **pdst, struct io_context **psrc) | 3899 | void copy_io_context(struct io_context **pdst, struct io_context **psrc) |
3880 | { | 3900 | { |
3881 | struct io_context *src = *psrc; | 3901 | struct io_context *src = *psrc; |
3882 | struct io_context *dst = *pdst; | 3902 | struct io_context *dst = *pdst; |
3883 | 3903 | ||
3884 | if (src) { | 3904 | if (src) { |
3885 | BUG_ON(atomic_read(&src->refcount) == 0); | 3905 | BUG_ON(atomic_read(&src->refcount) == 0); |
3886 | atomic_inc(&src->refcount); | 3906 | atomic_inc(&src->refcount); |
3887 | put_io_context(dst); | 3907 | put_io_context(dst); |
3888 | *pdst = src; | 3908 | *pdst = src; |
3889 | } | 3909 | } |
3890 | } | 3910 | } |
3891 | EXPORT_SYMBOL(copy_io_context); | 3911 | EXPORT_SYMBOL(copy_io_context); |
3892 | 3912 | ||
3893 | void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) | 3913 | void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) |
3894 | { | 3914 | { |
3895 | struct io_context *temp; | 3915 | struct io_context *temp; |
3896 | temp = *ioc1; | 3916 | temp = *ioc1; |
3897 | *ioc1 = *ioc2; | 3917 | *ioc1 = *ioc2; |
3898 | *ioc2 = temp; | 3918 | *ioc2 = temp; |
3899 | } | 3919 | } |
3900 | EXPORT_SYMBOL(swap_io_context); | 3920 | EXPORT_SYMBOL(swap_io_context); |
3901 | 3921 | ||
3902 | /* | 3922 | /* |
3903 | * sysfs parts below | 3923 | * sysfs parts below |
3904 | */ | 3924 | */ |
3905 | struct queue_sysfs_entry { | 3925 | struct queue_sysfs_entry { |
3906 | struct attribute attr; | 3926 | struct attribute attr; |
3907 | ssize_t (*show)(struct request_queue *, char *); | 3927 | ssize_t (*show)(struct request_queue *, char *); |
3908 | ssize_t (*store)(struct request_queue *, const char *, size_t); | 3928 | ssize_t (*store)(struct request_queue *, const char *, size_t); |
3909 | }; | 3929 | }; |
3910 | 3930 | ||
3911 | static ssize_t | 3931 | static ssize_t |
3912 | queue_var_show(unsigned int var, char *page) | 3932 | queue_var_show(unsigned int var, char *page) |
3913 | { | 3933 | { |
3914 | return sprintf(page, "%d\n", var); | 3934 | return sprintf(page, "%d\n", var); |
3915 | } | 3935 | } |
3916 | 3936 | ||
3917 | static ssize_t | 3937 | static ssize_t |
3918 | queue_var_store(unsigned long *var, const char *page, size_t count) | 3938 | queue_var_store(unsigned long *var, const char *page, size_t count) |
3919 | { | 3939 | { |
3920 | char *p = (char *) page; | 3940 | char *p = (char *) page; |
3921 | 3941 | ||
3922 | *var = simple_strtoul(p, &p, 10); | 3942 | *var = simple_strtoul(p, &p, 10); |
3923 | return count; | 3943 | return count; |
3924 | } | 3944 | } |
3925 | 3945 | ||
3926 | static ssize_t queue_requests_show(struct request_queue *q, char *page) | 3946 | static ssize_t queue_requests_show(struct request_queue *q, char *page) |
3927 | { | 3947 | { |
3928 | return queue_var_show(q->nr_requests, (page)); | 3948 | return queue_var_show(q->nr_requests, (page)); |
3929 | } | 3949 | } |
3930 | 3950 | ||
3931 | static ssize_t | 3951 | static ssize_t |
3932 | queue_requests_store(struct request_queue *q, const char *page, size_t count) | 3952 | queue_requests_store(struct request_queue *q, const char *page, size_t count) |
3933 | { | 3953 | { |
3934 | struct request_list *rl = &q->rq; | 3954 | struct request_list *rl = &q->rq; |
3935 | unsigned long nr; | 3955 | unsigned long nr; |
3936 | int ret = queue_var_store(&nr, page, count); | 3956 | int ret = queue_var_store(&nr, page, count); |
3937 | if (nr < BLKDEV_MIN_RQ) | 3957 | if (nr < BLKDEV_MIN_RQ) |
3938 | nr = BLKDEV_MIN_RQ; | 3958 | nr = BLKDEV_MIN_RQ; |
3939 | 3959 | ||
3940 | spin_lock_irq(q->queue_lock); | 3960 | spin_lock_irq(q->queue_lock); |
3941 | q->nr_requests = nr; | 3961 | q->nr_requests = nr; |
3942 | blk_queue_congestion_threshold(q); | 3962 | blk_queue_congestion_threshold(q); |
3943 | 3963 | ||
3944 | if (rl->count[READ] >= queue_congestion_on_threshold(q)) | 3964 | if (rl->count[READ] >= queue_congestion_on_threshold(q)) |
3945 | blk_set_queue_congested(q, READ); | 3965 | blk_set_queue_congested(q, READ); |
3946 | else if (rl->count[READ] < queue_congestion_off_threshold(q)) | 3966 | else if (rl->count[READ] < queue_congestion_off_threshold(q)) |
3947 | blk_clear_queue_congested(q, READ); | 3967 | blk_clear_queue_congested(q, READ); |
3948 | 3968 | ||
3949 | if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) | 3969 | if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) |
3950 | blk_set_queue_congested(q, WRITE); | 3970 | blk_set_queue_congested(q, WRITE); |
3951 | else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) | 3971 | else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) |
3952 | blk_clear_queue_congested(q, WRITE); | 3972 | blk_clear_queue_congested(q, WRITE); |
3953 | 3973 | ||
3954 | if (rl->count[READ] >= q->nr_requests) { | 3974 | if (rl->count[READ] >= q->nr_requests) { |
3955 | blk_set_queue_full(q, READ); | 3975 | blk_set_queue_full(q, READ); |
3956 | } else if (rl->count[READ]+1 <= q->nr_requests) { | 3976 | } else if (rl->count[READ]+1 <= q->nr_requests) { |
3957 | blk_clear_queue_full(q, READ); | 3977 | blk_clear_queue_full(q, READ); |
3958 | wake_up(&rl->wait[READ]); | 3978 | wake_up(&rl->wait[READ]); |
3959 | } | 3979 | } |
3960 | 3980 | ||
3961 | if (rl->count[WRITE] >= q->nr_requests) { | 3981 | if (rl->count[WRITE] >= q->nr_requests) { |
3962 | blk_set_queue_full(q, WRITE); | 3982 | blk_set_queue_full(q, WRITE); |
3963 | } else if (rl->count[WRITE]+1 <= q->nr_requests) { | 3983 | } else if (rl->count[WRITE]+1 <= q->nr_requests) { |
3964 | blk_clear_queue_full(q, WRITE); | 3984 | blk_clear_queue_full(q, WRITE); |
3965 | wake_up(&rl->wait[WRITE]); | 3985 | wake_up(&rl->wait[WRITE]); |
3966 | } | 3986 | } |
3967 | spin_unlock_irq(q->queue_lock); | 3987 | spin_unlock_irq(q->queue_lock); |
3968 | return ret; | 3988 | return ret; |
3969 | } | 3989 | } |
3970 | 3990 | ||
3971 | static ssize_t queue_ra_show(struct request_queue *q, char *page) | 3991 | static ssize_t queue_ra_show(struct request_queue *q, char *page) |
3972 | { | 3992 | { |
3973 | int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); | 3993 | int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); |
3974 | 3994 | ||
3975 | return queue_var_show(ra_kb, (page)); | 3995 | return queue_var_show(ra_kb, (page)); |
3976 | } | 3996 | } |
3977 | 3997 | ||
3978 | static ssize_t | 3998 | static ssize_t |
3979 | queue_ra_store(struct request_queue *q, const char *page, size_t count) | 3999 | queue_ra_store(struct request_queue *q, const char *page, size_t count) |
3980 | { | 4000 | { |
3981 | unsigned long ra_kb; | 4001 | unsigned long ra_kb; |
3982 | ssize_t ret = queue_var_store(&ra_kb, page, count); | 4002 | ssize_t ret = queue_var_store(&ra_kb, page, count); |
3983 | 4003 | ||
3984 | spin_lock_irq(q->queue_lock); | 4004 | spin_lock_irq(q->queue_lock); |
3985 | q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); | 4005 | q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); |
3986 | spin_unlock_irq(q->queue_lock); | 4006 | spin_unlock_irq(q->queue_lock); |
3987 | 4007 | ||
3988 | return ret; | 4008 | return ret; |
3989 | } | 4009 | } |
3990 | 4010 | ||
3991 | static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) | 4011 | static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) |
3992 | { | 4012 | { |
3993 | int max_sectors_kb = q->max_sectors >> 1; | 4013 | int max_sectors_kb = q->max_sectors >> 1; |
3994 | 4014 | ||
3995 | return queue_var_show(max_sectors_kb, (page)); | 4015 | return queue_var_show(max_sectors_kb, (page)); |
3996 | } | 4016 | } |
3997 | 4017 | ||
3998 | static ssize_t | 4018 | static ssize_t |
3999 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) | 4019 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) |
4000 | { | 4020 | { |
4001 | unsigned long max_sectors_kb, | 4021 | unsigned long max_sectors_kb, |
4002 | max_hw_sectors_kb = q->max_hw_sectors >> 1, | 4022 | max_hw_sectors_kb = q->max_hw_sectors >> 1, |
4003 | page_kb = 1 << (PAGE_CACHE_SHIFT - 10); | 4023 | page_kb = 1 << (PAGE_CACHE_SHIFT - 10); |
4004 | ssize_t ret = queue_var_store(&max_sectors_kb, page, count); | 4024 | ssize_t ret = queue_var_store(&max_sectors_kb, page, count); |
4005 | int ra_kb; | 4025 | int ra_kb; |
4006 | 4026 | ||
4007 | if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) | 4027 | if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) |
4008 | return -EINVAL; | 4028 | return -EINVAL; |
4009 | /* | 4029 | /* |
4010 | * Take the queue lock to update the readahead and max_sectors | 4030 | * Take the queue lock to update the readahead and max_sectors |
4011 | * values synchronously: | 4031 | * values synchronously: |
4012 | */ | 4032 | */ |
4013 | spin_lock_irq(q->queue_lock); | 4033 | spin_lock_irq(q->queue_lock); |
4014 | /* | 4034 | /* |
4015 | * Trim readahead window as well, if necessary: | 4035 | * Trim readahead window as well, if necessary: |
4016 | */ | 4036 | */ |
4017 | ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); | 4037 | ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); |
4018 | if (ra_kb > max_sectors_kb) | 4038 | if (ra_kb > max_sectors_kb) |
4019 | q->backing_dev_info.ra_pages = | 4039 | q->backing_dev_info.ra_pages = |
4020 | max_sectors_kb >> (PAGE_CACHE_SHIFT - 10); | 4040 | max_sectors_kb >> (PAGE_CACHE_SHIFT - 10); |
4021 | 4041 | ||
4022 | q->max_sectors = max_sectors_kb << 1; | 4042 | q->max_sectors = max_sectors_kb << 1; |
4023 | spin_unlock_irq(q->queue_lock); | 4043 | spin_unlock_irq(q->queue_lock); |
4024 | 4044 | ||
4025 | return ret; | 4045 | return ret; |
4026 | } | 4046 | } |
4027 | 4047 | ||
4028 | static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) | 4048 | static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) |
4029 | { | 4049 | { |
4030 | int max_hw_sectors_kb = q->max_hw_sectors >> 1; | 4050 | int max_hw_sectors_kb = q->max_hw_sectors >> 1; |
4031 | 4051 | ||
4032 | return queue_var_show(max_hw_sectors_kb, (page)); | 4052 | return queue_var_show(max_hw_sectors_kb, (page)); |
4033 | } | 4053 | } |
4034 | 4054 | ||
4035 | 4055 | ||
4036 | static struct queue_sysfs_entry queue_requests_entry = { | 4056 | static struct queue_sysfs_entry queue_requests_entry = { |
4037 | .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, | 4057 | .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, |
4038 | .show = queue_requests_show, | 4058 | .show = queue_requests_show, |
4039 | .store = queue_requests_store, | 4059 | .store = queue_requests_store, |
4040 | }; | 4060 | }; |
4041 | 4061 | ||
4042 | static struct queue_sysfs_entry queue_ra_entry = { | 4062 | static struct queue_sysfs_entry queue_ra_entry = { |
4043 | .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, | 4063 | .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, |
4044 | .show = queue_ra_show, | 4064 | .show = queue_ra_show, |
4045 | .store = queue_ra_store, | 4065 | .store = queue_ra_store, |
4046 | }; | 4066 | }; |
4047 | 4067 | ||
4048 | static struct queue_sysfs_entry queue_max_sectors_entry = { | 4068 | static struct queue_sysfs_entry queue_max_sectors_entry = { |
4049 | .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, | 4069 | .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, |
4050 | .show = queue_max_sectors_show, | 4070 | .show = queue_max_sectors_show, |
4051 | .store = queue_max_sectors_store, | 4071 | .store = queue_max_sectors_store, |
4052 | }; | 4072 | }; |
4053 | 4073 | ||
4054 | static struct queue_sysfs_entry queue_max_hw_sectors_entry = { | 4074 | static struct queue_sysfs_entry queue_max_hw_sectors_entry = { |
4055 | .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, | 4075 | .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, |
4056 | .show = queue_max_hw_sectors_show, | 4076 | .show = queue_max_hw_sectors_show, |
4057 | }; | 4077 | }; |
4058 | 4078 | ||
4059 | static struct queue_sysfs_entry queue_iosched_entry = { | 4079 | static struct queue_sysfs_entry queue_iosched_entry = { |
4060 | .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, | 4080 | .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, |
4061 | .show = elv_iosched_show, | 4081 | .show = elv_iosched_show, |
4062 | .store = elv_iosched_store, | 4082 | .store = elv_iosched_store, |
4063 | }; | 4083 | }; |
4064 | 4084 | ||
4065 | static struct attribute *default_attrs[] = { | 4085 | static struct attribute *default_attrs[] = { |
4066 | &queue_requests_entry.attr, | 4086 | &queue_requests_entry.attr, |
4067 | &queue_ra_entry.attr, | 4087 | &queue_ra_entry.attr, |
4068 | &queue_max_hw_sectors_entry.attr, | 4088 | &queue_max_hw_sectors_entry.attr, |
4069 | &queue_max_sectors_entry.attr, | 4089 | &queue_max_sectors_entry.attr, |
4070 | &queue_iosched_entry.attr, | 4090 | &queue_iosched_entry.attr, |
4071 | NULL, | 4091 | NULL, |
4072 | }; | 4092 | }; |
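/*
 * Userspace view of the attributes registered above (illustrative sketch;
 * the device name "sda" is an assumption). Each entry appears as a file
 * under /sys/block/<disk>/queue/, read through queue_attr_show() and
 * written through queue_attr_store() defined below.
 */
#include <stdio.h>

int main(void)
{
	char line[64];
	FILE *f = fopen("/sys/block/sda/queue/max_sectors_kb", "r");

	if (!f)
		return 1;
	if (fgets(line, sizeof(line), f))
		printf("max_sectors_kb: %s", line);
	fclose(f);
	return 0;
}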
4073 | 4093 | ||
4074 | #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) | 4094 | #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) |
4075 | 4095 | ||
4076 | static ssize_t | 4096 | static ssize_t |
4077 | queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | 4097 | queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) |
4078 | { | 4098 | { |
4079 | struct queue_sysfs_entry *entry = to_queue(attr); | 4099 | struct queue_sysfs_entry *entry = to_queue(attr); |
4080 | struct request_queue *q = | 4100 | struct request_queue *q = |
4081 | container_of(kobj, struct request_queue, kobj); | 4101 | container_of(kobj, struct request_queue, kobj); |
4082 | ssize_t res; | 4102 | ssize_t res; |
4083 | 4103 | ||
4084 | if (!entry->show) | 4104 | if (!entry->show) |
4085 | return -EIO; | 4105 | return -EIO; |
4086 | mutex_lock(&q->sysfs_lock); | 4106 | mutex_lock(&q->sysfs_lock); |
4087 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | 4107 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { |
4088 | mutex_unlock(&q->sysfs_lock); | 4108 | mutex_unlock(&q->sysfs_lock); |
4089 | return -ENOENT; | 4109 | return -ENOENT; |
4090 | } | 4110 | } |
4091 | res = entry->show(q, page); | 4111 | res = entry->show(q, page); |
4092 | mutex_unlock(&q->sysfs_lock); | 4112 | mutex_unlock(&q->sysfs_lock); |
4093 | return res; | 4113 | return res; |
4094 | } | 4114 | } |
4095 | 4115 | ||
4096 | static ssize_t | 4116 | static ssize_t |
4097 | queue_attr_store(struct kobject *kobj, struct attribute *attr, | 4117 | queue_attr_store(struct kobject *kobj, struct attribute *attr, |
4098 | const char *page, size_t length) | 4118 | const char *page, size_t length) |
4099 | { | 4119 | { |
4100 | struct queue_sysfs_entry *entry = to_queue(attr); | 4120 | struct queue_sysfs_entry *entry = to_queue(attr); |
4101 | struct request_queue *q = container_of(kobj, struct request_queue, kobj); | 4121 | struct request_queue *q = container_of(kobj, struct request_queue, kobj); |
4102 | 4122 | ||
4103 | ssize_t res; | 4123 | ssize_t res; |
4104 | 4124 | ||
4105 | if (!entry->store) | 4125 | if (!entry->store) |
4106 | return -EIO; | 4126 | return -EIO; |
4107 | mutex_lock(&q->sysfs_lock); | 4127 | mutex_lock(&q->sysfs_lock); |
4108 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | 4128 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { |
4109 | mutex_unlock(&q->sysfs_lock); | 4129 | mutex_unlock(&q->sysfs_lock); |
4110 | return -ENOENT; | 4130 | return -ENOENT; |
4111 | } | 4131 | } |
4112 | res = entry->store(q, page, length); | 4132 | res = entry->store(q, page, length); |
4113 | mutex_unlock(&q->sysfs_lock); | 4133 | mutex_unlock(&q->sysfs_lock); |
4114 | return res; | 4134 | return res; |
4115 | } | 4135 | } |
4116 | 4136 | ||
4117 | static struct sysfs_ops queue_sysfs_ops = { | 4137 | static struct sysfs_ops queue_sysfs_ops = { |
4118 | .show = queue_attr_show, | 4138 | .show = queue_attr_show, |
4119 | .store = queue_attr_store, | 4139 | .store = queue_attr_store, |
4120 | }; | 4140 | }; |
4121 | 4141 | ||
4122 | static struct kobj_type queue_ktype = { | 4142 | static struct kobj_type queue_ktype = { |
4123 | .sysfs_ops = &queue_sysfs_ops, | 4143 | .sysfs_ops = &queue_sysfs_ops, |
4124 | .default_attrs = default_attrs, | 4144 | .default_attrs = default_attrs, |
4125 | .release = blk_release_queue, | 4145 | .release = blk_release_queue, |
4126 | }; | 4146 | }; |
4127 | 4147 | ||
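Each queue_sysfs_entry above supplies a show and (optionally) a store callback, and queue_attr_show()/queue_attr_store() below dispatch to them under q->sysfs_lock, refusing access once the queue is flagged QUEUE_FLAG_DEAD; the result is the familiar set of files under /sys/block/<disk>/queue/. For illustration only (a sketch, not part of this patch; the "foo" attribute and its helpers are made up), another tunable would follow the same pattern and be added to default_attrs[]:

static int queue_foo;	/* hypothetical tunable, for this sketch only */

static ssize_t queue_foo_show(struct request_queue *q, char *page)
{
	return sprintf(page, "%d\n", queue_foo);
}

static ssize_t queue_foo_store(struct request_queue *q, const char *page,
			       size_t count)
{
	char *p = (char *) page;

	queue_foo = simple_strtoul(p, &p, 10);
	return count;
}

static struct queue_sysfs_entry queue_foo_entry = {
	.attr = {.name = "foo", .mode = S_IRUGO | S_IWUSR },
	.show = queue_foo_show,
	.store = queue_foo_store,
};
/* ...and &queue_foo_entry.attr would be appended to default_attrs[] */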
4128 | int blk_register_queue(struct gendisk *disk) | 4148 | int blk_register_queue(struct gendisk *disk) |
4129 | { | 4149 | { |
4130 | int ret; | 4150 | int ret; |
4131 | 4151 | ||
4132 | struct request_queue *q = disk->queue; | 4152 | struct request_queue *q = disk->queue; |
4133 | 4153 | ||
4134 | if (!q || !q->request_fn) | 4154 | if (!q || !q->request_fn) |
4135 | return -ENXIO; | 4155 | return -ENXIO; |
4136 | 4156 | ||
4137 | q->kobj.parent = kobject_get(&disk->kobj); | 4157 | q->kobj.parent = kobject_get(&disk->kobj); |
4138 | 4158 | ||
4139 | ret = kobject_add(&q->kobj); | 4159 | ret = kobject_add(&q->kobj); |
4140 | if (ret < 0) | 4160 | if (ret < 0) |
4141 | return ret; | 4161 | return ret; |
4142 | 4162 | ||
4143 | kobject_uevent(&q->kobj, KOBJ_ADD); | 4163 | kobject_uevent(&q->kobj, KOBJ_ADD); |
4144 | 4164 | ||
4145 | ret = elv_register_queue(q); | 4165 | ret = elv_register_queue(q); |
4146 | if (ret) { | 4166 | if (ret) { |
4147 | kobject_uevent(&q->kobj, KOBJ_REMOVE); | 4167 | kobject_uevent(&q->kobj, KOBJ_REMOVE); |
4148 | kobject_del(&q->kobj); | 4168 | kobject_del(&q->kobj); |
4149 | return ret; | 4169 | return ret; |
4150 | } | 4170 | } |
4151 | 4171 | ||
4152 | return 0; | 4172 | return 0; |
4153 | } | 4173 | } |
4154 | 4174 | ||
4155 | void blk_unregister_queue(struct gendisk *disk) | 4175 | void blk_unregister_queue(struct gendisk *disk) |
4156 | { | 4176 | { |
4157 | struct request_queue *q = disk->queue; | 4177 | struct request_queue *q = disk->queue; |
4158 | 4178 | ||
4159 | if (q && q->request_fn) { | 4179 | if (q && q->request_fn) { |
include/linux/bio.h
1 | /* | 1 | /* |
2 | * 2.5 block I/O model | 2 | * 2.5 block I/O model |
3 | * | 3 | * |
4 | * Copyright (C) 2001 Jens Axboe <axboe@suse.de> | 4 | * Copyright (C) 2001 Jens Axboe <axboe@suse.de> |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License version 2 as | 7 | * it under the terms of the GNU General Public License version 2 as |
8 | * published by the Free Software Foundation. | 8 | * published by the Free Software Foundation. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | 12 | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | * GNU General Public License for more details. | 14 | * GNU General Public License for more details. |
15 | * | 15 | * |
16 | * You should have received a copy of the GNU General Public License | 16 | * You should have received a copy of the GNU General Public License |
17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- | 18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- |
19 | */ | 19 | */ |
20 | #ifndef __LINUX_BIO_H | 20 | #ifndef __LINUX_BIO_H |
21 | #define __LINUX_BIO_H | 21 | #define __LINUX_BIO_H |
22 | 22 | ||
23 | #include <linux/highmem.h> | 23 | #include <linux/highmem.h> |
24 | #include <linux/mempool.h> | 24 | #include <linux/mempool.h> |
25 | #include <linux/ioprio.h> | 25 | #include <linux/ioprio.h> |
26 | 26 | ||
27 | #ifdef CONFIG_BLOCK | 27 | #ifdef CONFIG_BLOCK |
28 | 28 | ||
29 | /* Platforms may set this to teach the BIO layer about IOMMU hardware. */ | 29 | /* Platforms may set this to teach the BIO layer about IOMMU hardware. */ |
30 | #include <asm/io.h> | 30 | #include <asm/io.h> |
31 | 31 | ||
32 | #if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY) | 32 | #if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY) |
33 | #define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1)) | 33 | #define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1)) |
34 | #define BIOVEC_VIRT_OVERSIZE(x) ((x) > BIO_VMERGE_MAX_SIZE) | 34 | #define BIOVEC_VIRT_OVERSIZE(x) ((x) > BIO_VMERGE_MAX_SIZE) |
35 | #else | 35 | #else |
36 | #define BIOVEC_VIRT_START_SIZE(x) 0 | 36 | #define BIOVEC_VIRT_START_SIZE(x) 0 |
37 | #define BIOVEC_VIRT_OVERSIZE(x) 0 | 37 | #define BIOVEC_VIRT_OVERSIZE(x) 0 |
38 | #endif | 38 | #endif |
39 | 39 | ||
40 | #ifndef BIO_VMERGE_BOUNDARY | 40 | #ifndef BIO_VMERGE_BOUNDARY |
41 | #define BIO_VMERGE_BOUNDARY 0 | 41 | #define BIO_VMERGE_BOUNDARY 0 |
42 | #endif | 42 | #endif |
43 | 43 | ||
44 | #define BIO_DEBUG | 44 | #define BIO_DEBUG |
45 | 45 | ||
46 | #ifdef BIO_DEBUG | 46 | #ifdef BIO_DEBUG |
47 | #define BIO_BUG_ON BUG_ON | 47 | #define BIO_BUG_ON BUG_ON |
48 | #else | 48 | #else |
49 | #define BIO_BUG_ON | 49 | #define BIO_BUG_ON |
50 | #endif | 50 | #endif |
51 | 51 | ||
52 | #define BIO_MAX_PAGES 256 | 52 | #define BIO_MAX_PAGES 256 |
53 | #define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_CACHE_SHIFT) | 53 | #define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_CACHE_SHIFT) |
54 | #define BIO_MAX_SECTORS (BIO_MAX_SIZE >> 9) | 54 | #define BIO_MAX_SECTORS (BIO_MAX_SIZE >> 9) |
55 | 55 | ||
56 | /* | 56 | /* |
57 | * was unsigned short, but we might as well be ready for > 64kB I/O pages | 57 | * was unsigned short, but we might as well be ready for > 64kB I/O pages |
58 | */ | 58 | */ |
59 | struct bio_vec { | 59 | struct bio_vec { |
60 | struct page *bv_page; | 60 | struct page *bv_page; |
61 | unsigned int bv_len; | 61 | unsigned int bv_len; |
62 | unsigned int bv_offset; | 62 | unsigned int bv_offset; |
63 | }; | 63 | }; |
64 | 64 | ||
65 | struct bio_set; | 65 | struct bio_set; |
66 | struct bio; | 66 | struct bio; |
67 | typedef void (bio_end_io_t) (struct bio *, int); | 67 | typedef void (bio_end_io_t) (struct bio *, int); |
68 | typedef void (bio_destructor_t) (struct bio *); | 68 | typedef void (bio_destructor_t) (struct bio *); |
69 | 69 | ||
70 | /* | 70 | /* |
71 | * main unit of I/O for the block layer and lower layers (ie drivers and | 71 | * main unit of I/O for the block layer and lower layers (ie drivers and |
72 | * stacking drivers) | 72 | * stacking drivers) |
73 | */ | 73 | */ |
74 | struct bio { | 74 | struct bio { |
75 | sector_t bi_sector; /* device address in 512 byte | 75 | sector_t bi_sector; /* device address in 512 byte |
76 | sectors */ | 76 | sectors */ |
77 | struct bio *bi_next; /* request queue link */ | 77 | struct bio *bi_next; /* request queue link */ |
78 | struct block_device *bi_bdev; | 78 | struct block_device *bi_bdev; |
79 | unsigned long bi_flags; /* status, command, etc */ | 79 | unsigned long bi_flags; /* status, command, etc */ |
80 | unsigned long bi_rw; /* bottom bits READ/WRITE, | 80 | unsigned long bi_rw; /* bottom bits READ/WRITE, |
81 | * top bits priority | 81 | * top bits priority |
82 | */ | 82 | */ |
83 | 83 | ||
84 | unsigned short bi_vcnt; /* how many bio_vec's */ | 84 | unsigned short bi_vcnt; /* how many bio_vec's */ |
85 | unsigned short bi_idx; /* current index into bvl_vec */ | 85 | unsigned short bi_idx; /* current index into bvl_vec */ |
86 | 86 | ||
87 | /* Number of segments in this BIO after | 87 | /* Number of segments in this BIO after |
88 | * physical address coalescing is performed. | 88 | * physical address coalescing is performed. |
89 | */ | 89 | */ |
90 | unsigned short bi_phys_segments; | 90 | unsigned short bi_phys_segments; |
91 | 91 | ||
92 | /* Number of segments after physical and DMA remapping | 92 | /* Number of segments after physical and DMA remapping |
93 | * hardware coalescing is performed. | 93 | * hardware coalescing is performed. |
94 | */ | 94 | */ |
95 | unsigned short bi_hw_segments; | 95 | unsigned short bi_hw_segments; |
96 | 96 | ||
97 | unsigned int bi_size; /* residual I/O count */ | 97 | unsigned int bi_size; /* residual I/O count */ |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * To keep track of the max hw size, we account for the | 100 | * To keep track of the max hw size, we account for the |
101 | * sizes of the first and last virtually mergeable segments | 101 | * sizes of the first and last virtually mergeable segments |
102 | * in this bio | 102 | * in this bio |
103 | */ | 103 | */ |
104 | unsigned int bi_hw_front_size; | 104 | unsigned int bi_hw_front_size; |
105 | unsigned int bi_hw_back_size; | 105 | unsigned int bi_hw_back_size; |
106 | 106 | ||
107 | unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ | 107 | unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ |
108 | 108 | ||
109 | struct bio_vec *bi_io_vec; /* the actual vec list */ | 109 | struct bio_vec *bi_io_vec; /* the actual vec list */ |
110 | 110 | ||
111 | bio_end_io_t *bi_end_io; | 111 | bio_end_io_t *bi_end_io; |
112 | atomic_t bi_cnt; /* pin count */ | 112 | atomic_t bi_cnt; /* pin count */ |
113 | 113 | ||
114 | void *bi_private; | 114 | void *bi_private; |
115 | 115 | ||
116 | bio_destructor_t *bi_destructor; /* destructor */ | 116 | bio_destructor_t *bi_destructor; /* destructor */ |
117 | }; | 117 | }; |
118 | 118 | ||
119 | /* | 119 | /* |
120 | * bio flags | 120 | * bio flags |
121 | */ | 121 | */ |
122 | #define BIO_UPTODATE 0 /* ok after I/O completion */ | 122 | #define BIO_UPTODATE 0 /* ok after I/O completion */ |
123 | #define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ | 123 | #define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ |
124 | #define BIO_EOF 2 /* out-of-bounds error */ | 124 | #define BIO_EOF 2 /* out-of-bounds error */ |
125 | #define BIO_SEG_VALID 3 /* nr_hw_seg valid */ | 125 | #define BIO_SEG_VALID 3 /* nr_hw_seg valid */ |
126 | #define BIO_CLONED 4 /* doesn't own data */ | 126 | #define BIO_CLONED 4 /* doesn't own data */ |
127 | #define BIO_BOUNCED 5 /* bio is a bounce bio */ | 127 | #define BIO_BOUNCED 5 /* bio is a bounce bio */ |
128 | #define BIO_USER_MAPPED 6 /* contains user pages */ | 128 | #define BIO_USER_MAPPED 6 /* contains user pages */ |
129 | #define BIO_EOPNOTSUPP 7 /* not supported */ | 129 | #define BIO_EOPNOTSUPP 7 /* not supported */ |
130 | #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) | 130 | #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) |
131 | 131 | ||
132 | /* | 132 | /* |
133 | * top 4 bits of bio flags indicate the pool this bio came from | 133 | * top 4 bits of bio flags indicate the pool this bio came from |
134 | */ | 134 | */ |
135 | #define BIO_POOL_BITS (4) | 135 | #define BIO_POOL_BITS (4) |
136 | #define BIO_POOL_OFFSET (BITS_PER_LONG - BIO_POOL_BITS) | 136 | #define BIO_POOL_OFFSET (BITS_PER_LONG - BIO_POOL_BITS) |
137 | #define BIO_POOL_MASK (1UL << BIO_POOL_OFFSET) | 137 | #define BIO_POOL_MASK (1UL << BIO_POOL_OFFSET) |
138 | #define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET) | 138 | #define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET) |
139 | 139 | ||
140 | /* | 140 | /* |
141 | * bio bi_rw flags | 141 | * bio bi_rw flags |
142 | * | 142 | * |
143 | * bit 0 -- read (not set) or write (set) | 143 | * bit 0 -- read (not set) or write (set) |
144 | * bit 1 -- rw-ahead when set | 144 | * bit 1 -- rw-ahead when set |
145 | * bit 2 -- barrier | 145 | * bit 2 -- barrier |
146 | * bit 3 -- fail fast, don't want low level driver retries | 146 | * bit 3 -- fail fast, don't want low level driver retries |
147 | * bit 4 -- synchronous I/O hint: the block layer will unplug immediately | 147 | * bit 4 -- synchronous I/O hint: the block layer will unplug immediately |
148 | */ | 148 | */ |
149 | #define BIO_RW 0 | 149 | #define BIO_RW 0 |
150 | #define BIO_RW_AHEAD 1 | 150 | #define BIO_RW_AHEAD 1 |
151 | #define BIO_RW_BARRIER 2 | 151 | #define BIO_RW_BARRIER 2 |
152 | #define BIO_RW_FAILFAST 3 | 152 | #define BIO_RW_FAILFAST 3 |
153 | #define BIO_RW_SYNC 4 | 153 | #define BIO_RW_SYNC 4 |
154 | #define BIO_RW_META 5 | 154 | #define BIO_RW_META 5 |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * upper 16 bits of bi_rw define the io priority of this bio | 157 | * upper 16 bits of bi_rw define the io priority of this bio |
158 | */ | 158 | */ |
159 | #define BIO_PRIO_SHIFT (8 * sizeof(unsigned long) - IOPRIO_BITS) | 159 | #define BIO_PRIO_SHIFT (8 * sizeof(unsigned long) - IOPRIO_BITS) |
160 | #define bio_prio(bio) ((bio)->bi_rw >> BIO_PRIO_SHIFT) | 160 | #define bio_prio(bio) ((bio)->bi_rw >> BIO_PRIO_SHIFT) |
161 | #define bio_prio_valid(bio) ioprio_valid(bio_prio(bio)) | 161 | #define bio_prio_valid(bio) ioprio_valid(bio_prio(bio)) |
162 | 162 | ||
163 | #define bio_set_prio(bio, prio) do { \ | 163 | #define bio_set_prio(bio, prio) do { \ |
164 | WARN_ON(prio >= (1 << IOPRIO_BITS)); \ | 164 | WARN_ON(prio >= (1 << IOPRIO_BITS)); \ |
165 | (bio)->bi_rw &= ((1UL << BIO_PRIO_SHIFT) - 1); \ | 165 | (bio)->bi_rw &= ((1UL << BIO_PRIO_SHIFT) - 1); \ |
166 | (bio)->bi_rw |= ((unsigned long) (prio) << BIO_PRIO_SHIFT); \ | 166 | (bio)->bi_rw |= ((unsigned long) (prio) << BIO_PRIO_SHIFT); \ |
167 | } while (0) | 167 | } while (0) |
168 | 168 | ||
169 | /* | 169 | /* |
170 | * various member access, note that bio_data should of course not be used | 170 | * various member access, note that bio_data should of course not be used |
171 | * on highmem page vectors | 171 | * on highmem page vectors |
172 | */ | 172 | */ |
173 | #define bio_iovec_idx(bio, idx) (&((bio)->bi_io_vec[(idx)])) | 173 | #define bio_iovec_idx(bio, idx) (&((bio)->bi_io_vec[(idx)])) |
174 | #define bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_idx) | 174 | #define bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_idx) |
175 | #define bio_page(bio) bio_iovec((bio))->bv_page | 175 | #define bio_page(bio) bio_iovec((bio))->bv_page |
176 | #define bio_offset(bio) bio_iovec((bio))->bv_offset | 176 | #define bio_offset(bio) bio_iovec((bio))->bv_offset |
177 | #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) | 177 | #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) |
178 | #define bio_sectors(bio) ((bio)->bi_size >> 9) | 178 | #define bio_sectors(bio) ((bio)->bi_size >> 9) |
179 | #define bio_cur_sectors(bio) (bio_iovec(bio)->bv_len >> 9) | ||
180 | #define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) | ||
181 | #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) | 179 | #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) |
182 | #define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) | 180 | #define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) |
183 | #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) | 181 | #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) |
184 | #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) | 182 | #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) |
185 | #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) | 183 | #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) |
184 | #define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size) | ||
185 | |||
186 | static inline unsigned int bio_cur_sectors(struct bio *bio) | ||
187 | { | ||
188 | if (bio->bi_vcnt) | ||
189 | return bio_iovec(bio)->bv_len >> 9; | ||
190 | |||
191 | return 0; | ||
192 | } | ||
193 | |||
194 | static inline void *bio_data(struct bio *bio) | ||
195 | { | ||
196 | if (bio->bi_vcnt) | ||
197 | return page_address(bio_page(bio)) + bio_offset(bio); | ||
198 | |||
199 | return NULL; | ||
200 | } | ||
186 | 201 | ||
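The three additions above are the bio-side core of the change: a barrier bio with a zero bi_size now has a name (bio_empty_barrier()), and bio_cur_sectors()/bio_data() no longer dereference a bio_vec that is not there. A rough sketch of how such a bio is submitted and waited for (illustration only; the function names are invented, error handling is trimmed, and <linux/completion.h> is assumed):

static void empty_barrier_end_io(struct bio *bio, int err)
{
	complete(bio->bi_private);
}

static int issue_empty_barrier(struct block_device *bdev)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct bio *bio;
	int ret = 0;

	bio = bio_alloc(GFP_KERNEL, 0);		/* zero bio_vecs */
	if (!bio)
		return -ENOMEM;

	bio->bi_bdev = bdev;
	bio->bi_end_io = empty_barrier_end_io;
	bio->bi_private = &wait;

	/* a WRITE with the barrier bit set, but bi_size stays 0 */
	submit_bio((1 << BIO_RW) | (1 << BIO_RW_BARRIER), bio);
	wait_for_completion(&wait);

	if (!bio_flagged(bio, BIO_UPTODATE))
		ret = -EIO;

	bio_put(bio);
	return ret;
}

Lower layers that inspect the payload can use bio_empty_barrier() to skip straight to their cache-flush path.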
187 | /* | 202 | /* |
188 | * will die | 203 | * will die |
189 | */ | 204 | */ |
190 | #define bio_to_phys(bio) (page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio))) | 205 | #define bio_to_phys(bio) (page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio))) |
191 | #define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset) | 206 | #define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset) |
192 | 207 | ||
193 | /* | 208 | /* |
194 | * queues that have highmem support enabled may still need to revert to | 209 | * queues that have highmem support enabled may still need to revert to |
195 | * PIO transfers occasionally and thus map high pages temporarily. For | 210 | * PIO transfers occasionally and thus map high pages temporarily. For |
196 | * permanent PIO fall back, user is probably better off disabling highmem | 211 | * permanent PIO fall back, user is probably better off disabling highmem |
197 | * I/O completely on that queue (see ide-dma for example) | 212 | * I/O completely on that queue (see ide-dma for example) |
198 | */ | 213 | */ |
199 | #define __bio_kmap_atomic(bio, idx, kmtype) \ | 214 | #define __bio_kmap_atomic(bio, idx, kmtype) \ |
200 | (kmap_atomic(bio_iovec_idx((bio), (idx))->bv_page, kmtype) + \ | 215 | (kmap_atomic(bio_iovec_idx((bio), (idx))->bv_page, kmtype) + \ |
201 | bio_iovec_idx((bio), (idx))->bv_offset) | 216 | bio_iovec_idx((bio), (idx))->bv_offset) |
202 | 217 | ||
203 | #define __bio_kunmap_atomic(addr, kmtype) kunmap_atomic(addr, kmtype) | 218 | #define __bio_kunmap_atomic(addr, kmtype) kunmap_atomic(addr, kmtype) |
204 | 219 | ||
205 | /* | 220 | /* |
206 | * merge helpers etc | 221 | * merge helpers etc |
207 | */ | 222 | */ |
208 | 223 | ||
209 | #define __BVEC_END(bio) bio_iovec_idx((bio), (bio)->bi_vcnt - 1) | 224 | #define __BVEC_END(bio) bio_iovec_idx((bio), (bio)->bi_vcnt - 1) |
210 | #define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_idx) | 225 | #define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_idx) |
211 | 226 | ||
212 | /* | 227 | /* |
213 | * allow arch override, for eg virtualized architectures (put in asm/io.h) | 228 | * allow arch override, for eg virtualized architectures (put in asm/io.h) |
214 | */ | 229 | */ |
215 | #ifndef BIOVEC_PHYS_MERGEABLE | 230 | #ifndef BIOVEC_PHYS_MERGEABLE |
216 | #define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ | 231 | #define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ |
217 | ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) | 232 | ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) |
218 | #endif | 233 | #endif |
219 | 234 | ||
220 | #define BIOVEC_VIRT_MERGEABLE(vec1, vec2) \ | 235 | #define BIOVEC_VIRT_MERGEABLE(vec1, vec2) \ |
221 | ((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0) | 236 | ((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0) |
222 | #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ | 237 | #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ |
223 | (((addr1) | (mask)) == (((addr2) - 1) | (mask))) | 238 | (((addr1) | (mask)) == (((addr2) - 1) | (mask))) |
224 | #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ | 239 | #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ |
225 | __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, (q)->seg_boundary_mask) | 240 | __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, (q)->seg_boundary_mask) |
226 | #define BIO_SEG_BOUNDARY(q, b1, b2) \ | 241 | #define BIO_SEG_BOUNDARY(q, b1, b2) \ |
227 | BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2))) | 242 | BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2))) |
228 | 243 | ||
229 | #define bio_io_error(bio) bio_endio((bio), -EIO) | 244 | #define bio_io_error(bio) bio_endio((bio), -EIO) |
230 | 245 | ||
231 | /* | 246 | /* |
232 | * drivers should not use the __ version unless they _really_ want to | 247 | * drivers should not use the __ version unless they _really_ want to |
233 | * run through the entire bio and not just pending pieces | 248 | * run through the entire bio and not just pending pieces |
234 | */ | 249 | */ |
235 | #define __bio_for_each_segment(bvl, bio, i, start_idx) \ | 250 | #define __bio_for_each_segment(bvl, bio, i, start_idx) \ |
236 | for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \ | 251 | for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \ |
237 | i < (bio)->bi_vcnt; \ | 252 | i < (bio)->bi_vcnt; \ |
238 | bvl++, i++) | 253 | bvl++, i++) |
239 | 254 | ||
240 | #define bio_for_each_segment(bvl, bio, i) \ | 255 | #define bio_for_each_segment(bvl, bio, i) \ |
241 | __bio_for_each_segment(bvl, bio, i, (bio)->bi_idx) | 256 | __bio_for_each_segment(bvl, bio, i, (bio)->bi_idx) |
242 | 257 | ||
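For orientation (an example, not from the patch): bio_for_each_segment() is the usual way to walk the unprocessed payload, and for an empty barrier the loop body simply never executes because there are no segments:

static unsigned int count_bio_bytes(struct bio *bio)	/* hypothetical helper */
{
	struct bio_vec *bvec;
	unsigned int bytes = 0;
	int i;

	bio_for_each_segment(bvec, bio, i)
		bytes += bvec->bv_len;

	return bytes;	/* normally matches the remaining bi_size */
}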
243 | /* | 258 | /* |
244 | * get a reference to a bio, so it won't disappear. the intended use is | 259 | * get a reference to a bio, so it won't disappear. the intended use is |
245 | * something like: | 260 | * something like: |
246 | * | 261 | * |
247 | * bio_get(bio); | 262 | * bio_get(bio); |
248 | * submit_bio(rw, bio); | 263 | * submit_bio(rw, bio); |
249 | * if (bio->bi_flags ...) | 264 | * if (bio->bi_flags ...) |
250 | * do_something | 265 | * do_something |
251 | * bio_put(bio); | 266 | * bio_put(bio); |
252 | * | 267 | * |
253 | * without the bio_get(), it could potentially complete I/O before submit_bio | 268 | * without the bio_get(), it could potentially complete I/O before submit_bio |
254 | * returns. and then bio would be freed memory when if (bio->bi_flags ...) | 269 | * returns. and then bio would be freed memory when if (bio->bi_flags ...) |
255 | * runs | 270 | * runs |
256 | */ | 271 | */ |
257 | #define bio_get(bio) atomic_inc(&(bio)->bi_cnt) | 272 | #define bio_get(bio) atomic_inc(&(bio)->bi_cnt) |
258 | 273 | ||
259 | 274 | ||
260 | /* | 275 | /* |
261 | * A bio_pair is used when we need to split a bio. | 276 | * A bio_pair is used when we need to split a bio. |
262 | * This can only happen for a bio that refers to just one | 277 | * This can only happen for a bio that refers to just one |
263 | * page of data, and in the unusual situation when the | 278 | * page of data, and in the unusual situation when the |
264 | * page crosses a chunk/device boundary | 279 | * page crosses a chunk/device boundary |
265 | * | 280 | * |
266 | * The address of the master bio is stored in bio1.bi_private | 281 | * The address of the master bio is stored in bio1.bi_private |
267 | * The address of the pool the pair was allocated from is stored | 282 | * The address of the pool the pair was allocated from is stored |
268 | * in bio2.bi_private | 283 | * in bio2.bi_private |
269 | */ | 284 | */ |
270 | struct bio_pair { | 285 | struct bio_pair { |
271 | struct bio bio1, bio2; | 286 | struct bio bio1, bio2; |
272 | struct bio_vec bv1, bv2; | 287 | struct bio_vec bv1, bv2; |
273 | atomic_t cnt; | 288 | atomic_t cnt; |
274 | int error; | 289 | int error; |
275 | }; | 290 | }; |
276 | extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, | 291 | extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, |
277 | int first_sectors); | 292 | int first_sectors); |
278 | extern mempool_t *bio_split_pool; | 293 | extern mempool_t *bio_split_pool; |
279 | extern void bio_pair_release(struct bio_pair *dbio); | 294 | extern void bio_pair_release(struct bio_pair *dbio); |
280 | 295 | ||
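A typical split-and-resubmit sequence, modelled loosely on how md uses these helpers (a sketch; generic_make_request() is declared in blkdev.h and the wrapper name is invented):

static void split_and_resubmit(struct bio *bio, int first_sectors)
{
	struct bio_pair *bp;

	/* bio1 covers the first 'first_sectors' sectors, bio2 the rest */
	bp = bio_split(bio, bio_split_pool, first_sectors);
	generic_make_request(&bp->bio1);
	generic_make_request(&bp->bio2);
	bio_pair_release(bp);
}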
281 | extern struct bio_set *bioset_create(int, int); | 296 | extern struct bio_set *bioset_create(int, int); |
282 | extern void bioset_free(struct bio_set *); | 297 | extern void bioset_free(struct bio_set *); |
283 | 298 | ||
284 | extern struct bio *bio_alloc(gfp_t, int); | 299 | extern struct bio *bio_alloc(gfp_t, int); |
285 | extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); | 300 | extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); |
286 | extern void bio_put(struct bio *); | 301 | extern void bio_put(struct bio *); |
287 | extern void bio_free(struct bio *, struct bio_set *); | 302 | extern void bio_free(struct bio *, struct bio_set *); |
288 | 303 | ||
289 | extern void bio_endio(struct bio *, int); | 304 | extern void bio_endio(struct bio *, int); |
290 | struct request_queue; | 305 | struct request_queue; |
291 | extern int bio_phys_segments(struct request_queue *, struct bio *); | 306 | extern int bio_phys_segments(struct request_queue *, struct bio *); |
292 | extern int bio_hw_segments(struct request_queue *, struct bio *); | 307 | extern int bio_hw_segments(struct request_queue *, struct bio *); |
293 | 308 | ||
294 | extern void __bio_clone(struct bio *, struct bio *); | 309 | extern void __bio_clone(struct bio *, struct bio *); |
295 | extern struct bio *bio_clone(struct bio *, gfp_t); | 310 | extern struct bio *bio_clone(struct bio *, gfp_t); |
296 | 311 | ||
297 | extern void bio_init(struct bio *); | 312 | extern void bio_init(struct bio *); |
298 | 313 | ||
299 | extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); | 314 | extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); |
300 | extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, | 315 | extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, |
301 | unsigned int, unsigned int); | 316 | unsigned int, unsigned int); |
302 | extern int bio_get_nr_vecs(struct block_device *); | 317 | extern int bio_get_nr_vecs(struct block_device *); |
303 | extern struct bio *bio_map_user(struct request_queue *, struct block_device *, | 318 | extern struct bio *bio_map_user(struct request_queue *, struct block_device *, |
304 | unsigned long, unsigned int, int); | 319 | unsigned long, unsigned int, int); |
305 | struct sg_iovec; | 320 | struct sg_iovec; |
306 | extern struct bio *bio_map_user_iov(struct request_queue *, | 321 | extern struct bio *bio_map_user_iov(struct request_queue *, |
307 | struct block_device *, | 322 | struct block_device *, |
308 | struct sg_iovec *, int, int); | 323 | struct sg_iovec *, int, int); |
309 | extern void bio_unmap_user(struct bio *); | 324 | extern void bio_unmap_user(struct bio *); |
310 | extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, | 325 | extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, |
311 | gfp_t); | 326 | gfp_t); |
312 | extern void bio_set_pages_dirty(struct bio *bio); | 327 | extern void bio_set_pages_dirty(struct bio *bio); |
313 | extern void bio_check_pages_dirty(struct bio *bio); | 328 | extern void bio_check_pages_dirty(struct bio *bio); |
314 | extern void bio_release_pages(struct bio *bio); | 329 | extern void bio_release_pages(struct bio *bio); |
315 | extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); | 330 | extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); |
316 | extern int bio_uncopy_user(struct bio *); | 331 | extern int bio_uncopy_user(struct bio *); |
317 | void zero_fill_bio(struct bio *bio); | 332 | void zero_fill_bio(struct bio *bio); |
318 | 333 | ||
319 | #ifdef CONFIG_HIGHMEM | 334 | #ifdef CONFIG_HIGHMEM |
320 | /* | 335 | /* |
321 | * remember to add offset! and never ever reenable interrupts between a | 336 | * remember to add offset! and never ever reenable interrupts between a |
322 | * bvec_kmap_irq and bvec_kunmap_irq!! | 337 | * bvec_kmap_irq and bvec_kunmap_irq!! |
323 | * | 338 | * |
324 | * This function MUST be inlined - it plays with the CPU interrupt flags. | 339 | * This function MUST be inlined - it plays with the CPU interrupt flags. |
325 | */ | 340 | */ |
326 | static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) | 341 | static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) |
327 | { | 342 | { |
328 | unsigned long addr; | 343 | unsigned long addr; |
329 | 344 | ||
330 | /* | 345 | /* |
331 | * might not be a highmem page, but the preempt/irq count | 346 | * might not be a highmem page, but the preempt/irq count |
332 | * balancing is a lot nicer this way | 347 | * balancing is a lot nicer this way |
333 | */ | 348 | */ |
334 | local_irq_save(*flags); | 349 | local_irq_save(*flags); |
335 | addr = (unsigned long) kmap_atomic(bvec->bv_page, KM_BIO_SRC_IRQ); | 350 | addr = (unsigned long) kmap_atomic(bvec->bv_page, KM_BIO_SRC_IRQ); |
336 | 351 | ||
337 | BUG_ON(addr & ~PAGE_MASK); | 352 | BUG_ON(addr & ~PAGE_MASK); |
338 | 353 | ||
339 | return (char *) addr + bvec->bv_offset; | 354 | return (char *) addr + bvec->bv_offset; |
340 | } | 355 | } |
341 | 356 | ||
342 | static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) | 357 | static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) |
343 | { | 358 | { |
344 | unsigned long ptr = (unsigned long) buffer & PAGE_MASK; | 359 | unsigned long ptr = (unsigned long) buffer & PAGE_MASK; |
345 | 360 | ||
346 | kunmap_atomic((void *) ptr, KM_BIO_SRC_IRQ); | 361 | kunmap_atomic((void *) ptr, KM_BIO_SRC_IRQ); |
347 | local_irq_restore(*flags); | 362 | local_irq_restore(*flags); |
348 | } | 363 | } |
349 | 364 | ||
350 | #else | 365 | #else |
351 | #define bvec_kmap_irq(bvec, flags) (page_address((bvec)->bv_page) + (bvec)->bv_offset) | 366 | #define bvec_kmap_irq(bvec, flags) (page_address((bvec)->bv_page) + (bvec)->bv_offset) |
352 | #define bvec_kunmap_irq(buf, flags) do { *(flags) = 0; } while (0) | 367 | #define bvec_kunmap_irq(buf, flags) do { *(flags) = 0; } while (0) |
353 | #endif | 368 | #endif |
354 | 369 | ||
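As the comment above insists, the map and unmap must happen back to back with interrupts left off in between; a minimal caller (a sketch, with a hypothetical helper name) looks like:

static void copy_current_segment(struct bio *bio, void *dst)
{
	struct bio_vec *bvec = bio_iovec(bio);
	unsigned long flags;
	char *buf;

	buf = bvec_kmap_irq(bvec, &flags);	/* bv_offset already applied */
	memcpy(dst, buf, bvec->bv_len);
	bvec_kunmap_irq(buf, &flags);
}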
355 | static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, | 370 | static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, |
356 | unsigned long *flags) | 371 | unsigned long *flags) |
357 | { | 372 | { |
358 | return bvec_kmap_irq(bio_iovec_idx(bio, idx), flags); | 373 | return bvec_kmap_irq(bio_iovec_idx(bio, idx), flags); |
359 | } | 374 | } |
360 | #define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags) | 375 | #define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags) |
361 | 376 | ||
362 | #define bio_kmap_irq(bio, flags) \ | 377 | #define bio_kmap_irq(bio, flags) \ |
363 | __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) | 378 | __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) |
364 | #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) | 379 | #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) |
365 | 380 | ||
366 | #endif /* CONFIG_BLOCK */ | 381 | #endif /* CONFIG_BLOCK */ |
include/linux/blkdev.h
1 | #ifndef _LINUX_BLKDEV_H | 1 | #ifndef _LINUX_BLKDEV_H |
2 | #define _LINUX_BLKDEV_H | 2 | #define _LINUX_BLKDEV_H |
3 | 3 | ||
4 | #ifdef CONFIG_BLOCK | 4 | #ifdef CONFIG_BLOCK |
5 | 5 | ||
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/major.h> | 7 | #include <linux/major.h> |
8 | #include <linux/genhd.h> | 8 | #include <linux/genhd.h> |
9 | #include <linux/list.h> | 9 | #include <linux/list.h> |
10 | #include <linux/timer.h> | 10 | #include <linux/timer.h> |
11 | #include <linux/workqueue.h> | 11 | #include <linux/workqueue.h> |
12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
13 | #include <linux/backing-dev.h> | 13 | #include <linux/backing-dev.h> |
14 | #include <linux/wait.h> | 14 | #include <linux/wait.h> |
15 | #include <linux/mempool.h> | 15 | #include <linux/mempool.h> |
16 | #include <linux/bio.h> | 16 | #include <linux/bio.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/stringify.h> | 18 | #include <linux/stringify.h> |
19 | #include <linux/bsg.h> | 19 | #include <linux/bsg.h> |
20 | 20 | ||
21 | #include <asm/scatterlist.h> | 21 | #include <asm/scatterlist.h> |
22 | 22 | ||
23 | struct scsi_ioctl_command; | 23 | struct scsi_ioctl_command; |
24 | 24 | ||
25 | struct request_queue; | 25 | struct request_queue; |
26 | typedef struct request_queue request_queue_t __deprecated; | 26 | typedef struct request_queue request_queue_t __deprecated; |
27 | struct elevator_queue; | 27 | struct elevator_queue; |
28 | typedef struct elevator_queue elevator_t; | 28 | typedef struct elevator_queue elevator_t; |
29 | struct request_pm_state; | 29 | struct request_pm_state; |
30 | struct blk_trace; | 30 | struct blk_trace; |
31 | struct request; | 31 | struct request; |
32 | struct sg_io_hdr; | 32 | struct sg_io_hdr; |
33 | 33 | ||
34 | #define BLKDEV_MIN_RQ 4 | 34 | #define BLKDEV_MIN_RQ 4 |
35 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ | 35 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * This is the per-process anticipatory I/O scheduler state. | 38 | * This is the per-process anticipatory I/O scheduler state. |
39 | */ | 39 | */ |
40 | struct as_io_context { | 40 | struct as_io_context { |
41 | spinlock_t lock; | 41 | spinlock_t lock; |
42 | 42 | ||
43 | void (*dtor)(struct as_io_context *aic); /* destructor */ | 43 | void (*dtor)(struct as_io_context *aic); /* destructor */ |
44 | void (*exit)(struct as_io_context *aic); /* called on task exit */ | 44 | void (*exit)(struct as_io_context *aic); /* called on task exit */ |
45 | 45 | ||
46 | unsigned long state; | 46 | unsigned long state; |
47 | atomic_t nr_queued; /* queued reads & sync writes */ | 47 | atomic_t nr_queued; /* queued reads & sync writes */ |
48 | atomic_t nr_dispatched; /* number of requests gone to the drivers */ | 48 | atomic_t nr_dispatched; /* number of requests gone to the drivers */ |
49 | 49 | ||
50 | /* IO History tracking */ | 50 | /* IO History tracking */ |
51 | /* Thinktime */ | 51 | /* Thinktime */ |
52 | unsigned long last_end_request; | 52 | unsigned long last_end_request; |
53 | unsigned long ttime_total; | 53 | unsigned long ttime_total; |
54 | unsigned long ttime_samples; | 54 | unsigned long ttime_samples; |
55 | unsigned long ttime_mean; | 55 | unsigned long ttime_mean; |
56 | /* Layout pattern */ | 56 | /* Layout pattern */ |
57 | unsigned int seek_samples; | 57 | unsigned int seek_samples; |
58 | sector_t last_request_pos; | 58 | sector_t last_request_pos; |
59 | u64 seek_total; | 59 | u64 seek_total; |
60 | sector_t seek_mean; | 60 | sector_t seek_mean; |
61 | }; | 61 | }; |
62 | 62 | ||
63 | struct cfq_queue; | 63 | struct cfq_queue; |
64 | struct cfq_io_context { | 64 | struct cfq_io_context { |
65 | struct rb_node rb_node; | 65 | struct rb_node rb_node; |
66 | void *key; | 66 | void *key; |
67 | 67 | ||
68 | struct cfq_queue *cfqq[2]; | 68 | struct cfq_queue *cfqq[2]; |
69 | 69 | ||
70 | struct io_context *ioc; | 70 | struct io_context *ioc; |
71 | 71 | ||
72 | unsigned long last_end_request; | 72 | unsigned long last_end_request; |
73 | sector_t last_request_pos; | 73 | sector_t last_request_pos; |
74 | 74 | ||
75 | unsigned long ttime_total; | 75 | unsigned long ttime_total; |
76 | unsigned long ttime_samples; | 76 | unsigned long ttime_samples; |
77 | unsigned long ttime_mean; | 77 | unsigned long ttime_mean; |
78 | 78 | ||
79 | unsigned int seek_samples; | 79 | unsigned int seek_samples; |
80 | u64 seek_total; | 80 | u64 seek_total; |
81 | sector_t seek_mean; | 81 | sector_t seek_mean; |
82 | 82 | ||
83 | struct list_head queue_list; | 83 | struct list_head queue_list; |
84 | 84 | ||
85 | void (*dtor)(struct io_context *); /* destructor */ | 85 | void (*dtor)(struct io_context *); /* destructor */ |
86 | void (*exit)(struct io_context *); /* called on task exit */ | 86 | void (*exit)(struct io_context *); /* called on task exit */ |
87 | }; | 87 | }; |
88 | 88 | ||
89 | /* | 89 | /* |
90 | * This is the per-process I/O subsystem state. It is refcounted and | 90 | * This is the per-process I/O subsystem state. It is refcounted and |
91 | * kmalloc'ed. Currently all fields are modified in process io context | 91 | * kmalloc'ed. Currently all fields are modified in process io context |
92 | * (apart from the atomic refcount), so require no locking. | 92 | * (apart from the atomic refcount), so require no locking. |
93 | */ | 93 | */ |
94 | struct io_context { | 94 | struct io_context { |
95 | atomic_t refcount; | 95 | atomic_t refcount; |
96 | struct task_struct *task; | 96 | struct task_struct *task; |
97 | 97 | ||
98 | unsigned int ioprio_changed; | 98 | unsigned int ioprio_changed; |
99 | 99 | ||
100 | /* | 100 | /* |
101 | * For request batching | 101 | * For request batching |
102 | */ | 102 | */ |
103 | unsigned long last_waited; /* Time last woken after wait for request */ | 103 | unsigned long last_waited; /* Time last woken after wait for request */ |
104 | int nr_batch_requests; /* Number of requests left in the batch */ | 104 | int nr_batch_requests; /* Number of requests left in the batch */ |
105 | 105 | ||
106 | struct as_io_context *aic; | 106 | struct as_io_context *aic; |
107 | struct rb_root cic_root; | 107 | struct rb_root cic_root; |
108 | void *ioc_data; | 108 | void *ioc_data; |
109 | }; | 109 | }; |
110 | 110 | ||
111 | void put_io_context(struct io_context *ioc); | 111 | void put_io_context(struct io_context *ioc); |
112 | void exit_io_context(void); | 112 | void exit_io_context(void); |
113 | struct io_context *get_io_context(gfp_t gfp_flags, int node); | 113 | struct io_context *get_io_context(gfp_t gfp_flags, int node); |
114 | void copy_io_context(struct io_context **pdst, struct io_context **psrc); | 114 | void copy_io_context(struct io_context **pdst, struct io_context **psrc); |
115 | void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); | 115 | void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); |
116 | 116 | ||
117 | struct request; | 117 | struct request; |
118 | typedef void (rq_end_io_fn)(struct request *, int); | 118 | typedef void (rq_end_io_fn)(struct request *, int); |
119 | 119 | ||
120 | struct request_list { | 120 | struct request_list { |
121 | int count[2]; | 121 | int count[2]; |
122 | int starved[2]; | 122 | int starved[2]; |
123 | int elvpriv; | 123 | int elvpriv; |
124 | mempool_t *rq_pool; | 124 | mempool_t *rq_pool; |
125 | wait_queue_head_t wait[2]; | 125 | wait_queue_head_t wait[2]; |
126 | }; | 126 | }; |
127 | 127 | ||
128 | /* | 128 | /* |
129 | * request command types | 129 | * request command types |
130 | */ | 130 | */ |
131 | enum rq_cmd_type_bits { | 131 | enum rq_cmd_type_bits { |
132 | REQ_TYPE_FS = 1, /* fs request */ | 132 | REQ_TYPE_FS = 1, /* fs request */ |
133 | REQ_TYPE_BLOCK_PC, /* scsi command */ | 133 | REQ_TYPE_BLOCK_PC, /* scsi command */ |
134 | REQ_TYPE_SENSE, /* sense request */ | 134 | REQ_TYPE_SENSE, /* sense request */ |
135 | REQ_TYPE_PM_SUSPEND, /* suspend request */ | 135 | REQ_TYPE_PM_SUSPEND, /* suspend request */ |
136 | REQ_TYPE_PM_RESUME, /* resume request */ | 136 | REQ_TYPE_PM_RESUME, /* resume request */ |
137 | REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ | 137 | REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ |
138 | REQ_TYPE_FLUSH, /* flush request */ | 138 | REQ_TYPE_FLUSH, /* flush request */ |
139 | REQ_TYPE_SPECIAL, /* driver defined type */ | 139 | REQ_TYPE_SPECIAL, /* driver defined type */ |
140 | REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ | 140 | REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ |
141 | /* | 141 | /* |
142 | * for ATA/ATAPI devices. this really doesn't belong here, ide should | 142 | * for ATA/ATAPI devices. this really doesn't belong here, ide should |
143 | * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver | 143 | * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver |
144 | * private REQ_LB opcodes to differentiate what type of request this is | 144 | * private REQ_LB opcodes to differentiate what type of request this is |
145 | */ | 145 | */ |
146 | REQ_TYPE_ATA_CMD, | 146 | REQ_TYPE_ATA_CMD, |
147 | REQ_TYPE_ATA_TASK, | 147 | REQ_TYPE_ATA_TASK, |
148 | REQ_TYPE_ATA_TASKFILE, | 148 | REQ_TYPE_ATA_TASKFILE, |
149 | REQ_TYPE_ATA_PC, | 149 | REQ_TYPE_ATA_PC, |
150 | }; | 150 | }; |
151 | 151 | ||
152 | /* | 152 | /* |
153 | * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being | 153 | * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being |
154 | * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a | 154 | * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a |
155 | * SCSI cdb. | 155 | * SCSI cdb. |
156 | * | 156 | * |
157 | * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, | 157 | * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, |
158 | * typically to differentiate REQ_TYPE_SPECIAL requests. | 158 | * typically to differentiate REQ_TYPE_SPECIAL requests. |
159 | * | 159 | * |
160 | */ | 160 | */ |
161 | enum { | 161 | enum { |
162 | /* | 162 | /* |
163 | * just examples for now | 163 | * just examples for now |
164 | */ | 164 | */ |
165 | REQ_LB_OP_EJECT = 0x40, /* eject request */ | 165 | REQ_LB_OP_EJECT = 0x40, /* eject request */ |
166 | REQ_LB_OP_FLUSH = 0x41, /* flush device */ | 166 | REQ_LB_OP_FLUSH = 0x41, /* flush device */ |
167 | }; | 167 | }; |
168 | 168 | ||
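These opcodes give drivers a generic way to label block-layer messages. A request_fn can then tell an ordinary fs request from the cache flush that its own ->prepare_flush_fn marked (a sketch; the mydrv_* helpers are hypothetical and are assumed to dequeue and complete the request; see the prepare_flush example further down):

static void mydrv_request_fn(struct request_queue *q)
{
	struct request *req;

	while ((req = elv_next_request(q)) != NULL) {
		if (blk_fs_request(req)) {
			mydrv_do_rw(req);		/* normal read/write */
		} else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
			   req->cmd[0] == REQ_LB_OP_FLUSH) {
			mydrv_do_flush(req);		/* drain the write cache */
		} else {
			end_request(req, 0);		/* unknown type, fail it */
		}
	}
}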
169 | /* | 169 | /* |
170 | * request type modified bits. first three bits match BIO_RW* bits, important | 170 | * request type modified bits. first three bits match BIO_RW* bits, important |
171 | */ | 171 | */ |
172 | enum rq_flag_bits { | 172 | enum rq_flag_bits { |
173 | __REQ_RW, /* not set, read. set, write */ | 173 | __REQ_RW, /* not set, read. set, write */ |
174 | __REQ_FAILFAST, /* no low level driver retries */ | 174 | __REQ_FAILFAST, /* no low level driver retries */ |
175 | __REQ_SORTED, /* elevator knows about this request */ | 175 | __REQ_SORTED, /* elevator knows about this request */ |
176 | __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ | 176 | __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ |
177 | __REQ_HARDBARRIER, /* may not be passed by drive either */ | 177 | __REQ_HARDBARRIER, /* may not be passed by drive either */ |
178 | __REQ_FUA, /* forced unit access */ | 178 | __REQ_FUA, /* forced unit access */ |
179 | __REQ_NOMERGE, /* don't touch this for merging */ | 179 | __REQ_NOMERGE, /* don't touch this for merging */ |
180 | __REQ_STARTED, /* drive already may have started this one */ | 180 | __REQ_STARTED, /* drive already may have started this one */ |
181 | __REQ_DONTPREP, /* don't call prep for this one */ | 181 | __REQ_DONTPREP, /* don't call prep for this one */ |
182 | __REQ_QUEUED, /* uses queueing */ | 182 | __REQ_QUEUED, /* uses queueing */ |
183 | __REQ_ELVPRIV, /* elevator private data attached */ | 183 | __REQ_ELVPRIV, /* elevator private data attached */ |
184 | __REQ_FAILED, /* set if the request failed */ | 184 | __REQ_FAILED, /* set if the request failed */ |
185 | __REQ_QUIET, /* don't worry about errors */ | 185 | __REQ_QUIET, /* don't worry about errors */ |
186 | __REQ_PREEMPT, /* set for "ide_preempt" requests */ | 186 | __REQ_PREEMPT, /* set for "ide_preempt" requests */ |
187 | __REQ_ORDERED_COLOR, /* is before or after barrier */ | 187 | __REQ_ORDERED_COLOR, /* is before or after barrier */ |
188 | __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ | 188 | __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ |
189 | __REQ_ALLOCED, /* request came from our alloc pool */ | 189 | __REQ_ALLOCED, /* request came from our alloc pool */ |
190 | __REQ_RW_META, /* metadata io request */ | 190 | __REQ_RW_META, /* metadata io request */ |
191 | __REQ_NR_BITS, /* stops here */ | 191 | __REQ_NR_BITS, /* stops here */ |
192 | }; | 192 | }; |
193 | 193 | ||
194 | #define REQ_RW (1 << __REQ_RW) | 194 | #define REQ_RW (1 << __REQ_RW) |
195 | #define REQ_FAILFAST (1 << __REQ_FAILFAST) | 195 | #define REQ_FAILFAST (1 << __REQ_FAILFAST) |
196 | #define REQ_SORTED (1 << __REQ_SORTED) | 196 | #define REQ_SORTED (1 << __REQ_SORTED) |
197 | #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) | 197 | #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) |
198 | #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) | 198 | #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) |
199 | #define REQ_FUA (1 << __REQ_FUA) | 199 | #define REQ_FUA (1 << __REQ_FUA) |
200 | #define REQ_NOMERGE (1 << __REQ_NOMERGE) | 200 | #define REQ_NOMERGE (1 << __REQ_NOMERGE) |
201 | #define REQ_STARTED (1 << __REQ_STARTED) | 201 | #define REQ_STARTED (1 << __REQ_STARTED) |
202 | #define REQ_DONTPREP (1 << __REQ_DONTPREP) | 202 | #define REQ_DONTPREP (1 << __REQ_DONTPREP) |
203 | #define REQ_QUEUED (1 << __REQ_QUEUED) | 203 | #define REQ_QUEUED (1 << __REQ_QUEUED) |
204 | #define REQ_ELVPRIV (1 << __REQ_ELVPRIV) | 204 | #define REQ_ELVPRIV (1 << __REQ_ELVPRIV) |
205 | #define REQ_FAILED (1 << __REQ_FAILED) | 205 | #define REQ_FAILED (1 << __REQ_FAILED) |
206 | #define REQ_QUIET (1 << __REQ_QUIET) | 206 | #define REQ_QUIET (1 << __REQ_QUIET) |
207 | #define REQ_PREEMPT (1 << __REQ_PREEMPT) | 207 | #define REQ_PREEMPT (1 << __REQ_PREEMPT) |
208 | #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) | 208 | #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) |
209 | #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) | 209 | #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) |
210 | #define REQ_ALLOCED (1 << __REQ_ALLOCED) | 210 | #define REQ_ALLOCED (1 << __REQ_ALLOCED) |
211 | #define REQ_RW_META (1 << __REQ_RW_META) | 211 | #define REQ_RW_META (1 << __REQ_RW_META) |
212 | 212 | ||
213 | #define BLK_MAX_CDB 16 | 213 | #define BLK_MAX_CDB 16 |
214 | 214 | ||
215 | /* | 215 | /* |
216 | * try to put the fields that are referenced together in the same cacheline | 216 | * try to put the fields that are referenced together in the same cacheline |
217 | */ | 217 | */ |
218 | struct request { | 218 | struct request { |
219 | struct list_head queuelist; | 219 | struct list_head queuelist; |
220 | struct list_head donelist; | 220 | struct list_head donelist; |
221 | 221 | ||
222 | struct request_queue *q; | 222 | struct request_queue *q; |
223 | 223 | ||
224 | unsigned int cmd_flags; | 224 | unsigned int cmd_flags; |
225 | enum rq_cmd_type_bits cmd_type; | 225 | enum rq_cmd_type_bits cmd_type; |
226 | 226 | ||
227 | /* Maintain bio traversal state for part by part I/O submission. | 227 | /* Maintain bio traversal state for part by part I/O submission. |
228 | * hard_* are block layer internals, no driver should touch them! | 228 | * hard_* are block layer internals, no driver should touch them! |
229 | */ | 229 | */ |
230 | 230 | ||
231 | sector_t sector; /* next sector to submit */ | 231 | sector_t sector; /* next sector to submit */ |
232 | sector_t hard_sector; /* next sector to complete */ | 232 | sector_t hard_sector; /* next sector to complete */ |
233 | unsigned long nr_sectors; /* no. of sectors left to submit */ | 233 | unsigned long nr_sectors; /* no. of sectors left to submit */ |
234 | unsigned long hard_nr_sectors; /* no. of sectors left to complete */ | 234 | unsigned long hard_nr_sectors; /* no. of sectors left to complete */ |
235 | /* no. of sectors left to submit in the current segment */ | 235 | /* no. of sectors left to submit in the current segment */ |
236 | unsigned int current_nr_sectors; | 236 | unsigned int current_nr_sectors; |
237 | 237 | ||
238 | /* no. of sectors left to complete in the current segment */ | 238 | /* no. of sectors left to complete in the current segment */ |
239 | unsigned int hard_cur_sectors; | 239 | unsigned int hard_cur_sectors; |
240 | 240 | ||
241 | struct bio *bio; | 241 | struct bio *bio; |
242 | struct bio *biotail; | 242 | struct bio *biotail; |
243 | 243 | ||
244 | struct hlist_node hash; /* merge hash */ | 244 | struct hlist_node hash; /* merge hash */ |
245 | /* | 245 | /* |
246 | * The rb_node is only used inside the io scheduler, requests | 246 | * The rb_node is only used inside the io scheduler, requests |
247 | * are pruned when moved to the dispatch queue. So let the | 247 | * are pruned when moved to the dispatch queue. So let the |
248 | * completion_data share space with the rb_node. | 248 | * completion_data share space with the rb_node. |
249 | */ | 249 | */ |
250 | union { | 250 | union { |
251 | struct rb_node rb_node; /* sort/lookup */ | 251 | struct rb_node rb_node; /* sort/lookup */ |
252 | void *completion_data; | 252 | void *completion_data; |
253 | }; | 253 | }; |
254 | 254 | ||
255 | /* | 255 | /* |
256 | * two pointers are available for the IO schedulers, if they need | 256 | * two pointers are available for the IO schedulers, if they need |
257 | * more they have to dynamically allocate it. | 257 | * more they have to dynamically allocate it. |
258 | */ | 258 | */ |
259 | void *elevator_private; | 259 | void *elevator_private; |
260 | void *elevator_private2; | 260 | void *elevator_private2; |
261 | 261 | ||
262 | struct gendisk *rq_disk; | 262 | struct gendisk *rq_disk; |
263 | unsigned long start_time; | 263 | unsigned long start_time; |
264 | 264 | ||
265 | /* Number of scatter-gather DMA addr+len pairs after | 265 | /* Number of scatter-gather DMA addr+len pairs after |
266 | * physical address coalescing is performed. | 266 | * physical address coalescing is performed. |
267 | */ | 267 | */ |
268 | unsigned short nr_phys_segments; | 268 | unsigned short nr_phys_segments; |
269 | 269 | ||
270 | /* Number of scatter-gather addr+len pairs after | 270 | /* Number of scatter-gather addr+len pairs after |
271 | * physical and DMA remapping hardware coalescing is performed. | 271 | * physical and DMA remapping hardware coalescing is performed. |
272 | * This is the number of scatter-gather entries the driver | 272 | * This is the number of scatter-gather entries the driver |
273 | * will actually have to deal with after DMA mapping is done. | 273 | * will actually have to deal with after DMA mapping is done. |
274 | */ | 274 | */ |
275 | unsigned short nr_hw_segments; | 275 | unsigned short nr_hw_segments; |
276 | 276 | ||
277 | unsigned short ioprio; | 277 | unsigned short ioprio; |
278 | 278 | ||
279 | void *special; | 279 | void *special; |
280 | char *buffer; | 280 | char *buffer; |
281 | 281 | ||
282 | int tag; | 282 | int tag; |
283 | int errors; | 283 | int errors; |
284 | 284 | ||
285 | int ref_count; | 285 | int ref_count; |
286 | 286 | ||
287 | /* | 287 | /* |
288 | * when request is used as a packet command carrier | 288 | * when request is used as a packet command carrier |
289 | */ | 289 | */ |
290 | unsigned int cmd_len; | 290 | unsigned int cmd_len; |
291 | unsigned char cmd[BLK_MAX_CDB]; | 291 | unsigned char cmd[BLK_MAX_CDB]; |
292 | 292 | ||
293 | unsigned int data_len; | 293 | unsigned int data_len; |
294 | unsigned int sense_len; | 294 | unsigned int sense_len; |
295 | void *data; | 295 | void *data; |
296 | void *sense; | 296 | void *sense; |
297 | 297 | ||
298 | unsigned int timeout; | 298 | unsigned int timeout; |
299 | int retries; | 299 | int retries; |
300 | 300 | ||
301 | /* | 301 | /* |
302 | * completion callback. | 302 | * completion callback. |
303 | */ | 303 | */ |
304 | rq_end_io_fn *end_io; | 304 | rq_end_io_fn *end_io; |
305 | void *end_io_data; | 305 | void *end_io_data; |
306 | 306 | ||
307 | /* for bidi */ | 307 | /* for bidi */ |
308 | struct request *next_rq; | 308 | struct request *next_rq; |
309 | }; | 309 | }; |
310 | 310 | ||
311 | /* | 311 | /* |
312 | * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME | 312 | * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME |
313 | * requests. Some step values could eventually be made generic. | 313 | * requests. Some step values could eventually be made generic. |
314 | */ | 314 | */ |
315 | struct request_pm_state | 315 | struct request_pm_state |
316 | { | 316 | { |
317 | /* PM state machine step value, currently driver specific */ | 317 | /* PM state machine step value, currently driver specific */ |
318 | int pm_step; | 318 | int pm_step; |
319 | /* requested PM state value (S1, S2, S3, S4, ...) */ | 319 | /* requested PM state value (S1, S2, S3, S4, ...) */ |
320 | u32 pm_state; | 320 | u32 pm_state; |
321 | void* data; /* for driver use */ | 321 | void* data; /* for driver use */ |
322 | }; | 322 | }; |
323 | 323 | ||
324 | #include <linux/elevator.h> | 324 | #include <linux/elevator.h> |
325 | 325 | ||
326 | typedef void (request_fn_proc) (struct request_queue *q); | 326 | typedef void (request_fn_proc) (struct request_queue *q); |
327 | typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); | 327 | typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); |
328 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); | 328 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); |
329 | typedef void (unplug_fn) (struct request_queue *); | 329 | typedef void (unplug_fn) (struct request_queue *); |
330 | 330 | ||
331 | struct bio_vec; | 331 | struct bio_vec; |
332 | typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *); | 332 | typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *); |
333 | typedef int (issue_flush_fn) (struct request_queue *, struct gendisk *, sector_t *); | 333 | typedef int (issue_flush_fn) (struct request_queue *, struct gendisk *, sector_t *); |
334 | typedef void (prepare_flush_fn) (struct request_queue *, struct request *); | 334 | typedef void (prepare_flush_fn) (struct request_queue *, struct request *); |
335 | typedef void (softirq_done_fn)(struct request *); | 335 | typedef void (softirq_done_fn)(struct request *); |
336 | 336 | ||
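prepare_flush_fn is the hook this patch reuses for data-less barriers: once a queue is set up for ordered writes, the block layer allocates the flush request and lets the driver fill in the device-specific command. A minimal setup might look like this (a sketch; the mydrv_* names are invented, while blk_queue_ordered() and QUEUE_ORDERED_DRAIN_FLUSH are the existing ordered-queue helpers declared elsewhere in this header):

static void mydrv_prepare_flush(struct request_queue *q, struct request *rq)
{
	/* mark the request so the driver's request_fn recognizes it */
	rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
	rq->cmd[0] = REQ_LB_OP_FLUSH;
}

static void mydrv_init_queue(struct request_queue *q)
{
	/* drain outstanding I/O, then issue the flush built above */
	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, mydrv_prepare_flush);
}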
337 | enum blk_queue_state { | 337 | enum blk_queue_state { |
338 | Queue_down, | 338 | Queue_down, |
339 | Queue_up, | 339 | Queue_up, |
340 | }; | 340 | }; |
341 | 341 | ||
342 | struct blk_queue_tag { | 342 | struct blk_queue_tag { |
343 | struct request **tag_index; /* map of busy tags */ | 343 | struct request **tag_index; /* map of busy tags */ |
344 | unsigned long *tag_map; /* bit map of free/busy tags */ | 344 | unsigned long *tag_map; /* bit map of free/busy tags */ |
345 | struct list_head busy_list; /* fifo list of busy tags */ | 345 | struct list_head busy_list; /* fifo list of busy tags */ |
346 | int busy; /* current depth */ | 346 | int busy; /* current depth */ |
347 | int max_depth; /* what we will send to device */ | 347 | int max_depth; /* what we will send to device */ |
348 | int real_max_depth; /* what the array can hold */ | 348 | int real_max_depth; /* what the array can hold */ |
349 | atomic_t refcnt; /* map can be shared */ | 349 | atomic_t refcnt; /* map can be shared */ |
350 | }; | 350 | }; |
351 | 351 | ||
352 | struct request_queue | 352 | struct request_queue |
353 | { | 353 | { |
354 | /* | 354 | /* |
355 | * Together with queue_head for cacheline sharing | 355 | * Together with queue_head for cacheline sharing |
356 | */ | 356 | */ |
357 | struct list_head queue_head; | 357 | struct list_head queue_head; |
358 | struct request *last_merge; | 358 | struct request *last_merge; |
359 | elevator_t *elevator; | 359 | elevator_t *elevator; |
360 | 360 | ||
361 | /* | 361 | /* |
362 | * the queue request freelist, one for reads and one for writes | 362 | * the queue request freelist, one for reads and one for writes |
363 | */ | 363 | */ |
364 | struct request_list rq; | 364 | struct request_list rq; |
365 | 365 | ||
366 | request_fn_proc *request_fn; | 366 | request_fn_proc *request_fn; |
367 | make_request_fn *make_request_fn; | 367 | make_request_fn *make_request_fn; |
368 | prep_rq_fn *prep_rq_fn; | 368 | prep_rq_fn *prep_rq_fn; |
369 | unplug_fn *unplug_fn; | 369 | unplug_fn *unplug_fn; |
370 | merge_bvec_fn *merge_bvec_fn; | 370 | merge_bvec_fn *merge_bvec_fn; |
371 | issue_flush_fn *issue_flush_fn; | 371 | issue_flush_fn *issue_flush_fn; |
372 | prepare_flush_fn *prepare_flush_fn; | 372 | prepare_flush_fn *prepare_flush_fn; |
373 | softirq_done_fn *softirq_done_fn; | 373 | softirq_done_fn *softirq_done_fn; |
374 | 374 | ||
375 | /* | 375 | /* |
376 | * Dispatch queue sorting | 376 | * Dispatch queue sorting |
377 | */ | 377 | */ |
378 | sector_t end_sector; | 378 | sector_t end_sector; |
379 | struct request *boundary_rq; | 379 | struct request *boundary_rq; |
380 | 380 | ||
381 | /* | 381 | /* |
382 | * Auto-unplugging state | 382 | * Auto-unplugging state |
383 | */ | 383 | */ |
384 | struct timer_list unplug_timer; | 384 | struct timer_list unplug_timer; |
385 | int unplug_thresh; /* After this many requests */ | 385 | int unplug_thresh; /* After this many requests */ |
386 | unsigned long unplug_delay; /* After this many jiffies */ | 386 | unsigned long unplug_delay; /* After this many jiffies */ |
387 | struct work_struct unplug_work; | 387 | struct work_struct unplug_work; |
388 | 388 | ||
389 | struct backing_dev_info backing_dev_info; | 389 | struct backing_dev_info backing_dev_info; |
390 | 390 | ||
391 | /* | 391 | /* |
392 | * The queue owner gets to use this for whatever they like. | 392 | * The queue owner gets to use this for whatever they like. |
393 | * ll_rw_blk doesn't touch it. | 393 | * ll_rw_blk doesn't touch it. |
394 | */ | 394 | */ |
395 | void *queuedata; | 395 | void *queuedata; |
396 | 396 | ||
397 | /* | 397 | /* |
398 | * queue needs bounce pages for pages above this limit | 398 | * queue needs bounce pages for pages above this limit |
399 | */ | 399 | */ |
400 | unsigned long bounce_pfn; | 400 | unsigned long bounce_pfn; |
401 | gfp_t bounce_gfp; | 401 | gfp_t bounce_gfp; |
402 | 402 | ||
403 | /* | 403 | /* |
404 | * various queue flags, see QUEUE_* below | 404 | * various queue flags, see QUEUE_* below |
405 | */ | 405 | */ |
406 | unsigned long queue_flags; | 406 | unsigned long queue_flags; |
407 | 407 | ||
408 | /* | 408 | /* |
409 | * protects queue structures from reentrancy. ->__queue_lock should | 409 | * protects queue structures from reentrancy. ->__queue_lock should |
410 | * _never_ be used directly, it is queue private. always use | 410 | * _never_ be used directly, it is queue private. always use |
411 | * ->queue_lock. | 411 | * ->queue_lock. |
412 | */ | 412 | */ |
413 | spinlock_t __queue_lock; | 413 | spinlock_t __queue_lock; |
414 | spinlock_t *queue_lock; | 414 | spinlock_t *queue_lock; |
415 | 415 | ||
416 | /* | 416 | /* |
417 | * queue kobject | 417 | * queue kobject |
418 | */ | 418 | */ |
419 | struct kobject kobj; | 419 | struct kobject kobj; |
420 | 420 | ||
421 | /* | 421 | /* |
422 | * queue settings | 422 | * queue settings |
423 | */ | 423 | */ |
424 | unsigned long nr_requests; /* Max # of requests */ | 424 | unsigned long nr_requests; /* Max # of requests */ |
425 | unsigned int nr_congestion_on; | 425 | unsigned int nr_congestion_on; |
426 | unsigned int nr_congestion_off; | 426 | unsigned int nr_congestion_off; |
427 | unsigned int nr_batching; | 427 | unsigned int nr_batching; |
428 | 428 | ||
429 | unsigned int max_sectors; | 429 | unsigned int max_sectors; |
430 | unsigned int max_hw_sectors; | 430 | unsigned int max_hw_sectors; |
431 | unsigned short max_phys_segments; | 431 | unsigned short max_phys_segments; |
432 | unsigned short max_hw_segments; | 432 | unsigned short max_hw_segments; |
433 | unsigned short hardsect_size; | 433 | unsigned short hardsect_size; |
434 | unsigned int max_segment_size; | 434 | unsigned int max_segment_size; |
435 | 435 | ||
436 | unsigned long seg_boundary_mask; | 436 | unsigned long seg_boundary_mask; |
437 | unsigned int dma_alignment; | 437 | unsigned int dma_alignment; |
438 | 438 | ||
439 | struct blk_queue_tag *queue_tags; | 439 | struct blk_queue_tag *queue_tags; |
440 | 440 | ||
441 | unsigned int nr_sorted; | 441 | unsigned int nr_sorted; |
442 | unsigned int in_flight; | 442 | unsigned int in_flight; |
443 | 443 | ||
444 | /* | 444 | /* |
445 | * sg stuff | 445 | * sg stuff |
446 | */ | 446 | */ |
447 | unsigned int sg_timeout; | 447 | unsigned int sg_timeout; |
448 | unsigned int sg_reserved_size; | 448 | unsigned int sg_reserved_size; |
449 | int node; | 449 | int node; |
450 | #ifdef CONFIG_BLK_DEV_IO_TRACE | 450 | #ifdef CONFIG_BLK_DEV_IO_TRACE |
451 | struct blk_trace *blk_trace; | 451 | struct blk_trace *blk_trace; |
452 | #endif | 452 | #endif |
453 | /* | 453 | /* |
454 | * reserved for flush operations | 454 | * reserved for flush operations |
455 | */ | 455 | */ |
456 | unsigned int ordered, next_ordered, ordseq; | 456 | unsigned int ordered, next_ordered, ordseq; |
457 | int orderr, ordcolor; | 457 | int orderr, ordcolor; |
458 | struct request pre_flush_rq, bar_rq, post_flush_rq; | 458 | struct request pre_flush_rq, bar_rq, post_flush_rq; |
459 | struct request *orig_bar_rq; | 459 | struct request *orig_bar_rq; |
460 | 460 | ||
461 | struct mutex sysfs_lock; | 461 | struct mutex sysfs_lock; |
462 | 462 | ||
463 | #if defined(CONFIG_BLK_DEV_BSG) | 463 | #if defined(CONFIG_BLK_DEV_BSG) |
464 | struct bsg_class_device bsg_dev; | 464 | struct bsg_class_device bsg_dev; |
465 | #endif | 465 | #endif |
466 | }; | 466 | }; |
467 | 467 | ||
468 | #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ | 468 | #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ |
469 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ | 469 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ |
470 | #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ | 470 | #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ |
471 | #define QUEUE_FLAG_READFULL 3 /* read queue has been filled */ | 471 | #define QUEUE_FLAG_READFULL 3 /* read queue has been filled */ |
472 | #define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */ | 472 | #define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */ |
473 | #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ | 473 | #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ |
474 | #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ | 474 | #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ |
475 | #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ | 475 | #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ |
476 | #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ | 476 | #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ |
477 | #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ | 477 | #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ |
478 | 478 | ||
479 | enum { | 479 | enum { |
480 | /* | 480 | /* |
481 | * Hardbarrier is supported with one of the following methods. | 481 | * Hardbarrier is supported with one of the following methods. |
482 | * | 482 | * |
483 | * NONE : hardbarrier unsupported | 483 | * NONE : hardbarrier unsupported |
484 | * DRAIN : ordering by draining is enough | 484 | * DRAIN : ordering by draining is enough |
485 | * DRAIN_FLUSH : ordering by draining w/ pre and post flushes | 485 | * DRAIN_FLUSH : ordering by draining w/ pre and post flushes |
486 | * DRAIN_FUA : ordering by draining w/ pre flush and FUA write | 486 | * DRAIN_FUA : ordering by draining w/ pre flush and FUA write |
487 | * TAG : ordering by tag is enough | 487 | * TAG : ordering by tag is enough |
488 | * TAG_FLUSH : ordering by tag w/ pre and post flushes | 488 | * TAG_FLUSH : ordering by tag w/ pre and post flushes |
489 | * TAG_FUA : ordering by tag w/ pre flush and FUA write | 489 | * TAG_FUA : ordering by tag w/ pre flush and FUA write |
490 | */ | 490 | */ |
491 | QUEUE_ORDERED_NONE = 0x00, | 491 | QUEUE_ORDERED_NONE = 0x00, |
492 | QUEUE_ORDERED_DRAIN = 0x01, | 492 | QUEUE_ORDERED_DRAIN = 0x01, |
493 | QUEUE_ORDERED_TAG = 0x02, | 493 | QUEUE_ORDERED_TAG = 0x02, |
494 | 494 | ||
495 | QUEUE_ORDERED_PREFLUSH = 0x10, | 495 | QUEUE_ORDERED_PREFLUSH = 0x10, |
496 | QUEUE_ORDERED_POSTFLUSH = 0x20, | 496 | QUEUE_ORDERED_POSTFLUSH = 0x20, |
497 | QUEUE_ORDERED_FUA = 0x40, | 497 | QUEUE_ORDERED_FUA = 0x40, |
498 | 498 | ||
499 | QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | | 499 | QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | |
500 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, | 500 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, |
501 | QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | | 501 | QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | |
502 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, | 502 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, |
503 | QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | | 503 | QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | |
504 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, | 504 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, |
505 | QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | | 505 | QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | |
506 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, | 506 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, |
507 | 507 | ||
508 | /* | 508 | /* |
509 | * Ordered operation sequence | 509 | * Ordered operation sequence |
510 | */ | 510 | */ |
511 | QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ | 511 | QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ |
512 | QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ | 512 | QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ |
513 | QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ | 513 | QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ |
514 | QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ | 514 | QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ |
515 | QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ | 515 | QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ |
516 | QUEUE_ORDSEQ_DONE = 0x20, | 516 | QUEUE_ORDSEQ_DONE = 0x20, |
517 | }; | 517 | }; |
518 | 518 | ||
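For context, a driver selects one of these ordered modes when it sets up its queue and supplies the ->prepare_flush_fn() that turns the preallocated flush request into a real cache-flush command; the data-less barrier support added by this commit reuses that same callback. A minimal sketch, where every mydev_* name is an assumption rather than a real API:

#include <linux/blkdev.h>

/* hypothetical prepare_flush_fn; mydev_* names are illustrative only */
static void mydev_prepare_flush(struct request_queue *q, struct request *rq)
{
	/* turn the preallocated flush request into a device cache-flush cmd */
	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	mydev_build_flush_cmd(rq);		/* assumed driver helper */
}

static void mydev_setup_barriers(struct request_queue *q)
{
	/* drain the queue and wrap each barrier in pre/post flushes */
	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, mydev_prepare_flush);
}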
519 | #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) | 519 | #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) |
520 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) | 520 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) |
521 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) | 521 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) |
522 | #define blk_queue_flushing(q) ((q)->ordseq) | 522 | #define blk_queue_flushing(q) ((q)->ordseq) |
523 | 523 | ||
524 | #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) | 524 | #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) |
525 | #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) | 525 | #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) |
526 | #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) | 526 | #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) |
527 | #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) | 527 | #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) |
528 | 528 | ||
529 | #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) | 529 | #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) |
530 | #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) | 530 | #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) |
531 | 531 | ||
532 | #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) | 532 | #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) |
533 | 533 | ||
534 | #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) | 534 | #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) |
535 | #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) | 535 | #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) |
536 | #define blk_pm_request(rq) \ | 536 | #define blk_pm_request(rq) \ |
537 | (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) | 537 | (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) |
538 | 538 | ||
539 | #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) | 539 | #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) |
540 | #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) | 540 | #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) |
541 | #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) | 541 | #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) |
542 | #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) | 542 | #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) |
543 | #define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) | ||
543 | 544 | ||
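With the new blk_empty_barrier() test, code below the elevator can recognize a barrier request whose hard_nr_sectors is zero and skip the data-transfer path entirely. A hedged sketch of a driver request_fn doing so (mydev_* names are assumptions):

#include <linux/blkdev.h>

static void mydev_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		if (blk_empty_barrier(rq)) {
			/* barrier with no payload: only the flush matters */
			blkdev_dequeue_request(rq);
			mydev_flush_cache(rq);		/* assumed helper */
			continue;
		}
		/* normal data path: hand the request to the hardware */
		blkdev_dequeue_request(rq);
		mydev_start_transfer(rq);		/* assumed helper */
	}
}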
544 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) | 545 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) |
545 | 546 | ||
546 | #define rq_data_dir(rq) ((rq)->cmd_flags & 1) | 547 | #define rq_data_dir(rq) ((rq)->cmd_flags & 1) |
547 | 548 | ||
548 | /* | 549 | /* |
549 | * We regard a request as sync if it's a READ or a SYNC write. | 550 | * We regard a request as sync if it's a READ or a SYNC write. |
550 | */ | 551 | */ |
551 | #define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC) | 552 | #define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC) |
552 | #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) | 553 | #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) |
553 | 554 | ||
554 | static inline int blk_queue_full(struct request_queue *q, int rw) | 555 | static inline int blk_queue_full(struct request_queue *q, int rw) |
555 | { | 556 | { |
556 | if (rw == READ) | 557 | if (rw == READ) |
557 | return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); | 558 | return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); |
558 | return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); | 559 | return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); |
559 | } | 560 | } |
560 | 561 | ||
561 | static inline void blk_set_queue_full(struct request_queue *q, int rw) | 562 | static inline void blk_set_queue_full(struct request_queue *q, int rw) |
562 | { | 563 | { |
563 | if (rw == READ) | 564 | if (rw == READ) |
564 | set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); | 565 | set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); |
565 | else | 566 | else |
566 | set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); | 567 | set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); |
567 | } | 568 | } |
568 | 569 | ||
569 | static inline void blk_clear_queue_full(struct request_queue *q, int rw) | 570 | static inline void blk_clear_queue_full(struct request_queue *q, int rw) |
570 | { | 571 | { |
571 | if (rw == READ) | 572 | if (rw == READ) |
572 | clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); | 573 | clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); |
573 | else | 574 | else |
574 | clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); | 575 | clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); |
575 | } | 576 | } |
576 | 577 | ||
577 | 578 | ||
578 | /* | 579 | /* |
579 | * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may | 580 | * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may |
580 | * it already be started by driver. | 581 | * it already be started by driver. |
581 | */ | 582 | */ |
582 | #define RQ_NOMERGE_FLAGS \ | 583 | #define RQ_NOMERGE_FLAGS \ |
583 | (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) | 584 | (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) |
584 | #define rq_mergeable(rq) \ | 585 | #define rq_mergeable(rq) \ |
585 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) | 586 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) |
586 | 587 | ||
587 | /* | 588 | /* |
588 | * q->prep_rq_fn return values | 589 | * q->prep_rq_fn return values |
589 | */ | 590 | */ |
590 | #define BLKPREP_OK 0 /* serve it */ | 591 | #define BLKPREP_OK 0 /* serve it */ |
591 | #define BLKPREP_KILL 1 /* fatal error, kill */ | 592 | #define BLKPREP_KILL 1 /* fatal error, kill */ |
592 | #define BLKPREP_DEFER 2 /* leave on queue */ | 593 | #define BLKPREP_DEFER 2 /* leave on queue */ |
593 | 594 | ||
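These are the values a driver's ->prep_rq_fn() hands back to the block layer before a request is dispatched. A minimal sketch, with mydev_* names and MYDEV_MAX_SECTORS assumed for illustration:

#include <linux/blkdev.h>

static int mydev_prep_rq(struct request_queue *q, struct request *rq)
{
	if (!mydev_can_queue(q))		/* assumed resource check */
		return BLKPREP_DEFER;		/* leave it queued, retry later */

	if (rq->nr_sectors > MYDEV_MAX_SECTORS)
		return BLKPREP_KILL;		/* fail the request outright */

	return BLKPREP_OK;			/* ready to be serviced */
}

The callback would be registered once at init time with blk_queue_prep_rq(q, mydev_prep_rq).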
594 | extern unsigned long blk_max_low_pfn, blk_max_pfn; | 595 | extern unsigned long blk_max_low_pfn, blk_max_pfn; |
595 | 596 | ||
596 | /* | 597 | /* |
597 | * standard bounce addresses: | 598 | * standard bounce addresses: |
598 | * | 599 | * |
599 | * BLK_BOUNCE_HIGH : bounce all highmem pages | 600 | * BLK_BOUNCE_HIGH : bounce all highmem pages |
600 | * BLK_BOUNCE_ANY : don't bounce anything | 601 | * BLK_BOUNCE_ANY : don't bounce anything |
601 | * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary | 602 | * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary |
602 | */ | 603 | */ |
603 | #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) | 604 | #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) |
604 | #define BLK_BOUNCE_ANY ((u64)blk_max_pfn << PAGE_SHIFT) | 605 | #define BLK_BOUNCE_ANY ((u64)blk_max_pfn << PAGE_SHIFT) |
605 | #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) | 606 | #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) |
606 | 607 | ||
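A driver picks one of these thresholds (or its own DMA mask) through blk_queue_bounce_limit(); a brief sketch, with the mydev_ name assumed:

#include <linux/blkdev.h>

/* illustrative: a controller that can only DMA below 4GB */
static void mydev_set_bounce_limit(struct request_queue *q)
{
	blk_queue_bounce_limit(q, 0xffffffffULL);
}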
607 | /* | 608 | /* |
608 | * default timeout for SG_IO if none specified | 609 | * default timeout for SG_IO if none specified |
609 | */ | 610 | */ |
610 | #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) | 611 | #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) |
611 | 612 | ||
612 | #ifdef CONFIG_BOUNCE | 613 | #ifdef CONFIG_BOUNCE |
613 | extern int init_emergency_isa_pool(void); | 614 | extern int init_emergency_isa_pool(void); |
614 | extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); | 615 | extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); |
615 | #else | 616 | #else |
616 | static inline int init_emergency_isa_pool(void) | 617 | static inline int init_emergency_isa_pool(void) |
617 | { | 618 | { |
618 | return 0; | 619 | return 0; |
619 | } | 620 | } |
620 | static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) | 621 | static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) |
621 | { | 622 | { |
622 | } | 623 | } |
623 | #endif /* CONFIG_BOUNCE */ | 624 | #endif /* CONFIG_BOUNCE */ |
624 | 625 | ||
625 | struct req_iterator { | 626 | struct req_iterator { |
626 | int i; | 627 | int i; |
627 | struct bio *bio; | 628 | struct bio *bio; |
628 | }; | 629 | }; |
629 | 630 | ||
630 | /* This should not be used directly - use rq_for_each_segment */ | 631 | /* This should not be used directly - use rq_for_each_segment */ |
631 | #define __rq_for_each_bio(_bio, rq) \ | 632 | #define __rq_for_each_bio(_bio, rq) \ |
632 | if ((rq->bio)) \ | 633 | if ((rq->bio)) \ |
633 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) | 634 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) |
634 | 635 | ||
635 | #define rq_for_each_segment(bvl, _rq, _iter) \ | 636 | #define rq_for_each_segment(bvl, _rq, _iter) \ |
636 | __rq_for_each_bio(_iter.bio, _rq) \ | 637 | __rq_for_each_bio(_iter.bio, _rq) \ |
637 | bio_for_each_segment(bvl, _iter.bio, _iter.i) | 638 | bio_for_each_segment(bvl, _iter.bio, _iter.i) |
638 | 639 | ||
639 | #define rq_iter_last(rq, _iter) \ | 640 | #define rq_iter_last(rq, _iter) \ |
640 | (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) | 641 | (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) |
641 | 642 | ||
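rq_for_each_segment() walks every bio_vec of every bio hanging off a request; a short, purely illustrative sketch that sums the bytes a request carries:

#include <linux/blkdev.h>

static unsigned int mydev_rq_bytes(struct request *rq)
{
	struct req_iterator iter;
	struct bio_vec *bvec;
	unsigned int bytes = 0;

	rq_for_each_segment(bvec, rq, iter)
		bytes += bvec->bv_len;

	return bytes;
}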
642 | extern int blk_register_queue(struct gendisk *disk); | 643 | extern int blk_register_queue(struct gendisk *disk); |
643 | extern void blk_unregister_queue(struct gendisk *disk); | 644 | extern void blk_unregister_queue(struct gendisk *disk); |
644 | extern void register_disk(struct gendisk *dev); | 645 | extern void register_disk(struct gendisk *dev); |
645 | extern void generic_make_request(struct bio *bio); | 646 | extern void generic_make_request(struct bio *bio); |
646 | extern void blk_put_request(struct request *); | 647 | extern void blk_put_request(struct request *); |
647 | extern void __blk_put_request(struct request_queue *, struct request *); | 648 | extern void __blk_put_request(struct request_queue *, struct request *); |
648 | extern void blk_end_sync_rq(struct request *rq, int error); | 649 | extern void blk_end_sync_rq(struct request *rq, int error); |
649 | extern struct request *blk_get_request(struct request_queue *, int, gfp_t); | 650 | extern struct request *blk_get_request(struct request_queue *, int, gfp_t); |
650 | extern void blk_insert_request(struct request_queue *, struct request *, int, void *); | 651 | extern void blk_insert_request(struct request_queue *, struct request *, int, void *); |
651 | extern void blk_requeue_request(struct request_queue *, struct request *); | 652 | extern void blk_requeue_request(struct request_queue *, struct request *); |
652 | extern void blk_plug_device(struct request_queue *); | 653 | extern void blk_plug_device(struct request_queue *); |
653 | extern int blk_remove_plug(struct request_queue *); | 654 | extern int blk_remove_plug(struct request_queue *); |
654 | extern void blk_recount_segments(struct request_queue *, struct bio *); | 655 | extern void blk_recount_segments(struct request_queue *, struct bio *); |
655 | extern int scsi_cmd_ioctl(struct file *, struct request_queue *, | 656 | extern int scsi_cmd_ioctl(struct file *, struct request_queue *, |
656 | struct gendisk *, unsigned int, void __user *); | 657 | struct gendisk *, unsigned int, void __user *); |
657 | extern int sg_scsi_ioctl(struct file *, struct request_queue *, | 658 | extern int sg_scsi_ioctl(struct file *, struct request_queue *, |
658 | struct gendisk *, struct scsi_ioctl_command __user *); | 659 | struct gendisk *, struct scsi_ioctl_command __user *); |
659 | 660 | ||
660 | /* | 661 | /* |
661 | * Temporary export, until SCSI gets fixed up. | 662 | * Temporary export, until SCSI gets fixed up. |
662 | */ | 663 | */ |
663 | extern int blk_rq_append_bio(struct request_queue *q, struct request *rq, | 664 | extern int blk_rq_append_bio(struct request_queue *q, struct request *rq, |
664 | struct bio *bio); | 665 | struct bio *bio); |
665 | 666 | ||
666 | /* | 667 | /* |
667 | * A queue has just exited congestion. Note this in the global counter of | 668 | * A queue has just exited congestion. Note this in the global counter of |
668 | * congested queues, and wake up anyone who was waiting for requests to be | 669 | * congested queues, and wake up anyone who was waiting for requests to be |
669 | * put back. | 670 | * put back. |
670 | */ | 671 | */ |
671 | static inline void blk_clear_queue_congested(struct request_queue *q, int rw) | 672 | static inline void blk_clear_queue_congested(struct request_queue *q, int rw) |
672 | { | 673 | { |
673 | clear_bdi_congested(&q->backing_dev_info, rw); | 674 | clear_bdi_congested(&q->backing_dev_info, rw); |
674 | } | 675 | } |
675 | 676 | ||
676 | /* | 677 | /* |
677 | * A queue has just entered congestion. Flag that in the queue's VM-visible | 678 | * A queue has just entered congestion. Flag that in the queue's VM-visible |
678 | * state flags and increment the global counter of congested queues. | 679 | * state flags and increment the global counter of congested queues. |
679 | */ | 680 | */ |
680 | static inline void blk_set_queue_congested(struct request_queue *q, int rw) | 681 | static inline void blk_set_queue_congested(struct request_queue *q, int rw) |
681 | { | 682 | { |
682 | set_bdi_congested(&q->backing_dev_info, rw); | 683 | set_bdi_congested(&q->backing_dev_info, rw); |
683 | } | 684 | } |
684 | 685 | ||
685 | extern void blk_start_queue(struct request_queue *q); | 686 | extern void blk_start_queue(struct request_queue *q); |
686 | extern void blk_stop_queue(struct request_queue *q); | 687 | extern void blk_stop_queue(struct request_queue *q); |
687 | extern void blk_sync_queue(struct request_queue *q); | 688 | extern void blk_sync_queue(struct request_queue *q); |
688 | extern void __blk_stop_queue(struct request_queue *q); | 689 | extern void __blk_stop_queue(struct request_queue *q); |
689 | extern void blk_run_queue(struct request_queue *); | 690 | extern void blk_run_queue(struct request_queue *); |
690 | extern void blk_start_queueing(struct request_queue *); | 691 | extern void blk_start_queueing(struct request_queue *); |
691 | extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); | 692 | extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); |
692 | extern int blk_rq_unmap_user(struct bio *); | 693 | extern int blk_rq_unmap_user(struct bio *); |
693 | extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); | 694 | extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); |
694 | extern int blk_rq_map_user_iov(struct request_queue *, struct request *, | 695 | extern int blk_rq_map_user_iov(struct request_queue *, struct request *, |
695 | struct sg_iovec *, int, unsigned int); | 696 | struct sg_iovec *, int, unsigned int); |
696 | extern int blk_execute_rq(struct request_queue *, struct gendisk *, | 697 | extern int blk_execute_rq(struct request_queue *, struct gendisk *, |
697 | struct request *, int); | 698 | struct request *, int); |
698 | extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, | 699 | extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, |
699 | struct request *, int, rq_end_io_fn *); | 700 | struct request *, int, rq_end_io_fn *); |
700 | extern int blk_verify_command(unsigned char *, int); | 701 | extern int blk_verify_command(unsigned char *, int); |
701 | 702 | ||
702 | static inline struct request_queue *bdev_get_queue(struct block_device *bdev) | 703 | static inline struct request_queue *bdev_get_queue(struct block_device *bdev) |
703 | { | 704 | { |
704 | return bdev->bd_disk->queue; | 705 | return bdev->bd_disk->queue; |
705 | } | 706 | } |
706 | 707 | ||
707 | static inline void blk_run_backing_dev(struct backing_dev_info *bdi, | 708 | static inline void blk_run_backing_dev(struct backing_dev_info *bdi, |
708 | struct page *page) | 709 | struct page *page) |
709 | { | 710 | { |
710 | if (bdi && bdi->unplug_io_fn) | 711 | if (bdi && bdi->unplug_io_fn) |
711 | bdi->unplug_io_fn(bdi, page); | 712 | bdi->unplug_io_fn(bdi, page); |
712 | } | 713 | } |
713 | 714 | ||
714 | static inline void blk_run_address_space(struct address_space *mapping) | 715 | static inline void blk_run_address_space(struct address_space *mapping) |
715 | { | 716 | { |
716 | if (mapping) | 717 | if (mapping) |
717 | blk_run_backing_dev(mapping->backing_dev_info, NULL); | 718 | blk_run_backing_dev(mapping->backing_dev_info, NULL); |
718 | } | 719 | } |
719 | 720 | ||
720 | /* | 721 | /* |
721 | * end_request() and friends. Must be called with the request queue spinlock | 722 | * end_request() and friends. Must be called with the request queue spinlock |
722 | * acquired. All functions called within end_request() _must_be_ atomic. | 723 | * acquired. All functions called within end_request() _must_be_ atomic. |
723 | * | 724 | * |
724 | * Several drivers define their own end_request and call | 725 | * Several drivers define their own end_request and call |
725 | * end_that_request_first() and end_that_request_last() | 726 | * end_that_request_first() and end_that_request_last() |
726 | * for parts of the original function. This prevents | 727 | * for parts of the original function. This prevents |
727 | * code duplication in drivers. | 728 | * code duplication in drivers. |
728 | */ | 729 | */ |
729 | extern int end_that_request_first(struct request *, int, int); | 730 | extern int end_that_request_first(struct request *, int, int); |
730 | extern int end_that_request_chunk(struct request *, int, int); | 731 | extern int end_that_request_chunk(struct request *, int, int); |
731 | extern void end_that_request_last(struct request *, int); | 732 | extern void end_that_request_last(struct request *, int); |
732 | extern void end_request(struct request *, int); | 733 | extern void end_request(struct request *, int); |
733 | extern void end_queued_request(struct request *, int); | 734 | extern void end_queued_request(struct request *, int); |
734 | extern void end_dequeued_request(struct request *, int); | 735 | extern void end_dequeued_request(struct request *, int); |
735 | extern void blk_complete_request(struct request *); | 736 | extern void blk_complete_request(struct request *); |
736 | 737 | ||
737 | /* | 738 | /* |
738 | * end_that_request_first/chunk() takes an uptodate argument. we account | 739 | * end_that_request_first/chunk() takes an uptodate argument. we account |
739 | * any value <= 0 as an io error. 0 means -EIO for compatibility reasons, | 740 | * any value <= 0 as an io error. 0 means -EIO for compatibility reasons, |
740 | * any other < 0 value is the direct error type. An uptodate value of | 741 | * any other < 0 value is the direct error type. An uptodate value of |
741 | * 1 indicates successful io completion | 742 | * 1 indicates successful io completion |
742 | */ | 743 | */ |
743 | #define end_io_error(uptodate) (unlikely((uptodate) <= 0)) | 744 | #define end_io_error(uptodate) (unlikely((uptodate) <= 0)) |
744 | 745 | ||
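A driver that rolls its own completion typically pairs the two helpers as described above, holding the queue lock as required. A hedged sketch, assuming the request was already dequeued and the lock is not yet held (mydev_ name is an assumption):

#include <linux/blkdev.h>

static void mydev_end_request(struct request *rq, int uptodate)
{
	struct request_queue *q = rq->q;
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	/* end_that_request_first() returns non-zero while segments remain */
	if (!end_that_request_first(rq, uptodate, rq->hard_nr_sectors))
		end_that_request_last(rq, uptodate);
	spin_unlock_irqrestore(q->queue_lock, flags);
}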
745 | static inline void blkdev_dequeue_request(struct request *req) | 746 | static inline void blkdev_dequeue_request(struct request *req) |
746 | { | 747 | { |
747 | elv_dequeue_request(req->q, req); | 748 | elv_dequeue_request(req->q, req); |
748 | } | 749 | } |
749 | 750 | ||
750 | /* | 751 | /* |
751 | * Access functions for manipulating queue properties | 752 | * Access functions for manipulating queue properties |
752 | */ | 753 | */ |
753 | extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, | 754 | extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, |
754 | spinlock_t *lock, int node_id); | 755 | spinlock_t *lock, int node_id); |
755 | extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); | 756 | extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); |
756 | extern void blk_cleanup_queue(struct request_queue *); | 757 | extern void blk_cleanup_queue(struct request_queue *); |
757 | extern void blk_queue_make_request(struct request_queue *, make_request_fn *); | 758 | extern void blk_queue_make_request(struct request_queue *, make_request_fn *); |
758 | extern void blk_queue_bounce_limit(struct request_queue *, u64); | 759 | extern void blk_queue_bounce_limit(struct request_queue *, u64); |
759 | extern void blk_queue_max_sectors(struct request_queue *, unsigned int); | 760 | extern void blk_queue_max_sectors(struct request_queue *, unsigned int); |
760 | extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); | 761 | extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); |
761 | extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); | 762 | extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); |
762 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); | 763 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); |
763 | extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); | 764 | extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); |
764 | extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); | 765 | extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); |
765 | extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); | 766 | extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); |
766 | extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); | 767 | extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); |
767 | extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); | 768 | extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); |
768 | extern void blk_queue_dma_alignment(struct request_queue *, int); | 769 | extern void blk_queue_dma_alignment(struct request_queue *, int); |
769 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); | 770 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); |
770 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); | 771 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); |
771 | extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); | 772 | extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); |
772 | extern void blk_queue_issue_flush_fn(struct request_queue *, issue_flush_fn *); | 773 | extern void blk_queue_issue_flush_fn(struct request_queue *, issue_flush_fn *); |
773 | extern int blk_do_ordered(struct request_queue *, struct request **); | 774 | extern int blk_do_ordered(struct request_queue *, struct request **); |
774 | extern unsigned blk_ordered_cur_seq(struct request_queue *); | 775 | extern unsigned blk_ordered_cur_seq(struct request_queue *); |
775 | extern unsigned blk_ordered_req_seq(struct request *); | 776 | extern unsigned blk_ordered_req_seq(struct request *); |
776 | extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int); | 777 | extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int); |
777 | 778 | ||
778 | extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); | 779 | extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); |
779 | extern void blk_dump_rq_flags(struct request *, char *); | 780 | extern void blk_dump_rq_flags(struct request *, char *); |
780 | extern void generic_unplug_device(struct request_queue *); | 781 | extern void generic_unplug_device(struct request_queue *); |
781 | extern void __generic_unplug_device(struct request_queue *); | 782 | extern void __generic_unplug_device(struct request_queue *); |
782 | extern long nr_blockdev_pages(void); | 783 | extern long nr_blockdev_pages(void); |
783 | 784 | ||
784 | int blk_get_queue(struct request_queue *); | 785 | int blk_get_queue(struct request_queue *); |
785 | struct request_queue *blk_alloc_queue(gfp_t); | 786 | struct request_queue *blk_alloc_queue(gfp_t); |
786 | struct request_queue *blk_alloc_queue_node(gfp_t, int); | 787 | struct request_queue *blk_alloc_queue_node(gfp_t, int); |
787 | extern void blk_put_queue(struct request_queue *); | 788 | extern void blk_put_queue(struct request_queue *); |
788 | 789 | ||
789 | /* | 790 | /* |
790 | * tag stuff | 791 | * tag stuff |
791 | */ | 792 | */ |
792 | #define blk_queue_tag_depth(q) ((q)->queue_tags->busy) | 793 | #define blk_queue_tag_depth(q) ((q)->queue_tags->busy) |
793 | #define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth) | 794 | #define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth) |
794 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) | 795 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) |
795 | extern int blk_queue_start_tag(struct request_queue *, struct request *); | 796 | extern int blk_queue_start_tag(struct request_queue *, struct request *); |
796 | extern struct request *blk_queue_find_tag(struct request_queue *, int); | 797 | extern struct request *blk_queue_find_tag(struct request_queue *, int); |
797 | extern void blk_queue_end_tag(struct request_queue *, struct request *); | 798 | extern void blk_queue_end_tag(struct request_queue *, struct request *); |
798 | extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); | 799 | extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); |
799 | extern void blk_queue_free_tags(struct request_queue *); | 800 | extern void blk_queue_free_tags(struct request_queue *); |
800 | extern int blk_queue_resize_tags(struct request_queue *, int); | 801 | extern int blk_queue_resize_tags(struct request_queue *, int); |
801 | extern void blk_queue_invalidate_tags(struct request_queue *); | 802 | extern void blk_queue_invalidate_tags(struct request_queue *); |
802 | extern struct blk_queue_tag *blk_init_tags(int); | 803 | extern struct blk_queue_tag *blk_init_tags(int); |
803 | extern void blk_free_tags(struct blk_queue_tag *); | 804 | extern void blk_free_tags(struct blk_queue_tag *); |
804 | 805 | ||
805 | static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, | 806 | static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, |
806 | int tag) | 807 | int tag) |
807 | { | 808 | { |
808 | if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) | 809 | if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) |
809 | return NULL; | 810 | return NULL; |
810 | return bqt->tag_index[tag]; | 811 | return bqt->tag_index[tag]; |
811 | } | 812 | } |
812 | 813 | ||
813 | extern int blkdev_issue_flush(struct block_device *, sector_t *); | 814 | extern int blkdev_issue_flush(struct block_device *, sector_t *); |
814 | 815 | ||
815 | #define MAX_PHYS_SEGMENTS 128 | 816 | #define MAX_PHYS_SEGMENTS 128 |
816 | #define MAX_HW_SEGMENTS 128 | 817 | #define MAX_HW_SEGMENTS 128 |
817 | #define SAFE_MAX_SECTORS 255 | 818 | #define SAFE_MAX_SECTORS 255 |
818 | #define BLK_DEF_MAX_SECTORS 1024 | 819 | #define BLK_DEF_MAX_SECTORS 1024 |
819 | 820 | ||
820 | #define MAX_SEGMENT_SIZE 65536 | 821 | #define MAX_SEGMENT_SIZE 65536 |
821 | 822 | ||
822 | #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) | 823 | #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) |
823 | 824 | ||
824 | static inline int queue_hardsect_size(struct request_queue *q) | 825 | static inline int queue_hardsect_size(struct request_queue *q) |
825 | { | 826 | { |
826 | int retval = 512; | 827 | int retval = 512; |
827 | 828 | ||
828 | if (q && q->hardsect_size) | 829 | if (q && q->hardsect_size) |
829 | retval = q->hardsect_size; | 830 | retval = q->hardsect_size; |
830 | 831 | ||
831 | return retval; | 832 | return retval; |
832 | } | 833 | } |
833 | 834 | ||
834 | static inline int bdev_hardsect_size(struct block_device *bdev) | 835 | static inline int bdev_hardsect_size(struct block_device *bdev) |
835 | { | 836 | { |
836 | return queue_hardsect_size(bdev_get_queue(bdev)); | 837 | return queue_hardsect_size(bdev_get_queue(bdev)); |
837 | } | 838 | } |
838 | 839 | ||
839 | static inline int queue_dma_alignment(struct request_queue *q) | 840 | static inline int queue_dma_alignment(struct request_queue *q) |
840 | { | 841 | { |
841 | int retval = 511; | 842 | int retval = 511; |
842 | 843 | ||
843 | if (q && q->dma_alignment) | 844 | if (q && q->dma_alignment) |
844 | retval = q->dma_alignment; | 845 | retval = q->dma_alignment; |
845 | 846 | ||
846 | return retval; | 847 | return retval; |
847 | } | 848 | } |
848 | 849 | ||
849 | /* assumes size > 256 */ | 850 | /* assumes size > 256 */ |
850 | static inline unsigned int blksize_bits(unsigned int size) | 851 | static inline unsigned int blksize_bits(unsigned int size) |
851 | { | 852 | { |
852 | unsigned int bits = 8; | 853 | unsigned int bits = 8; |
853 | do { | 854 | do { |
854 | bits++; | 855 | bits++; |
855 | size >>= 1; | 856 | size >>= 1; |
856 | } while (size > 256); | 857 | } while (size > 256); |
857 | return bits; | 858 | return bits; |
858 | } | 859 | } |
859 | 860 | ||
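For instance, blksize_bits(512) evaluates to 9 and blksize_bits(4096) to 12, i.e. log2 of the block size, which is why the helper assumes size > 256.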
860 | static inline unsigned int block_size(struct block_device *bdev) | 861 | static inline unsigned int block_size(struct block_device *bdev) |
861 | { | 862 | { |
862 | return bdev->bd_block_size; | 863 | return bdev->bd_block_size; |
863 | } | 864 | } |
864 | 865 | ||
865 | typedef struct {struct page *v;} Sector; | 866 | typedef struct {struct page *v;} Sector; |
866 | 867 | ||
867 | unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); | 868 | unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); |
868 | 869 | ||
869 | static inline void put_dev_sector(Sector p) | 870 | static inline void put_dev_sector(Sector p) |
870 | { | 871 | { |
871 | page_cache_release(p.v); | 872 | page_cache_release(p.v); |
872 | } | 873 | } |
873 | 874 | ||
874 | struct work_struct; | 875 | struct work_struct; |
875 | int kblockd_schedule_work(struct work_struct *work); | 876 | int kblockd_schedule_work(struct work_struct *work); |
876 | void kblockd_flush_work(struct work_struct *work); | 877 | void kblockd_flush_work(struct work_struct *work); |
877 | 878 | ||
878 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ | 879 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ |
879 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) | 880 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) |
880 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ | 881 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ |
881 | MODULE_ALIAS("block-major-" __stringify(major) "-*") | 882 | MODULE_ALIAS("block-major-" __stringify(major) "-*") |
882 | 883 | ||
883 | 884 | ||
884 | #else /* CONFIG_BLOCK */ | 885 | #else /* CONFIG_BLOCK */ |
885 | /* | 886 | /* |
886 | * stubs for when the block layer is configured out | 887 | * stubs for when the block layer is configured out |
887 | */ | 888 | */ |
888 | #define buffer_heads_over_limit 0 | 889 | #define buffer_heads_over_limit 0 |
889 | 890 | ||
890 | static inline long nr_blockdev_pages(void) | 891 | static inline long nr_blockdev_pages(void) |
891 | { | 892 | { |
892 | return 0; | 893 | return 0; |
893 | } | 894 | } |
894 | 895 | ||
895 | static inline void exit_io_context(void) | 896 | static inline void exit_io_context(void) |
896 | { | 897 | { |
897 | } | 898 | } |
898 | 899 | ||
899 | #endif /* CONFIG_BLOCK */ | 900 | #endif /* CONFIG_BLOCK */ |
900 | 901 | ||
901 | #endif | 902 | #endif |
902 | 903 |
mm/bounce.c
1 | /* bounce buffer handling for block devices | 1 | /* bounce buffer handling for block devices |
2 | * | 2 | * |
3 | * - Split from highmem.c | 3 | * - Split from highmem.c |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/swap.h> | 8 | #include <linux/swap.h> |
9 | #include <linux/bio.h> | 9 | #include <linux/bio.h> |
10 | #include <linux/pagemap.h> | 10 | #include <linux/pagemap.h> |
11 | #include <linux/mempool.h> | 11 | #include <linux/mempool.h> |
12 | #include <linux/blkdev.h> | 12 | #include <linux/blkdev.h> |
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/hash.h> | 14 | #include <linux/hash.h> |
15 | #include <linux/highmem.h> | 15 | #include <linux/highmem.h> |
16 | #include <linux/blktrace_api.h> | 16 | #include <linux/blktrace_api.h> |
17 | #include <asm/tlbflush.h> | 17 | #include <asm/tlbflush.h> |
18 | 18 | ||
19 | #define POOL_SIZE 64 | 19 | #define POOL_SIZE 64 |
20 | #define ISA_POOL_SIZE 16 | 20 | #define ISA_POOL_SIZE 16 |
21 | 21 | ||
22 | static mempool_t *page_pool, *isa_page_pool; | 22 | static mempool_t *page_pool, *isa_page_pool; |
23 | 23 | ||
24 | #ifdef CONFIG_HIGHMEM | 24 | #ifdef CONFIG_HIGHMEM |
25 | static __init int init_emergency_pool(void) | 25 | static __init int init_emergency_pool(void) |
26 | { | 26 | { |
27 | struct sysinfo i; | 27 | struct sysinfo i; |
28 | si_meminfo(&i); | 28 | si_meminfo(&i); |
29 | si_swapinfo(&i); | 29 | si_swapinfo(&i); |
30 | 30 | ||
31 | if (!i.totalhigh) | 31 | if (!i.totalhigh) |
32 | return 0; | 32 | return 0; |
33 | 33 | ||
34 | page_pool = mempool_create_page_pool(POOL_SIZE, 0); | 34 | page_pool = mempool_create_page_pool(POOL_SIZE, 0); |
35 | BUG_ON(!page_pool); | 35 | BUG_ON(!page_pool); |
36 | printk("highmem bounce pool size: %d pages\n", POOL_SIZE); | 36 | printk("highmem bounce pool size: %d pages\n", POOL_SIZE); |
37 | 37 | ||
38 | return 0; | 38 | return 0; |
39 | } | 39 | } |
40 | 40 | ||
41 | __initcall(init_emergency_pool); | 41 | __initcall(init_emergency_pool); |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * highmem version, map in to vec | 44 | * highmem version, map in to vec |
45 | */ | 45 | */ |
46 | static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) | 46 | static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) |
47 | { | 47 | { |
48 | unsigned long flags; | 48 | unsigned long flags; |
49 | unsigned char *vto; | 49 | unsigned char *vto; |
50 | 50 | ||
51 | local_irq_save(flags); | 51 | local_irq_save(flags); |
52 | vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ); | 52 | vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ); |
53 | memcpy(vto + to->bv_offset, vfrom, to->bv_len); | 53 | memcpy(vto + to->bv_offset, vfrom, to->bv_len); |
54 | kunmap_atomic(vto, KM_BOUNCE_READ); | 54 | kunmap_atomic(vto, KM_BOUNCE_READ); |
55 | local_irq_restore(flags); | 55 | local_irq_restore(flags); |
56 | } | 56 | } |
57 | 57 | ||
58 | #else /* CONFIG_HIGHMEM */ | 58 | #else /* CONFIG_HIGHMEM */ |
59 | 59 | ||
60 | #define bounce_copy_vec(to, vfrom) \ | 60 | #define bounce_copy_vec(to, vfrom) \ |
61 | memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len) | 61 | memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len) |
62 | 62 | ||
63 | #endif /* CONFIG_HIGHMEM */ | 63 | #endif /* CONFIG_HIGHMEM */ |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * allocate pages in the DMA region for the ISA pool | 66 | * allocate pages in the DMA region for the ISA pool |
67 | */ | 67 | */ |
68 | static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) | 68 | static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) |
69 | { | 69 | { |
70 | return mempool_alloc_pages(gfp_mask | GFP_DMA, data); | 70 | return mempool_alloc_pages(gfp_mask | GFP_DMA, data); |
71 | } | 71 | } |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * gets called "every" time someone initializes a queue with BLK_BOUNCE_ISA | 74 | * gets called "every" time someone initializes a queue with BLK_BOUNCE_ISA |
75 | * as the max address, so check if the pool has already been created. | 75 | * as the max address, so check if the pool has already been created. |
76 | */ | 76 | */ |
77 | int init_emergency_isa_pool(void) | 77 | int init_emergency_isa_pool(void) |
78 | { | 78 | { |
79 | if (isa_page_pool) | 79 | if (isa_page_pool) |
80 | return 0; | 80 | return 0; |
81 | 81 | ||
82 | isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa, | 82 | isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa, |
83 | mempool_free_pages, (void *) 0); | 83 | mempool_free_pages, (void *) 0); |
84 | BUG_ON(!isa_page_pool); | 84 | BUG_ON(!isa_page_pool); |
85 | 85 | ||
86 | printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE); | 86 | printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE); |
87 | return 0; | 87 | return 0; |
88 | } | 88 | } |
89 | 89 | ||
90 | /* | 90 | /* |
91 | * Simple bounce buffer support for highmem pages. Depending on the | 91 | * Simple bounce buffer support for highmem pages. Depending on the |
92 | * queue gfp mask set, *to may or may not be a highmem page. kmap it | 92 | * queue gfp mask set, *to may or may not be a highmem page. kmap it |
93 | * always, it will do the Right Thing | 93 | * always, it will do the Right Thing |
94 | */ | 94 | */ |
95 | static void copy_to_high_bio_irq(struct bio *to, struct bio *from) | 95 | static void copy_to_high_bio_irq(struct bio *to, struct bio *from) |
96 | { | 96 | { |
97 | unsigned char *vfrom; | 97 | unsigned char *vfrom; |
98 | struct bio_vec *tovec, *fromvec; | 98 | struct bio_vec *tovec, *fromvec; |
99 | int i; | 99 | int i; |
100 | 100 | ||
101 | __bio_for_each_segment(tovec, to, i, 0) { | 101 | __bio_for_each_segment(tovec, to, i, 0) { |
102 | fromvec = from->bi_io_vec + i; | 102 | fromvec = from->bi_io_vec + i; |
103 | 103 | ||
104 | /* | 104 | /* |
105 | * not bounced | 105 | * not bounced |
106 | */ | 106 | */ |
107 | if (tovec->bv_page == fromvec->bv_page) | 107 | if (tovec->bv_page == fromvec->bv_page) |
108 | continue; | 108 | continue; |
109 | 109 | ||
110 | /* | 110 | /* |
111 | * fromvec->bv_offset and fromvec->bv_len might have been | 111 | * fromvec->bv_offset and fromvec->bv_len might have been |
112 | * modified by the block layer, so use the original copy, | 112 | * modified by the block layer, so use the original copy, |
113 | * bounce_copy_vec already uses tovec->bv_len | 113 | * bounce_copy_vec already uses tovec->bv_len |
114 | */ | 114 | */ |
115 | vfrom = page_address(fromvec->bv_page) + tovec->bv_offset; | 115 | vfrom = page_address(fromvec->bv_page) + tovec->bv_offset; |
116 | 116 | ||
117 | flush_dcache_page(tovec->bv_page); | 117 | flush_dcache_page(tovec->bv_page); |
118 | bounce_copy_vec(tovec, vfrom); | 118 | bounce_copy_vec(tovec, vfrom); |
119 | } | 119 | } |
120 | } | 120 | } |
121 | 121 | ||
122 | static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) | 122 | static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) |
123 | { | 123 | { |
124 | struct bio *bio_orig = bio->bi_private; | 124 | struct bio *bio_orig = bio->bi_private; |
125 | struct bio_vec *bvec, *org_vec; | 125 | struct bio_vec *bvec, *org_vec; |
126 | int i; | 126 | int i; |
127 | 127 | ||
128 | if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) | 128 | if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) |
129 | set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags); | 129 | set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags); |
130 | 130 | ||
131 | /* | 131 | /* |
132 | * free up bounce indirect pages used | 132 | * free up bounce indirect pages used |
133 | */ | 133 | */ |
134 | __bio_for_each_segment(bvec, bio, i, 0) { | 134 | __bio_for_each_segment(bvec, bio, i, 0) { |
135 | org_vec = bio_orig->bi_io_vec + i; | 135 | org_vec = bio_orig->bi_io_vec + i; |
136 | if (bvec->bv_page == org_vec->bv_page) | 136 | if (bvec->bv_page == org_vec->bv_page) |
137 | continue; | 137 | continue; |
138 | 138 | ||
139 | dec_zone_page_state(bvec->bv_page, NR_BOUNCE); | 139 | dec_zone_page_state(bvec->bv_page, NR_BOUNCE); |
140 | mempool_free(bvec->bv_page, pool); | 140 | mempool_free(bvec->bv_page, pool); |
141 | } | 141 | } |
142 | 142 | ||
143 | bio_endio(bio_orig, err); | 143 | bio_endio(bio_orig, err); |
144 | bio_put(bio); | 144 | bio_put(bio); |
145 | } | 145 | } |
146 | 146 | ||
147 | static void bounce_end_io_write(struct bio *bio, int err) | 147 | static void bounce_end_io_write(struct bio *bio, int err) |
148 | { | 148 | { |
149 | bounce_end_io(bio, page_pool, err); | 149 | bounce_end_io(bio, page_pool, err); |
150 | } | 150 | } |
151 | 151 | ||
152 | static void bounce_end_io_write_isa(struct bio *bio, int err) | 152 | static void bounce_end_io_write_isa(struct bio *bio, int err) |
153 | { | 153 | { |
154 | 154 | ||
155 | bounce_end_io(bio, isa_page_pool, err); | 155 | bounce_end_io(bio, isa_page_pool, err); |
156 | } | 156 | } |
157 | 157 | ||
158 | static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err) | 158 | static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err) |
159 | { | 159 | { |
160 | struct bio *bio_orig = bio->bi_private; | 160 | struct bio *bio_orig = bio->bi_private; |
161 | 161 | ||
162 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | 162 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) |
163 | copy_to_high_bio_irq(bio_orig, bio); | 163 | copy_to_high_bio_irq(bio_orig, bio); |
164 | 164 | ||
165 | bounce_end_io(bio, pool, err); | 165 | bounce_end_io(bio, pool, err); |
166 | } | 166 | } |
167 | 167 | ||
168 | static void bounce_end_io_read(struct bio *bio, int err) | 168 | static void bounce_end_io_read(struct bio *bio, int err) |
169 | { | 169 | { |
170 | __bounce_end_io_read(bio, page_pool, err); | 170 | __bounce_end_io_read(bio, page_pool, err); |
171 | } | 171 | } |
172 | 172 | ||
173 | static void bounce_end_io_read_isa(struct bio *bio, int err) | 173 | static void bounce_end_io_read_isa(struct bio *bio, int err) |
174 | { | 174 | { |
175 | __bounce_end_io_read(bio, isa_page_pool, err); | 175 | __bounce_end_io_read(bio, isa_page_pool, err); |
176 | } | 176 | } |
177 | 177 | ||
178 | static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, | 178 | static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, |
179 | mempool_t *pool) | 179 | mempool_t *pool) |
180 | { | 180 | { |
181 | struct page *page; | 181 | struct page *page; |
182 | struct bio *bio = NULL; | 182 | struct bio *bio = NULL; |
183 | int i, rw = bio_data_dir(*bio_orig); | 183 | int i, rw = bio_data_dir(*bio_orig); |
184 | struct bio_vec *to, *from; | 184 | struct bio_vec *to, *from; |
185 | 185 | ||
186 | bio_for_each_segment(from, *bio_orig, i) { | 186 | bio_for_each_segment(from, *bio_orig, i) { |
187 | page = from->bv_page; | 187 | page = from->bv_page; |
188 | 188 | ||
189 | /* | 189 | /* |
190 | * is destination page below bounce pfn? | 190 | * is destination page below bounce pfn? |
191 | */ | 191 | */ |
192 | if (page_to_pfn(page) <= q->bounce_pfn) | 192 | if (page_to_pfn(page) <= q->bounce_pfn) |
193 | continue; | 193 | continue; |
194 | 194 | ||
195 | /* | 195 | /* |
196 | * irk, bounce it | 196 | * irk, bounce it |
197 | */ | 197 | */ |
198 | if (!bio) | 198 | if (!bio) |
199 | bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt); | 199 | bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt); |
200 | 200 | ||
201 | to = bio->bi_io_vec + i; | 201 | to = bio->bi_io_vec + i; |
202 | 202 | ||
203 | to->bv_page = mempool_alloc(pool, q->bounce_gfp); | 203 | to->bv_page = mempool_alloc(pool, q->bounce_gfp); |
204 | to->bv_len = from->bv_len; | 204 | to->bv_len = from->bv_len; |
205 | to->bv_offset = from->bv_offset; | 205 | to->bv_offset = from->bv_offset; |
206 | inc_zone_page_state(to->bv_page, NR_BOUNCE); | 206 | inc_zone_page_state(to->bv_page, NR_BOUNCE); |
207 | 207 | ||
208 | if (rw == WRITE) { | 208 | if (rw == WRITE) { |
209 | char *vto, *vfrom; | 209 | char *vto, *vfrom; |
210 | 210 | ||
211 | flush_dcache_page(from->bv_page); | 211 | flush_dcache_page(from->bv_page); |
212 | vto = page_address(to->bv_page) + to->bv_offset; | 212 | vto = page_address(to->bv_page) + to->bv_offset; |
213 | vfrom = kmap(from->bv_page) + from->bv_offset; | 213 | vfrom = kmap(from->bv_page) + from->bv_offset; |
214 | memcpy(vto, vfrom, to->bv_len); | 214 | memcpy(vto, vfrom, to->bv_len); |
215 | kunmap(from->bv_page); | 215 | kunmap(from->bv_page); |
216 | } | 216 | } |
217 | } | 217 | } |
218 | 218 | ||
219 | /* | 219 | /* |
220 | * no pages bounced | 220 | * no pages bounced |
221 | */ | 221 | */ |
222 | if (!bio) | 222 | if (!bio) |
223 | return; | 223 | return; |
224 | 224 | ||
225 | blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE); | 225 | blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE); |
226 | 226 | ||
227 | /* | 227 | /* |
228 | * at least one page was bounced, fill in possible non-highmem | 228 | * at least one page was bounced, fill in possible non-highmem |
229 | * pages | 229 | * pages |
230 | */ | 230 | */ |
231 | __bio_for_each_segment(from, *bio_orig, i, 0) { | 231 | __bio_for_each_segment(from, *bio_orig, i, 0) { |
232 | to = bio_iovec_idx(bio, i); | 232 | to = bio_iovec_idx(bio, i); |
233 | if (!to->bv_page) { | 233 | if (!to->bv_page) { |
234 | to->bv_page = from->bv_page; | 234 | to->bv_page = from->bv_page; |
235 | to->bv_len = from->bv_len; | 235 | to->bv_len = from->bv_len; |
236 | to->bv_offset = from->bv_offset; | 236 | to->bv_offset = from->bv_offset; |
237 | } | 237 | } |
238 | } | 238 | } |
239 | 239 | ||
240 | bio->bi_bdev = (*bio_orig)->bi_bdev; | 240 | bio->bi_bdev = (*bio_orig)->bi_bdev; |
241 | bio->bi_flags |= (1 << BIO_BOUNCED); | 241 | bio->bi_flags |= (1 << BIO_BOUNCED); |
242 | bio->bi_sector = (*bio_orig)->bi_sector; | 242 | bio->bi_sector = (*bio_orig)->bi_sector; |
243 | bio->bi_rw = (*bio_orig)->bi_rw; | 243 | bio->bi_rw = (*bio_orig)->bi_rw; |
244 | 244 | ||
245 | bio->bi_vcnt = (*bio_orig)->bi_vcnt; | 245 | bio->bi_vcnt = (*bio_orig)->bi_vcnt; |
246 | bio->bi_idx = (*bio_orig)->bi_idx; | 246 | bio->bi_idx = (*bio_orig)->bi_idx; |
247 | bio->bi_size = (*bio_orig)->bi_size; | 247 | bio->bi_size = (*bio_orig)->bi_size; |
248 | 248 | ||
249 | if (pool == page_pool) { | 249 | if (pool == page_pool) { |
250 | bio->bi_end_io = bounce_end_io_write; | 250 | bio->bi_end_io = bounce_end_io_write; |
251 | if (rw == READ) | 251 | if (rw == READ) |
252 | bio->bi_end_io = bounce_end_io_read; | 252 | bio->bi_end_io = bounce_end_io_read; |
253 | } else { | 253 | } else { |
254 | bio->bi_end_io = bounce_end_io_write_isa; | 254 | bio->bi_end_io = bounce_end_io_write_isa; |
255 | if (rw == READ) | 255 | if (rw == READ) |
256 | bio->bi_end_io = bounce_end_io_read_isa; | 256 | bio->bi_end_io = bounce_end_io_read_isa; |
257 | } | 257 | } |
258 | 258 | ||
259 | bio->bi_private = *bio_orig; | 259 | bio->bi_private = *bio_orig; |
260 | *bio_orig = bio; | 260 | *bio_orig = bio; |
261 | } | 261 | } |
262 | 262 | ||
263 | void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) | 263 | void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) |
264 | { | 264 | { |
265 | mempool_t *pool; | 265 | mempool_t *pool; |
266 | 266 | ||
267 | /* | 267 | /* |
268 | * Data-less bio, nothing to bounce | ||
269 | */ | ||
270 | if (bio_empty_barrier(*bio_orig)) | ||
271 | return; | ||
272 | |||
273 | /* | ||
268 | * for non-isa bounce case, just check if the bounce pfn is equal | 274 | * for non-isa bounce case, just check if the bounce pfn is equal |
269 | * to or bigger than the highest pfn in the system -- in that case, | 275 | * to or bigger than the highest pfn in the system -- in that case, |
270 | * don't waste time iterating over bio segments | 276 | * don't waste time iterating over bio segments |
271 | */ | 277 | */ |
272 | if (!(q->bounce_gfp & GFP_DMA)) { | 278 | if (!(q->bounce_gfp & GFP_DMA)) { |
273 | if (q->bounce_pfn >= blk_max_pfn) | 279 | if (q->bounce_pfn >= blk_max_pfn) |
274 | return; | 280 | return; |
275 | pool = page_pool; | 281 | pool = page_pool; |
276 | } else { | 282 | } else { |
277 | BUG_ON(!isa_page_pool); | 283 | BUG_ON(!isa_page_pool); |
278 | pool = isa_page_pool; | 284 | pool = isa_page_pool; |
279 | } | 285 | } |
280 | 286 | ||
281 | /* | 287 | /* |
282 | * slow path | 288 | * slow path |
283 | */ | 289 | */ |
284 | __blk_queue_bounce(q, bio_orig, pool); | 290 | __blk_queue_bounce(q, bio_orig, pool); |
285 | } | 291 | } |
286 | 292 | ||
287 | EXPORT_SYMBOL(blk_queue_bounce); | 293 | EXPORT_SYMBOL(blk_queue_bounce); |
288 | 294 |
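The new check at the top of blk_queue_bounce() exists because a data-less barrier arrives as a bio with no payload: there is nothing to bounce, so it is passed through untouched. Submission of such a bio happens elsewhere (e.g. the blkdev_issue_flush() path); purely as a hedged sketch under assumed mydev_* names, it amounts to allocating a bio with no bio_vecs and submitting it with the barrier flag set:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/fs.h>

static void mydev_barrier_end_io(struct bio *bio, int error)
{
	/* illustrative only: a real caller would record the error/completion */
	bio_put(bio);
}

/* hedged sketch: send an empty barrier, which blk_queue_bounce() now skips */
static void mydev_issue_empty_barrier(struct block_device *bdev)
{
	struct bio *bio = bio_alloc(GFP_KERNEL, 0);	/* no pages attached */

	bio->bi_bdev = bdev;
	bio->bi_end_io = mydev_barrier_end_io;

	/* WRITE_BARRIER = WRITE with the barrier bit set; bi_size stays 0 */
	submit_bio(WRITE_BARRIER, bio);
}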