Commit 64100099ed22f71cce656c5c2caecf5c9cf255dc

Authored by Arjan van de Ven
Committed by Jens Axboe
1 parent 80cfd548ee

[BLOCK] mark some block/ variables const

The patch below marks various read-only variables in block/* as const so
that gcc can optimize their use; e.g. gcc will now substitute the value
directly at each use and can even eliminate the variables' storage
altogether.

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Jens Axboe <axboe@suse.de>
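
For background, a minimal illustrative sketch (not part of this patch; the names below are hypothetical) of what the const qualifier buys here: for a file-scope static const integer whose address is never taken, gcc can substitute the constant into every use at compile time and drop the object from the binary altogether.

/* Illustrative only -- my_quantum and my_budget do not exist in block/. */
static const int my_quantum = 4;	/* read-only tunable, analogous to cfq_quantum */

static int my_budget(int nr_queues)
{
	/*
	 * my_quantum is const and its address is never taken, so gcc can
	 * fold this expression into "nr_queues * 4" and omit my_quantum's
	 * storage entirely, instead of emitting a load from .data.
	 */
	return nr_queues * my_quantum;
}

Without the qualifier, the same variable must remain a writable object in .data and every use is a memory load; that is the overhead removed for tunables that are never written at run time.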

Showing 4 changed files with 14 additions and 14 deletions

1 /* 1 /*
2 * CFQ, or complete fairness queueing, disk scheduler. 2 * CFQ, or complete fairness queueing, disk scheduler.
3 * 3 *
4 * Based on ideas from a previously unfinished io 4 * Based on ideas from a previously unfinished io
5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. 5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
6 * 6 *
7 * Copyright (C) 2003 Jens Axboe <axboe@suse.de> 7 * Copyright (C) 2003 Jens Axboe <axboe@suse.de>
8 */ 8 */
9 #include <linux/kernel.h> 9 #include <linux/kernel.h>
10 #include <linux/fs.h> 10 #include <linux/fs.h>
11 #include <linux/blkdev.h> 11 #include <linux/blkdev.h>
12 #include <linux/elevator.h> 12 #include <linux/elevator.h>
13 #include <linux/bio.h> 13 #include <linux/bio.h>
14 #include <linux/config.h> 14 #include <linux/config.h>
15 #include <linux/module.h> 15 #include <linux/module.h>
16 #include <linux/slab.h> 16 #include <linux/slab.h>
17 #include <linux/init.h> 17 #include <linux/init.h>
18 #include <linux/compiler.h> 18 #include <linux/compiler.h>
19 #include <linux/hash.h> 19 #include <linux/hash.h>
20 #include <linux/rbtree.h> 20 #include <linux/rbtree.h>
21 #include <linux/mempool.h> 21 #include <linux/mempool.h>
22 #include <linux/ioprio.h> 22 #include <linux/ioprio.h>
23 #include <linux/writeback.h> 23 #include <linux/writeback.h>
24 24
25 /* 25 /*
26 * tunables 26 * tunables
27 */ 27 */
28 static int cfq_quantum = 4; /* max queue in one round of service */ 28 static const int cfq_quantum = 4; /* max queue in one round of service */
29 static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ 29 static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/
30 static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 30 static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
31 static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ 31 static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */
32 static int cfq_back_penalty = 2; /* penalty of a backwards seek */ 32 static const int cfq_back_penalty = 2; /* penalty of a backwards seek */
33 33
34 static int cfq_slice_sync = HZ / 10; 34 static const int cfq_slice_sync = HZ / 10;
35 static int cfq_slice_async = HZ / 25; 35 static int cfq_slice_async = HZ / 25;
36 static int cfq_slice_async_rq = 2; 36 static const int cfq_slice_async_rq = 2;
37 static int cfq_slice_idle = HZ / 100; 37 static int cfq_slice_idle = HZ / 100;
38 38
39 #define CFQ_IDLE_GRACE (HZ / 10) 39 #define CFQ_IDLE_GRACE (HZ / 10)
40 #define CFQ_SLICE_SCALE (5) 40 #define CFQ_SLICE_SCALE (5)
41 41
42 #define CFQ_KEY_ASYNC (0) 42 #define CFQ_KEY_ASYNC (0)
43 #define CFQ_KEY_ANY (0xffff) 43 #define CFQ_KEY_ANY (0xffff)
44 44
45 /* 45 /*
46 * disable queueing at the driver/hardware level 46 * disable queueing at the driver/hardware level
47 */ 47 */
48 static int cfq_max_depth = 2; 48 static const int cfq_max_depth = 2;
49 49
50 /* 50 /*
51 * for the hash of cfqq inside the cfqd 51 * for the hash of cfqq inside the cfqd
52 */ 52 */
53 #define CFQ_QHASH_SHIFT 6 53 #define CFQ_QHASH_SHIFT 6
54 #define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT) 54 #define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT)
55 #define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash) 55 #define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash)
56 56
57 /* 57 /*
58 * for the hash of crq inside the cfqq 58 * for the hash of crq inside the cfqq
59 */ 59 */
60 #define CFQ_MHASH_SHIFT 6 60 #define CFQ_MHASH_SHIFT 6
61 #define CFQ_MHASH_BLOCK(sec) ((sec) >> 3) 61 #define CFQ_MHASH_BLOCK(sec) ((sec) >> 3)
62 #define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT) 62 #define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT)
63 #define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT) 63 #define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT)
64 #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) 64 #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
65 #define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash) 65 #define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash)
66 66
67 #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) 67 #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list)
68 #define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) 68 #define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
69 69
70 #define RQ_DATA(rq) (rq)->elevator_private 70 #define RQ_DATA(rq) (rq)->elevator_private
71 71
72 /* 72 /*
73 * rb-tree defines 73 * rb-tree defines
74 */ 74 */
75 #define RB_NONE (2) 75 #define RB_NONE (2)
76 #define RB_EMPTY(node) ((node)->rb_node == NULL) 76 #define RB_EMPTY(node) ((node)->rb_node == NULL)
77 #define RB_CLEAR_COLOR(node) (node)->rb_color = RB_NONE 77 #define RB_CLEAR_COLOR(node) (node)->rb_color = RB_NONE
78 #define RB_CLEAR(node) do { \ 78 #define RB_CLEAR(node) do { \
79 (node)->rb_parent = NULL; \ 79 (node)->rb_parent = NULL; \
80 RB_CLEAR_COLOR((node)); \ 80 RB_CLEAR_COLOR((node)); \
81 (node)->rb_right = NULL; \ 81 (node)->rb_right = NULL; \
82 (node)->rb_left = NULL; \ 82 (node)->rb_left = NULL; \
83 } while (0) 83 } while (0)
84 #define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL) 84 #define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL)
85 #define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) 85 #define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node)
86 #define rq_rb_key(rq) (rq)->sector 86 #define rq_rb_key(rq) (rq)->sector
87 87
88 static kmem_cache_t *crq_pool; 88 static kmem_cache_t *crq_pool;
89 static kmem_cache_t *cfq_pool; 89 static kmem_cache_t *cfq_pool;
90 static kmem_cache_t *cfq_ioc_pool; 90 static kmem_cache_t *cfq_ioc_pool;
91 91
92 #define CFQ_PRIO_LISTS IOPRIO_BE_NR 92 #define CFQ_PRIO_LISTS IOPRIO_BE_NR
93 #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 93 #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
94 #define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE) 94 #define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE)
95 #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) 95 #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
96 96
97 #define ASYNC (0) 97 #define ASYNC (0)
98 #define SYNC (1) 98 #define SYNC (1)
99 99
100 #define cfq_cfqq_dispatched(cfqq) \ 100 #define cfq_cfqq_dispatched(cfqq) \
101 ((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC]) 101 ((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC])
102 102
103 #define cfq_cfqq_class_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC) 103 #define cfq_cfqq_class_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC)
104 104
105 #define cfq_cfqq_sync(cfqq) \ 105 #define cfq_cfqq_sync(cfqq) \
106 (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC]) 106 (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC])
107 107
108 /* 108 /*
109 * Per block device queue structure 109 * Per block device queue structure
110 */ 110 */
111 struct cfq_data { 111 struct cfq_data {
112 atomic_t ref; 112 atomic_t ref;
113 request_queue_t *queue; 113 request_queue_t *queue;
114 114
115 /* 115 /*
116 * rr list of queues with requests and the count of them 116 * rr list of queues with requests and the count of them
117 */ 117 */
118 struct list_head rr_list[CFQ_PRIO_LISTS]; 118 struct list_head rr_list[CFQ_PRIO_LISTS];
119 struct list_head busy_rr; 119 struct list_head busy_rr;
120 struct list_head cur_rr; 120 struct list_head cur_rr;
121 struct list_head idle_rr; 121 struct list_head idle_rr;
122 unsigned int busy_queues; 122 unsigned int busy_queues;
123 123
124 /* 124 /*
125 * non-ordered list of empty cfqq's 125 * non-ordered list of empty cfqq's
126 */ 126 */
127 struct list_head empty_list; 127 struct list_head empty_list;
128 128
129 /* 129 /*
130 * cfqq lookup hash 130 * cfqq lookup hash
131 */ 131 */
132 struct hlist_head *cfq_hash; 132 struct hlist_head *cfq_hash;
133 133
134 /* 134 /*
135 * global crq hash for all queues 135 * global crq hash for all queues
136 */ 136 */
137 struct hlist_head *crq_hash; 137 struct hlist_head *crq_hash;
138 138
139 unsigned int max_queued; 139 unsigned int max_queued;
140 140
141 mempool_t *crq_pool; 141 mempool_t *crq_pool;
142 142
143 int rq_in_driver; 143 int rq_in_driver;
144 144
145 /* 145 /*
146 * schedule slice state info 146 * schedule slice state info
147 */ 147 */
148 /* 148 /*
149 * idle window management 149 * idle window management
150 */ 150 */
151 struct timer_list idle_slice_timer; 151 struct timer_list idle_slice_timer;
152 struct work_struct unplug_work; 152 struct work_struct unplug_work;
153 153
154 struct cfq_queue *active_queue; 154 struct cfq_queue *active_queue;
155 struct cfq_io_context *active_cic; 155 struct cfq_io_context *active_cic;
156 int cur_prio, cur_end_prio; 156 int cur_prio, cur_end_prio;
157 unsigned int dispatch_slice; 157 unsigned int dispatch_slice;
158 158
159 struct timer_list idle_class_timer; 159 struct timer_list idle_class_timer;
160 160
161 sector_t last_sector; 161 sector_t last_sector;
162 unsigned long last_end_request; 162 unsigned long last_end_request;
163 163
164 unsigned int rq_starved; 164 unsigned int rq_starved;
165 165
166 /* 166 /*
167 * tunables, see top of file 167 * tunables, see top of file
168 */ 168 */
169 unsigned int cfq_quantum; 169 unsigned int cfq_quantum;
170 unsigned int cfq_queued; 170 unsigned int cfq_queued;
171 unsigned int cfq_fifo_expire[2]; 171 unsigned int cfq_fifo_expire[2];
172 unsigned int cfq_back_penalty; 172 unsigned int cfq_back_penalty;
173 unsigned int cfq_back_max; 173 unsigned int cfq_back_max;
174 unsigned int cfq_slice[2]; 174 unsigned int cfq_slice[2];
175 unsigned int cfq_slice_async_rq; 175 unsigned int cfq_slice_async_rq;
176 unsigned int cfq_slice_idle; 176 unsigned int cfq_slice_idle;
177 unsigned int cfq_max_depth; 177 unsigned int cfq_max_depth;
178 }; 178 };
179 179
180 /* 180 /*
181 * Per process-grouping structure 181 * Per process-grouping structure
182 */ 182 */
183 struct cfq_queue { 183 struct cfq_queue {
184 /* reference count */ 184 /* reference count */
185 atomic_t ref; 185 atomic_t ref;
186 /* parent cfq_data */ 186 /* parent cfq_data */
187 struct cfq_data *cfqd; 187 struct cfq_data *cfqd;
188 /* cfqq lookup hash */ 188 /* cfqq lookup hash */
189 struct hlist_node cfq_hash; 189 struct hlist_node cfq_hash;
190 /* hash key */ 190 /* hash key */
191 unsigned int key; 191 unsigned int key;
192 /* on either rr or empty list of cfqd */ 192 /* on either rr or empty list of cfqd */
193 struct list_head cfq_list; 193 struct list_head cfq_list;
194 /* sorted list of pending requests */ 194 /* sorted list of pending requests */
195 struct rb_root sort_list; 195 struct rb_root sort_list;
196 /* if fifo isn't expired, next request to serve */ 196 /* if fifo isn't expired, next request to serve */
197 struct cfq_rq *next_crq; 197 struct cfq_rq *next_crq;
198 /* requests queued in sort_list */ 198 /* requests queued in sort_list */
199 int queued[2]; 199 int queued[2];
200 /* currently allocated requests */ 200 /* currently allocated requests */
201 int allocated[2]; 201 int allocated[2];
202 /* fifo list of requests in sort_list */ 202 /* fifo list of requests in sort_list */
203 struct list_head fifo; 203 struct list_head fifo;
204 204
205 unsigned long slice_start; 205 unsigned long slice_start;
206 unsigned long slice_end; 206 unsigned long slice_end;
207 unsigned long slice_left; 207 unsigned long slice_left;
208 unsigned long service_last; 208 unsigned long service_last;
209 209
210 /* number of requests that are on the dispatch list */ 210 /* number of requests that are on the dispatch list */
211 int on_dispatch[2]; 211 int on_dispatch[2];
212 212
213 /* io prio of this group */ 213 /* io prio of this group */
214 unsigned short ioprio, org_ioprio; 214 unsigned short ioprio, org_ioprio;
215 unsigned short ioprio_class, org_ioprio_class; 215 unsigned short ioprio_class, org_ioprio_class;
216 216
217 /* various state flags, see below */ 217 /* various state flags, see below */
218 unsigned int flags; 218 unsigned int flags;
219 }; 219 };
220 220
221 struct cfq_rq { 221 struct cfq_rq {
222 struct rb_node rb_node; 222 struct rb_node rb_node;
223 sector_t rb_key; 223 sector_t rb_key;
224 struct request *request; 224 struct request *request;
225 struct hlist_node hash; 225 struct hlist_node hash;
226 226
227 struct cfq_queue *cfq_queue; 227 struct cfq_queue *cfq_queue;
228 struct cfq_io_context *io_context; 228 struct cfq_io_context *io_context;
229 229
230 unsigned int crq_flags; 230 unsigned int crq_flags;
231 }; 231 };
232 232
233 enum cfqq_state_flags { 233 enum cfqq_state_flags {
234 CFQ_CFQQ_FLAG_on_rr = 0, 234 CFQ_CFQQ_FLAG_on_rr = 0,
235 CFQ_CFQQ_FLAG_wait_request, 235 CFQ_CFQQ_FLAG_wait_request,
236 CFQ_CFQQ_FLAG_must_alloc, 236 CFQ_CFQQ_FLAG_must_alloc,
237 CFQ_CFQQ_FLAG_must_alloc_slice, 237 CFQ_CFQQ_FLAG_must_alloc_slice,
238 CFQ_CFQQ_FLAG_must_dispatch, 238 CFQ_CFQQ_FLAG_must_dispatch,
239 CFQ_CFQQ_FLAG_fifo_expire, 239 CFQ_CFQQ_FLAG_fifo_expire,
240 CFQ_CFQQ_FLAG_idle_window, 240 CFQ_CFQQ_FLAG_idle_window,
241 CFQ_CFQQ_FLAG_prio_changed, 241 CFQ_CFQQ_FLAG_prio_changed,
242 CFQ_CFQQ_FLAG_expired, 242 CFQ_CFQQ_FLAG_expired,
243 }; 243 };
244 244
245 #define CFQ_CFQQ_FNS(name) \ 245 #define CFQ_CFQQ_FNS(name) \
246 static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \ 246 static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \
247 { \ 247 { \
248 cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name); \ 248 cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name); \
249 } \ 249 } \
250 static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \ 250 static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \
251 { \ 251 { \
252 cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \ 252 cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \
253 } \ 253 } \
254 static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ 254 static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
255 { \ 255 { \
256 return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \ 256 return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \
257 } 257 }
258 258
259 CFQ_CFQQ_FNS(on_rr); 259 CFQ_CFQQ_FNS(on_rr);
260 CFQ_CFQQ_FNS(wait_request); 260 CFQ_CFQQ_FNS(wait_request);
261 CFQ_CFQQ_FNS(must_alloc); 261 CFQ_CFQQ_FNS(must_alloc);
262 CFQ_CFQQ_FNS(must_alloc_slice); 262 CFQ_CFQQ_FNS(must_alloc_slice);
263 CFQ_CFQQ_FNS(must_dispatch); 263 CFQ_CFQQ_FNS(must_dispatch);
264 CFQ_CFQQ_FNS(fifo_expire); 264 CFQ_CFQQ_FNS(fifo_expire);
265 CFQ_CFQQ_FNS(idle_window); 265 CFQ_CFQQ_FNS(idle_window);
266 CFQ_CFQQ_FNS(prio_changed); 266 CFQ_CFQQ_FNS(prio_changed);
267 CFQ_CFQQ_FNS(expired); 267 CFQ_CFQQ_FNS(expired);
268 #undef CFQ_CFQQ_FNS 268 #undef CFQ_CFQQ_FNS
269 269
270 enum cfq_rq_state_flags { 270 enum cfq_rq_state_flags {
271 CFQ_CRQ_FLAG_is_sync = 0, 271 CFQ_CRQ_FLAG_is_sync = 0,
272 }; 272 };
273 273
274 #define CFQ_CRQ_FNS(name) \ 274 #define CFQ_CRQ_FNS(name) \
275 static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \ 275 static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \
276 { \ 276 { \
277 crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \ 277 crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \
278 } \ 278 } \
279 static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \ 279 static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \
280 { \ 280 { \
281 crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \ 281 crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \
282 } \ 282 } \
283 static inline int cfq_crq_##name(const struct cfq_rq *crq) \ 283 static inline int cfq_crq_##name(const struct cfq_rq *crq) \
284 { \ 284 { \
285 return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \ 285 return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \
286 } 286 }
287 287
288 CFQ_CRQ_FNS(is_sync); 288 CFQ_CRQ_FNS(is_sync);
289 #undef CFQ_CRQ_FNS 289 #undef CFQ_CRQ_FNS
290 290
291 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); 291 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
292 static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); 292 static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
293 static void cfq_put_cfqd(struct cfq_data *cfqd); 293 static void cfq_put_cfqd(struct cfq_data *cfqd);
294 294
295 #define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE) 295 #define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE)
296 296
297 /* 297 /*
298 * lots of deadline iosched dupes, can be abstracted later... 298 * lots of deadline iosched dupes, can be abstracted later...
299 */ 299 */
300 static inline void cfq_del_crq_hash(struct cfq_rq *crq) 300 static inline void cfq_del_crq_hash(struct cfq_rq *crq)
301 { 301 {
302 hlist_del_init(&crq->hash); 302 hlist_del_init(&crq->hash);
303 } 303 }
304 304
305 static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) 305 static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
306 { 306 {
307 const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); 307 const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
308 308
309 hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]); 309 hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
310 } 310 }
311 311
312 static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) 312 static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
313 { 313 {
314 struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)]; 314 struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
315 struct hlist_node *entry, *next; 315 struct hlist_node *entry, *next;
316 316
317 hlist_for_each_safe(entry, next, hash_list) { 317 hlist_for_each_safe(entry, next, hash_list) {
318 struct cfq_rq *crq = list_entry_hash(entry); 318 struct cfq_rq *crq = list_entry_hash(entry);
319 struct request *__rq = crq->request; 319 struct request *__rq = crq->request;
320 320
321 if (!rq_mergeable(__rq)) { 321 if (!rq_mergeable(__rq)) {
322 cfq_del_crq_hash(crq); 322 cfq_del_crq_hash(crq);
323 continue; 323 continue;
324 } 324 }
325 325
326 if (rq_hash_key(__rq) == offset) 326 if (rq_hash_key(__rq) == offset)
327 return __rq; 327 return __rq;
328 } 328 }
329 329
330 return NULL; 330 return NULL;
331 } 331 }
332 332
333 /* 333 /*
334 * scheduler run of queue, if there are requests pending and no one in the 334 * scheduler run of queue, if there are requests pending and no one in the
335 * driver that will restart queueing 335 * driver that will restart queueing
336 */ 336 */
337 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) 337 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
338 { 338 {
339 if (!cfqd->rq_in_driver && cfqd->busy_queues) 339 if (!cfqd->rq_in_driver && cfqd->busy_queues)
340 kblockd_schedule_work(&cfqd->unplug_work); 340 kblockd_schedule_work(&cfqd->unplug_work);
341 } 341 }
342 342
343 static int cfq_queue_empty(request_queue_t *q) 343 static int cfq_queue_empty(request_queue_t *q)
344 { 344 {
345 struct cfq_data *cfqd = q->elevator->elevator_data; 345 struct cfq_data *cfqd = q->elevator->elevator_data;
346 346
347 return !cfqd->busy_queues; 347 return !cfqd->busy_queues;
348 } 348 }
349 349
350 /* 350 /*
351 * Lifted from AS - choose which of crq1 and crq2 that is best served now. 351 * Lifted from AS - choose which of crq1 and crq2 that is best served now.
352 * We choose the request that is closest to the head right now. Distance 352 * We choose the request that is closest to the head right now. Distance
353 * behind the head are penalized and only allowed to a certain extent. 353 * behind the head are penalized and only allowed to a certain extent.
354 */ 354 */
355 static struct cfq_rq * 355 static struct cfq_rq *
356 cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) 356 cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
357 { 357 {
358 sector_t last, s1, s2, d1 = 0, d2 = 0; 358 sector_t last, s1, s2, d1 = 0, d2 = 0;
359 int r1_wrap = 0, r2_wrap = 0; /* requests are behind the disk head */ 359 int r1_wrap = 0, r2_wrap = 0; /* requests are behind the disk head */
360 unsigned long back_max; 360 unsigned long back_max;
361 361
362 if (crq1 == NULL || crq1 == crq2) 362 if (crq1 == NULL || crq1 == crq2)
363 return crq2; 363 return crq2;
364 if (crq2 == NULL) 364 if (crq2 == NULL)
365 return crq1; 365 return crq1;
366 366
367 if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2)) 367 if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2))
368 return crq1; 368 return crq1;
369 else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1)) 369 else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1))
370 return crq2; 370 return crq2;
371 371
372 s1 = crq1->request->sector; 372 s1 = crq1->request->sector;
373 s2 = crq2->request->sector; 373 s2 = crq2->request->sector;
374 374
375 last = cfqd->last_sector; 375 last = cfqd->last_sector;
376 376
377 /* 377 /*
378 * by definition, 1KiB is 2 sectors 378 * by definition, 1KiB is 2 sectors
379 */ 379 */
380 back_max = cfqd->cfq_back_max * 2; 380 back_max = cfqd->cfq_back_max * 2;
381 381
382 /* 382 /*
383 * Strict one way elevator _except_ in the case where we allow 383 * Strict one way elevator _except_ in the case where we allow
384 * short backward seeks which are biased as twice the cost of a 384 * short backward seeks which are biased as twice the cost of a
385 * similar forward seek. 385 * similar forward seek.
386 */ 386 */
387 if (s1 >= last) 387 if (s1 >= last)
388 d1 = s1 - last; 388 d1 = s1 - last;
389 else if (s1 + back_max >= last) 389 else if (s1 + back_max >= last)
390 d1 = (last - s1) * cfqd->cfq_back_penalty; 390 d1 = (last - s1) * cfqd->cfq_back_penalty;
391 else 391 else
392 r1_wrap = 1; 392 r1_wrap = 1;
393 393
394 if (s2 >= last) 394 if (s2 >= last)
395 d2 = s2 - last; 395 d2 = s2 - last;
396 else if (s2 + back_max >= last) 396 else if (s2 + back_max >= last)
397 d2 = (last - s2) * cfqd->cfq_back_penalty; 397 d2 = (last - s2) * cfqd->cfq_back_penalty;
398 else 398 else
399 r2_wrap = 1; 399 r2_wrap = 1;
400 400
401 /* Found required data */ 401 /* Found required data */
402 if (!r1_wrap && r2_wrap) 402 if (!r1_wrap && r2_wrap)
403 return crq1; 403 return crq1;
404 else if (!r2_wrap && r1_wrap) 404 else if (!r2_wrap && r1_wrap)
405 return crq2; 405 return crq2;
406 else if (r1_wrap && r2_wrap) { 406 else if (r1_wrap && r2_wrap) {
407 /* both behind the head */ 407 /* both behind the head */
408 if (s1 <= s2) 408 if (s1 <= s2)
409 return crq1; 409 return crq1;
410 else 410 else
411 return crq2; 411 return crq2;
412 } 412 }
413 413
414 /* Both requests in front of the head */ 414 /* Both requests in front of the head */
415 if (d1 < d2) 415 if (d1 < d2)
416 return crq1; 416 return crq1;
417 else if (d2 < d1) 417 else if (d2 < d1)
418 return crq2; 418 return crq2;
419 else { 419 else {
420 if (s1 >= s2) 420 if (s1 >= s2)
421 return crq1; 421 return crq1;
422 else 422 else
423 return crq2; 423 return crq2;
424 } 424 }
425 } 425 }
426 426
427 /* 427 /*
428 * would be nice to take fifo expire time into account as well 428 * would be nice to take fifo expire time into account as well
429 */ 429 */
430 static struct cfq_rq * 430 static struct cfq_rq *
431 cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, 431 cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
432 struct cfq_rq *last) 432 struct cfq_rq *last)
433 { 433 {
434 struct cfq_rq *crq_next = NULL, *crq_prev = NULL; 434 struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
435 struct rb_node *rbnext, *rbprev; 435 struct rb_node *rbnext, *rbprev;
436 436
437 if (!(rbnext = rb_next(&last->rb_node))) { 437 if (!(rbnext = rb_next(&last->rb_node))) {
438 rbnext = rb_first(&cfqq->sort_list); 438 rbnext = rb_first(&cfqq->sort_list);
439 if (rbnext == &last->rb_node) 439 if (rbnext == &last->rb_node)
440 rbnext = NULL; 440 rbnext = NULL;
441 } 441 }
442 442
443 rbprev = rb_prev(&last->rb_node); 443 rbprev = rb_prev(&last->rb_node);
444 444
445 if (rbprev) 445 if (rbprev)
446 crq_prev = rb_entry_crq(rbprev); 446 crq_prev = rb_entry_crq(rbprev);
447 if (rbnext) 447 if (rbnext)
448 crq_next = rb_entry_crq(rbnext); 448 crq_next = rb_entry_crq(rbnext);
449 449
450 return cfq_choose_req(cfqd, crq_next, crq_prev); 450 return cfq_choose_req(cfqd, crq_next, crq_prev);
451 } 451 }
452 452
453 static void cfq_update_next_crq(struct cfq_rq *crq) 453 static void cfq_update_next_crq(struct cfq_rq *crq)
454 { 454 {
455 struct cfq_queue *cfqq = crq->cfq_queue; 455 struct cfq_queue *cfqq = crq->cfq_queue;
456 456
457 if (cfqq->next_crq == crq) 457 if (cfqq->next_crq == crq)
458 cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq); 458 cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
459 } 459 }
460 460
461 static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) 461 static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
462 { 462 {
463 struct cfq_data *cfqd = cfqq->cfqd; 463 struct cfq_data *cfqd = cfqq->cfqd;
464 struct list_head *list, *entry; 464 struct list_head *list, *entry;
465 465
466 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 466 BUG_ON(!cfq_cfqq_on_rr(cfqq));
467 467
468 list_del(&cfqq->cfq_list); 468 list_del(&cfqq->cfq_list);
469 469
470 if (cfq_class_rt(cfqq)) 470 if (cfq_class_rt(cfqq))
471 list = &cfqd->cur_rr; 471 list = &cfqd->cur_rr;
472 else if (cfq_class_idle(cfqq)) 472 else if (cfq_class_idle(cfqq))
473 list = &cfqd->idle_rr; 473 list = &cfqd->idle_rr;
474 else { 474 else {
475 /* 475 /*
476 * if cfqq has requests in flight, don't allow it to be 476 * if cfqq has requests in flight, don't allow it to be
477 * found in cfq_set_active_queue before it has finished them. 477 * found in cfq_set_active_queue before it has finished them.
478 * this is done to increase fairness between a process that 478 * this is done to increase fairness between a process that
479 * has lots of io pending vs one that only generates one 479 * has lots of io pending vs one that only generates one
480 * sporadically or synchronously 480 * sporadically or synchronously
481 */ 481 */
482 if (cfq_cfqq_dispatched(cfqq)) 482 if (cfq_cfqq_dispatched(cfqq))
483 list = &cfqd->busy_rr; 483 list = &cfqd->busy_rr;
484 else 484 else
485 list = &cfqd->rr_list[cfqq->ioprio]; 485 list = &cfqd->rr_list[cfqq->ioprio];
486 } 486 }
487 487
488 /* 488 /*
489 * if queue was preempted, just add to front to be fair. busy_rr 489 * if queue was preempted, just add to front to be fair. busy_rr
490 * isn't sorted. 490 * isn't sorted.
491 */ 491 */
492 if (preempted || list == &cfqd->busy_rr) { 492 if (preempted || list == &cfqd->busy_rr) {
493 list_add(&cfqq->cfq_list, list); 493 list_add(&cfqq->cfq_list, list);
494 return; 494 return;
495 } 495 }
496 496
497 /* 497 /*
498 * sort by when queue was last serviced 498 * sort by when queue was last serviced
499 */ 499 */
500 entry = list; 500 entry = list;
501 while ((entry = entry->prev) != list) { 501 while ((entry = entry->prev) != list) {
502 struct cfq_queue *__cfqq = list_entry_cfqq(entry); 502 struct cfq_queue *__cfqq = list_entry_cfqq(entry);
503 503
504 if (!__cfqq->service_last) 504 if (!__cfqq->service_last)
505 break; 505 break;
506 if (time_before(__cfqq->service_last, cfqq->service_last)) 506 if (time_before(__cfqq->service_last, cfqq->service_last))
507 break; 507 break;
508 } 508 }
509 509
510 list_add(&cfqq->cfq_list, entry); 510 list_add(&cfqq->cfq_list, entry);
511 } 511 }
512 512
513 /* 513 /*
514 * add to busy list of queues for service, trying to be fair in ordering 514 * add to busy list of queues for service, trying to be fair in ordering
515 * the pending list according to last request service 515 * the pending list according to last request service
516 */ 516 */
517 static inline void 517 static inline void
518 cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 518 cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
519 { 519 {
520 BUG_ON(cfq_cfqq_on_rr(cfqq)); 520 BUG_ON(cfq_cfqq_on_rr(cfqq));
521 cfq_mark_cfqq_on_rr(cfqq); 521 cfq_mark_cfqq_on_rr(cfqq);
522 cfqd->busy_queues++; 522 cfqd->busy_queues++;
523 523
524 cfq_resort_rr_list(cfqq, 0); 524 cfq_resort_rr_list(cfqq, 0);
525 } 525 }
526 526
527 static inline void 527 static inline void
528 cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 528 cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
529 { 529 {
530 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 530 BUG_ON(!cfq_cfqq_on_rr(cfqq));
531 cfq_clear_cfqq_on_rr(cfqq); 531 cfq_clear_cfqq_on_rr(cfqq);
532 list_move(&cfqq->cfq_list, &cfqd->empty_list); 532 list_move(&cfqq->cfq_list, &cfqd->empty_list);
533 533
534 BUG_ON(!cfqd->busy_queues); 534 BUG_ON(!cfqd->busy_queues);
535 cfqd->busy_queues--; 535 cfqd->busy_queues--;
536 } 536 }
537 537
538 /* 538 /*
539 * rb tree support functions 539 * rb tree support functions
540 */ 540 */
541 static inline void cfq_del_crq_rb(struct cfq_rq *crq) 541 static inline void cfq_del_crq_rb(struct cfq_rq *crq)
542 { 542 {
543 struct cfq_queue *cfqq = crq->cfq_queue; 543 struct cfq_queue *cfqq = crq->cfq_queue;
544 struct cfq_data *cfqd = cfqq->cfqd; 544 struct cfq_data *cfqd = cfqq->cfqd;
545 const int sync = cfq_crq_is_sync(crq); 545 const int sync = cfq_crq_is_sync(crq);
546 546
547 BUG_ON(!cfqq->queued[sync]); 547 BUG_ON(!cfqq->queued[sync]);
548 cfqq->queued[sync]--; 548 cfqq->queued[sync]--;
549 549
550 cfq_update_next_crq(crq); 550 cfq_update_next_crq(crq);
551 551
552 rb_erase(&crq->rb_node, &cfqq->sort_list); 552 rb_erase(&crq->rb_node, &cfqq->sort_list);
553 RB_CLEAR_COLOR(&crq->rb_node); 553 RB_CLEAR_COLOR(&crq->rb_node);
554 554
555 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list)) 555 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
556 cfq_del_cfqq_rr(cfqd, cfqq); 556 cfq_del_cfqq_rr(cfqd, cfqq);
557 } 557 }
558 558
559 static struct cfq_rq * 559 static struct cfq_rq *
560 __cfq_add_crq_rb(struct cfq_rq *crq) 560 __cfq_add_crq_rb(struct cfq_rq *crq)
561 { 561 {
562 struct rb_node **p = &crq->cfq_queue->sort_list.rb_node; 562 struct rb_node **p = &crq->cfq_queue->sort_list.rb_node;
563 struct rb_node *parent = NULL; 563 struct rb_node *parent = NULL;
564 struct cfq_rq *__crq; 564 struct cfq_rq *__crq;
565 565
566 while (*p) { 566 while (*p) {
567 parent = *p; 567 parent = *p;
568 __crq = rb_entry_crq(parent); 568 __crq = rb_entry_crq(parent);
569 569
570 if (crq->rb_key < __crq->rb_key) 570 if (crq->rb_key < __crq->rb_key)
571 p = &(*p)->rb_left; 571 p = &(*p)->rb_left;
572 else if (crq->rb_key > __crq->rb_key) 572 else if (crq->rb_key > __crq->rb_key)
573 p = &(*p)->rb_right; 573 p = &(*p)->rb_right;
574 else 574 else
575 return __crq; 575 return __crq;
576 } 576 }
577 577
578 rb_link_node(&crq->rb_node, parent, p); 578 rb_link_node(&crq->rb_node, parent, p);
579 return NULL; 579 return NULL;
580 } 580 }
581 581
582 static void cfq_add_crq_rb(struct cfq_rq *crq) 582 static void cfq_add_crq_rb(struct cfq_rq *crq)
583 { 583 {
584 struct cfq_queue *cfqq = crq->cfq_queue; 584 struct cfq_queue *cfqq = crq->cfq_queue;
585 struct cfq_data *cfqd = cfqq->cfqd; 585 struct cfq_data *cfqd = cfqq->cfqd;
586 struct request *rq = crq->request; 586 struct request *rq = crq->request;
587 struct cfq_rq *__alias; 587 struct cfq_rq *__alias;
588 588
589 crq->rb_key = rq_rb_key(rq); 589 crq->rb_key = rq_rb_key(rq);
590 cfqq->queued[cfq_crq_is_sync(crq)]++; 590 cfqq->queued[cfq_crq_is_sync(crq)]++;
591 591
592 /* 592 /*
593 * looks a little odd, but the first insert might return an alias. 593 * looks a little odd, but the first insert might return an alias.
594 * if that happens, put the alias on the dispatch list 594 * if that happens, put the alias on the dispatch list
595 */ 595 */
596 while ((__alias = __cfq_add_crq_rb(crq)) != NULL) 596 while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
597 cfq_dispatch_insert(cfqd->queue, __alias); 597 cfq_dispatch_insert(cfqd->queue, __alias);
598 598
599 rb_insert_color(&crq->rb_node, &cfqq->sort_list); 599 rb_insert_color(&crq->rb_node, &cfqq->sort_list);
600 600
601 if (!cfq_cfqq_on_rr(cfqq)) 601 if (!cfq_cfqq_on_rr(cfqq))
602 cfq_add_cfqq_rr(cfqd, cfqq); 602 cfq_add_cfqq_rr(cfqd, cfqq);
603 603
604 /* 604 /*
605 * check if this request is a better next-serve candidate 605 * check if this request is a better next-serve candidate
606 */ 606 */
607 cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); 607 cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
608 } 608 }
609 609
610 static inline void 610 static inline void
611 cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) 611 cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
612 { 612 {
613 rb_erase(&crq->rb_node, &cfqq->sort_list); 613 rb_erase(&crq->rb_node, &cfqq->sort_list);
614 cfqq->queued[cfq_crq_is_sync(crq)]--; 614 cfqq->queued[cfq_crq_is_sync(crq)]--;
615 615
616 cfq_add_crq_rb(crq); 616 cfq_add_crq_rb(crq);
617 } 617 }
618 618
619 static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector) 619 static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
620 620
621 { 621 {
622 struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid, CFQ_KEY_ANY); 622 struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid, CFQ_KEY_ANY);
623 struct rb_node *n; 623 struct rb_node *n;
624 624
625 if (!cfqq) 625 if (!cfqq)
626 goto out; 626 goto out;
627 627
628 n = cfqq->sort_list.rb_node; 628 n = cfqq->sort_list.rb_node;
629 while (n) { 629 while (n) {
630 struct cfq_rq *crq = rb_entry_crq(n); 630 struct cfq_rq *crq = rb_entry_crq(n);
631 631
632 if (sector < crq->rb_key) 632 if (sector < crq->rb_key)
633 n = n->rb_left; 633 n = n->rb_left;
634 else if (sector > crq->rb_key) 634 else if (sector > crq->rb_key)
635 n = n->rb_right; 635 n = n->rb_right;
636 else 636 else
637 return crq->request; 637 return crq->request;
638 } 638 }
639 639
640 out: 640 out:
641 return NULL; 641 return NULL;
642 } 642 }
643 643
644 static void cfq_activate_request(request_queue_t *q, struct request *rq) 644 static void cfq_activate_request(request_queue_t *q, struct request *rq)
645 { 645 {
646 struct cfq_data *cfqd = q->elevator->elevator_data; 646 struct cfq_data *cfqd = q->elevator->elevator_data;
647 647
648 cfqd->rq_in_driver++; 648 cfqd->rq_in_driver++;
649 } 649 }
650 650
651 static void cfq_deactivate_request(request_queue_t *q, struct request *rq) 651 static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
652 { 652 {
653 struct cfq_data *cfqd = q->elevator->elevator_data; 653 struct cfq_data *cfqd = q->elevator->elevator_data;
654 654
655 WARN_ON(!cfqd->rq_in_driver); 655 WARN_ON(!cfqd->rq_in_driver);
656 cfqd->rq_in_driver--; 656 cfqd->rq_in_driver--;
657 } 657 }
658 658
659 static void cfq_remove_request(struct request *rq) 659 static void cfq_remove_request(struct request *rq)
660 { 660 {
661 struct cfq_rq *crq = RQ_DATA(rq); 661 struct cfq_rq *crq = RQ_DATA(rq);
662 662
663 list_del_init(&rq->queuelist); 663 list_del_init(&rq->queuelist);
664 cfq_del_crq_rb(crq); 664 cfq_del_crq_rb(crq);
665 cfq_del_crq_hash(crq); 665 cfq_del_crq_hash(crq);
666 } 666 }
667 667
668 static int 668 static int
669 cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) 669 cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
670 { 670 {
671 struct cfq_data *cfqd = q->elevator->elevator_data; 671 struct cfq_data *cfqd = q->elevator->elevator_data;
672 struct request *__rq; 672 struct request *__rq;
673 int ret; 673 int ret;
674 674
675 __rq = cfq_find_rq_hash(cfqd, bio->bi_sector); 675 __rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
676 if (__rq && elv_rq_merge_ok(__rq, bio)) { 676 if (__rq && elv_rq_merge_ok(__rq, bio)) {
677 ret = ELEVATOR_BACK_MERGE; 677 ret = ELEVATOR_BACK_MERGE;
678 goto out; 678 goto out;
679 } 679 }
680 680
681 __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio)); 681 __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));
682 if (__rq && elv_rq_merge_ok(__rq, bio)) { 682 if (__rq && elv_rq_merge_ok(__rq, bio)) {
683 ret = ELEVATOR_FRONT_MERGE; 683 ret = ELEVATOR_FRONT_MERGE;
684 goto out; 684 goto out;
685 } 685 }
686 686
687 return ELEVATOR_NO_MERGE; 687 return ELEVATOR_NO_MERGE;
688 out: 688 out:
689 *req = __rq; 689 *req = __rq;
690 return ret; 690 return ret;
691 } 691 }
692 692
693 static void cfq_merged_request(request_queue_t *q, struct request *req) 693 static void cfq_merged_request(request_queue_t *q, struct request *req)
694 { 694 {
695 struct cfq_data *cfqd = q->elevator->elevator_data; 695 struct cfq_data *cfqd = q->elevator->elevator_data;
696 struct cfq_rq *crq = RQ_DATA(req); 696 struct cfq_rq *crq = RQ_DATA(req);
697 697
698 cfq_del_crq_hash(crq); 698 cfq_del_crq_hash(crq);
699 cfq_add_crq_hash(cfqd, crq); 699 cfq_add_crq_hash(cfqd, crq);
700 700
701 if (rq_rb_key(req) != crq->rb_key) { 701 if (rq_rb_key(req) != crq->rb_key) {
702 struct cfq_queue *cfqq = crq->cfq_queue; 702 struct cfq_queue *cfqq = crq->cfq_queue;
703 703
704 cfq_update_next_crq(crq); 704 cfq_update_next_crq(crq);
705 cfq_reposition_crq_rb(cfqq, crq); 705 cfq_reposition_crq_rb(cfqq, crq);
706 } 706 }
707 } 707 }
708 708
709 static void 709 static void
710 cfq_merged_requests(request_queue_t *q, struct request *rq, 710 cfq_merged_requests(request_queue_t *q, struct request *rq,
711 struct request *next) 711 struct request *next)
712 { 712 {
713 cfq_merged_request(q, rq); 713 cfq_merged_request(q, rq);
714 714
715 /* 715 /*
716 * reposition in fifo if next is older than rq 716 * reposition in fifo if next is older than rq
717 */ 717 */
718 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && 718 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
719 time_before(next->start_time, rq->start_time)) 719 time_before(next->start_time, rq->start_time))
720 list_move(&rq->queuelist, &next->queuelist); 720 list_move(&rq->queuelist, &next->queuelist);
721 721
722 cfq_remove_request(next); 722 cfq_remove_request(next);
723 } 723 }
724 724
725 static inline void 725 static inline void
726 __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 726 __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
727 { 727 {
728 if (cfqq) { 728 if (cfqq) {
729 /* 729 /*
730 * stop potential idle class queues waiting service 730 * stop potential idle class queues waiting service
731 */ 731 */
732 del_timer(&cfqd->idle_class_timer); 732 del_timer(&cfqd->idle_class_timer);
733 733
734 cfqq->slice_start = jiffies; 734 cfqq->slice_start = jiffies;
735 cfqq->slice_end = 0; 735 cfqq->slice_end = 0;
736 cfqq->slice_left = 0; 736 cfqq->slice_left = 0;
737 cfq_clear_cfqq_must_alloc_slice(cfqq); 737 cfq_clear_cfqq_must_alloc_slice(cfqq);
738 cfq_clear_cfqq_fifo_expire(cfqq); 738 cfq_clear_cfqq_fifo_expire(cfqq);
739 cfq_clear_cfqq_expired(cfqq); 739 cfq_clear_cfqq_expired(cfqq);
740 } 740 }
741 741
742 cfqd->active_queue = cfqq; 742 cfqd->active_queue = cfqq;
743 } 743 }
744 744
745 /* 745 /*
746 * 0 746 * 0
747 * 0,1 747 * 0,1
748 * 0,1,2 748 * 0,1,2
749 * 0,1,2,3 749 * 0,1,2,3
750 * 0,1,2,3,4 750 * 0,1,2,3,4
751 * 0,1,2,3,4,5 751 * 0,1,2,3,4,5
752 * 0,1,2,3,4,5,6 752 * 0,1,2,3,4,5,6
753 * 0,1,2,3,4,5,6,7 753 * 0,1,2,3,4,5,6,7
754 */ 754 */
755 static int cfq_get_next_prio_level(struct cfq_data *cfqd) 755 static int cfq_get_next_prio_level(struct cfq_data *cfqd)
756 { 756 {
757 int prio, wrap; 757 int prio, wrap;
758 758
759 prio = -1; 759 prio = -1;
760 wrap = 0; 760 wrap = 0;
761 do { 761 do {
762 int p; 762 int p;
763 763
764 for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) { 764 for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) {
765 if (!list_empty(&cfqd->rr_list[p])) { 765 if (!list_empty(&cfqd->rr_list[p])) {
766 prio = p; 766 prio = p;
767 break; 767 break;
768 } 768 }
769 } 769 }
770 770
771 if (prio != -1) 771 if (prio != -1)
772 break; 772 break;
773 cfqd->cur_prio = 0; 773 cfqd->cur_prio = 0;
774 if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) { 774 if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
775 cfqd->cur_end_prio = 0; 775 cfqd->cur_end_prio = 0;
776 if (wrap) 776 if (wrap)
777 break; 777 break;
778 wrap = 1; 778 wrap = 1;
779 } 779 }
780 } while (1); 780 } while (1);
781 781
782 if (unlikely(prio == -1)) 782 if (unlikely(prio == -1))
783 return -1; 783 return -1;
784 784
785 BUG_ON(prio >= CFQ_PRIO_LISTS); 785 BUG_ON(prio >= CFQ_PRIO_LISTS);
786 786
787 list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr); 787 list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr);
788 788
789 cfqd->cur_prio = prio + 1; 789 cfqd->cur_prio = prio + 1;
790 if (cfqd->cur_prio > cfqd->cur_end_prio) { 790 if (cfqd->cur_prio > cfqd->cur_end_prio) {
791 cfqd->cur_end_prio = cfqd->cur_prio; 791 cfqd->cur_end_prio = cfqd->cur_prio;
792 cfqd->cur_prio = 0; 792 cfqd->cur_prio = 0;
793 } 793 }
794 if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) { 794 if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
795 cfqd->cur_prio = 0; 795 cfqd->cur_prio = 0;
796 cfqd->cur_end_prio = 0; 796 cfqd->cur_end_prio = 0;
797 } 797 }
798 798
799 return prio; 799 return prio;
800 } 800 }
801 801
802 static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) 802 static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
803 { 803 {
804 struct cfq_queue *cfqq; 804 struct cfq_queue *cfqq;
805 805
806 /* 806 /*
807 * if current queue is expired but not done with its requests yet, 807 * if current queue is expired but not done with its requests yet,
808 * wait for that to happen 808 * wait for that to happen
809 */ 809 */
810 if ((cfqq = cfqd->active_queue) != NULL) { 810 if ((cfqq = cfqd->active_queue) != NULL) {
811 if (cfq_cfqq_expired(cfqq) && cfq_cfqq_dispatched(cfqq)) 811 if (cfq_cfqq_expired(cfqq) && cfq_cfqq_dispatched(cfqq))
812 return NULL; 812 return NULL;
813 } 813 }
814 814
815 /* 815 /*
816 * if current list is non-empty, grab first entry. if it is empty, 816 * if current list is non-empty, grab first entry. if it is empty,
817 * get next prio level and grab first entry then if any are spliced 817 * get next prio level and grab first entry then if any are spliced
818 */ 818 */
819 if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) 819 if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1)
820 cfqq = list_entry_cfqq(cfqd->cur_rr.next); 820 cfqq = list_entry_cfqq(cfqd->cur_rr.next);
821 821
822 /* 822 /*
823 * if we have idle queues and no rt or be queues had pending 823 * if we have idle queues and no rt or be queues had pending
824 * requests, either allow immediate service if the grace period 824 * requests, either allow immediate service if the grace period
825 * has passed or arm the idle grace timer 825 * has passed or arm the idle grace timer
826 */ 826 */
827 if (!cfqq && !list_empty(&cfqd->idle_rr)) { 827 if (!cfqq && !list_empty(&cfqd->idle_rr)) {
828 unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; 828 unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
829 829
830 if (time_after_eq(jiffies, end)) 830 if (time_after_eq(jiffies, end))
831 cfqq = list_entry_cfqq(cfqd->idle_rr.next); 831 cfqq = list_entry_cfqq(cfqd->idle_rr.next);
832 else 832 else
833 mod_timer(&cfqd->idle_class_timer, end); 833 mod_timer(&cfqd->idle_class_timer, end);
834 } 834 }
835 835
836 __cfq_set_active_queue(cfqd, cfqq); 836 __cfq_set_active_queue(cfqd, cfqq);
837 return cfqq; 837 return cfqq;
838 } 838 }
839 839
840 /* 840 /*
841 * current cfqq expired its slice (or was too idle), select new one 841 * current cfqq expired its slice (or was too idle), select new one
842 */ 842 */
843 static void 843 static void
844 __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, 844 __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
845 int preempted) 845 int preempted)
846 { 846 {
847 unsigned long now = jiffies; 847 unsigned long now = jiffies;
848 848
849 if (cfq_cfqq_wait_request(cfqq)) 849 if (cfq_cfqq_wait_request(cfqq))
850 del_timer(&cfqd->idle_slice_timer); 850 del_timer(&cfqd->idle_slice_timer);
851 851
852 if (!preempted && !cfq_cfqq_dispatched(cfqq)) 852 if (!preempted && !cfq_cfqq_dispatched(cfqq))
853 cfqq->service_last = now; 853 cfqq->service_last = now;
854 854
855 cfq_clear_cfqq_must_dispatch(cfqq); 855 cfq_clear_cfqq_must_dispatch(cfqq);
856 cfq_clear_cfqq_wait_request(cfqq); 856 cfq_clear_cfqq_wait_request(cfqq);
857 857
858 /* 858 /*
859 * store what was left of this slice, if the queue idled out 859 * store what was left of this slice, if the queue idled out
860 * or was preempted 860 * or was preempted
861 */ 861 */
862 if (time_after(cfqq->slice_end, now)) 862 if (time_after(cfqq->slice_end, now))
863 cfqq->slice_left = cfqq->slice_end - now; 863 cfqq->slice_left = cfqq->slice_end - now;
864 else 864 else
865 cfqq->slice_left = 0; 865 cfqq->slice_left = 0;
866 866
867 if (cfq_cfqq_on_rr(cfqq)) 867 if (cfq_cfqq_on_rr(cfqq))
868 cfq_resort_rr_list(cfqq, preempted); 868 cfq_resort_rr_list(cfqq, preempted);
869 869
870 if (cfqq == cfqd->active_queue) 870 if (cfqq == cfqd->active_queue)
871 cfqd->active_queue = NULL; 871 cfqd->active_queue = NULL;
872 872
873 if (cfqd->active_cic) { 873 if (cfqd->active_cic) {
874 put_io_context(cfqd->active_cic->ioc); 874 put_io_context(cfqd->active_cic->ioc);
875 cfqd->active_cic = NULL; 875 cfqd->active_cic = NULL;
876 } 876 }
877 877
878 cfqd->dispatch_slice = 0; 878 cfqd->dispatch_slice = 0;
879 } 879 }
880 880
881 static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted) 881 static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted)
882 { 882 {
883 struct cfq_queue *cfqq = cfqd->active_queue; 883 struct cfq_queue *cfqq = cfqd->active_queue;
884 884
885 if (cfqq) { 885 if (cfqq) {
886 /* 886 /*
887 * use deferred expiry, if there are requests in progress as 887 * use deferred expiry, if there are requests in progress as
888 * not to disturb the slice of the next queue 888 * not to disturb the slice of the next queue
889 */ 889 */
890 if (cfq_cfqq_dispatched(cfqq)) 890 if (cfq_cfqq_dispatched(cfqq))
891 cfq_mark_cfqq_expired(cfqq); 891 cfq_mark_cfqq_expired(cfqq);
892 else 892 else
893 __cfq_slice_expired(cfqd, cfqq, preempted); 893 __cfq_slice_expired(cfqd, cfqq, preempted);
894 } 894 }
895 } 895 }
896 896
897 static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) 897 static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
898 898
899 { 899 {
900 WARN_ON(!RB_EMPTY(&cfqq->sort_list)); 900 WARN_ON(!RB_EMPTY(&cfqq->sort_list));
901 WARN_ON(cfqq != cfqd->active_queue); 901 WARN_ON(cfqq != cfqd->active_queue);
902 902
903 /* 903 /*
904 * idle is disabled, either manually or by past process history 904 * idle is disabled, either manually or by past process history
905 */ 905 */
906 if (!cfqd->cfq_slice_idle) 906 if (!cfqd->cfq_slice_idle)
907 return 0; 907 return 0;
908 if (!cfq_cfqq_idle_window(cfqq)) 908 if (!cfq_cfqq_idle_window(cfqq))
909 return 0; 909 return 0;
910 /* 910 /*
911 * task has exited, don't wait 911 * task has exited, don't wait
912 */ 912 */
913 if (cfqd->active_cic && !cfqd->active_cic->ioc->task) 913 if (cfqd->active_cic && !cfqd->active_cic->ioc->task)
914 return 0; 914 return 0;
915 915
916 cfq_mark_cfqq_must_dispatch(cfqq); 916 cfq_mark_cfqq_must_dispatch(cfqq);
917 cfq_mark_cfqq_wait_request(cfqq); 917 cfq_mark_cfqq_wait_request(cfqq);
918 918
919 if (!timer_pending(&cfqd->idle_slice_timer)) { 919 if (!timer_pending(&cfqd->idle_slice_timer)) {
920 unsigned long slice_left = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle); 920 unsigned long slice_left = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle);
921 921
922 cfqd->idle_slice_timer.expires = jiffies + slice_left; 922 cfqd->idle_slice_timer.expires = jiffies + slice_left;
923 add_timer(&cfqd->idle_slice_timer); 923 add_timer(&cfqd->idle_slice_timer);
924 } 924 }
925 925
926 return 1; 926 return 1;
927 } 927 }
928 928
929 static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq) 929 static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
930 { 930 {
931 struct cfq_data *cfqd = q->elevator->elevator_data; 931 struct cfq_data *cfqd = q->elevator->elevator_data;
932 struct cfq_queue *cfqq = crq->cfq_queue; 932 struct cfq_queue *cfqq = crq->cfq_queue;
933 933
934 cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); 934 cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);
935 cfq_remove_request(crq->request); 935 cfq_remove_request(crq->request);
936 cfqq->on_dispatch[cfq_crq_is_sync(crq)]++; 936 cfqq->on_dispatch[cfq_crq_is_sync(crq)]++;
937 elv_dispatch_sort(q, crq->request); 937 elv_dispatch_sort(q, crq->request);
938 } 938 }
939 939
940 /* 940 /*
941 * return expired entry, or NULL to just start from scratch in rbtree 941 * return expired entry, or NULL to just start from scratch in rbtree
942 */ 942 */
943 static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) 943 static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq)
944 { 944 {
945 struct cfq_data *cfqd = cfqq->cfqd; 945 struct cfq_data *cfqd = cfqq->cfqd;
946 struct request *rq; 946 struct request *rq;
947 struct cfq_rq *crq; 947 struct cfq_rq *crq;
948 948
949 if (cfq_cfqq_fifo_expire(cfqq)) 949 if (cfq_cfqq_fifo_expire(cfqq))
950 return NULL; 950 return NULL;
951 951
952 if (!list_empty(&cfqq->fifo)) { 952 if (!list_empty(&cfqq->fifo)) {
953 int fifo = cfq_cfqq_class_sync(cfqq); 953 int fifo = cfq_cfqq_class_sync(cfqq);
954 954
955 crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next)); 955 crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next));
956 rq = crq->request; 956 rq = crq->request;
957 if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { 957 if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
958 cfq_mark_cfqq_fifo_expire(cfqq); 958 cfq_mark_cfqq_fifo_expire(cfqq);
959 return crq; 959 return crq;
960 } 960 }
961 } 961 }
962 962
963 return NULL; 963 return NULL;
964 } 964 }
965 965
966 /* 966 /*
967 * Scale schedule slice based on io priority. Use the sync time slice only 967 * Scale schedule slice based on io priority. Use the sync time slice only
968 * if a queue is marked sync and has sync io queued. A sync queue with async 968 * if a queue is marked sync and has sync io queued. A sync queue with async
969 * io only, should not get full sync slice length. 969 * io only, should not get full sync slice length.
970 */ 970 */
971 static inline int 971 static inline int
972 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 972 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
973 { 973 {
974 const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)]; 974 const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)];
975 975
976 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); 976 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
977 977
978 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio)); 978 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio));
979 } 979 }
980 980
981 static inline void 981 static inline void
982 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 982 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
983 { 983 {
984 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; 984 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
985 } 985 }
986 986
987 static inline int 987 static inline int
988 cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) 988 cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
989 { 989 {
990 const int base_rq = cfqd->cfq_slice_async_rq; 990 const int base_rq = cfqd->cfq_slice_async_rq;
991 991
992 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); 992 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
993 993
994 return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio)); 994 return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
995 } 995 }
996 996
997 /* 997 /*
998 * get next queue for service 998 * get next queue for service
999 */ 999 */
1000 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) 1000 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
1001 { 1001 {
1002 unsigned long now = jiffies; 1002 unsigned long now = jiffies;
1003 struct cfq_queue *cfqq; 1003 struct cfq_queue *cfqq;
1004 1004
1005 cfqq = cfqd->active_queue; 1005 cfqq = cfqd->active_queue;
1006 if (!cfqq) 1006 if (!cfqq)
1007 goto new_queue; 1007 goto new_queue;
1008 1008
1009 if (cfq_cfqq_expired(cfqq)) 1009 if (cfq_cfqq_expired(cfqq))
1010 goto new_queue; 1010 goto new_queue;
1011 1011
1012 /* 1012 /*
1013 * slice has expired 1013 * slice has expired
1014 */ 1014 */
1015 if (!cfq_cfqq_must_dispatch(cfqq) && time_after(now, cfqq->slice_end)) 1015 if (!cfq_cfqq_must_dispatch(cfqq) && time_after(now, cfqq->slice_end))
1016 goto expire; 1016 goto expire;
1017 1017
1018 /* 1018 /*
1019 * if queue has requests, dispatch one. if not, check if 1019 * if queue has requests, dispatch one. if not, check if
1020 * enough slice is left to wait for one 1020 * enough slice is left to wait for one
1021 */ 1021 */
1022 if (!RB_EMPTY(&cfqq->sort_list)) 1022 if (!RB_EMPTY(&cfqq->sort_list))
1023 goto keep_queue; 1023 goto keep_queue;
1024 else if (cfq_cfqq_class_sync(cfqq) && 1024 else if (cfq_cfqq_class_sync(cfqq) &&
1025 time_before(now, cfqq->slice_end)) { 1025 time_before(now, cfqq->slice_end)) {
1026 if (cfq_arm_slice_timer(cfqd, cfqq)) 1026 if (cfq_arm_slice_timer(cfqd, cfqq))
1027 return NULL; 1027 return NULL;
1028 } 1028 }
1029 1029
1030 expire: 1030 expire:
1031 cfq_slice_expired(cfqd, 0); 1031 cfq_slice_expired(cfqd, 0);
1032 new_queue: 1032 new_queue:
1033 cfqq = cfq_set_active_queue(cfqd); 1033 cfqq = cfq_set_active_queue(cfqd);
1034 keep_queue: 1034 keep_queue:
1035 return cfqq; 1035 return cfqq;
1036 } 1036 }
1037 1037
1038 static int 1038 static int
1039 __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1039 __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1040 int max_dispatch) 1040 int max_dispatch)
1041 { 1041 {
1042 int dispatched = 0; 1042 int dispatched = 0;
1043 1043
1044 BUG_ON(RB_EMPTY(&cfqq->sort_list)); 1044 BUG_ON(RB_EMPTY(&cfqq->sort_list));
1045 1045
1046 do { 1046 do {
1047 struct cfq_rq *crq; 1047 struct cfq_rq *crq;
1048 1048
1049 /* 1049 /*
1050 * follow expired path, else get first next available 1050 * follow expired path, else get first next available
1051 */ 1051 */
1052 if ((crq = cfq_check_fifo(cfqq)) == NULL) 1052 if ((crq = cfq_check_fifo(cfqq)) == NULL)
1053 crq = cfqq->next_crq; 1053 crq = cfqq->next_crq;
1054 1054
1055 /* 1055 /*
1056 * finally, insert request into driver dispatch list 1056 * finally, insert request into driver dispatch list
1057 */ 1057 */
1058 cfq_dispatch_insert(cfqd->queue, crq); 1058 cfq_dispatch_insert(cfqd->queue, crq);
1059 1059
1060 cfqd->dispatch_slice++; 1060 cfqd->dispatch_slice++;
1061 dispatched++; 1061 dispatched++;
1062 1062
1063 if (!cfqd->active_cic) { 1063 if (!cfqd->active_cic) {
1064 atomic_inc(&crq->io_context->ioc->refcount); 1064 atomic_inc(&crq->io_context->ioc->refcount);
1065 cfqd->active_cic = crq->io_context; 1065 cfqd->active_cic = crq->io_context;
1066 } 1066 }
1067 1067
1068 if (RB_EMPTY(&cfqq->sort_list)) 1068 if (RB_EMPTY(&cfqq->sort_list))
1069 break; 1069 break;
1070 1070
1071 } while (dispatched < max_dispatch); 1071 } while (dispatched < max_dispatch);
1072 1072
1073 /* 1073 /*
1074 * if slice end isn't set yet, set it. if at least one request was 1074 * if slice end isn't set yet, set it. if at least one request was
1075 * sync, use the sync time slice value 1075 * sync, use the sync time slice value
1076 */ 1076 */
1077 if (!cfqq->slice_end) 1077 if (!cfqq->slice_end)
1078 cfq_set_prio_slice(cfqd, cfqq); 1078 cfq_set_prio_slice(cfqd, cfqq);
1079 1079
1080 /* 1080 /*
1081 * expire an async queue immediately if it has used up its slice. idle 1081 * expire an async queue immediately if it has used up its slice. idle
1082 * queue always expire after 1 dispatch round. 1082 * queue always expire after 1 dispatch round.
1083 */ 1083 */
1084 if ((!cfq_cfqq_sync(cfqq) && 1084 if ((!cfq_cfqq_sync(cfqq) &&
1085 cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) || 1085 cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
1086 cfq_class_idle(cfqq)) 1086 cfq_class_idle(cfqq))
1087 cfq_slice_expired(cfqd, 0); 1087 cfq_slice_expired(cfqd, 0);
1088 1088
1089 return dispatched; 1089 return dispatched;
1090 } 1090 }
1091 1091
1092 static int 1092 static int
1093 cfq_forced_dispatch_cfqqs(struct list_head *list) 1093 cfq_forced_dispatch_cfqqs(struct list_head *list)
1094 { 1094 {
1095 int dispatched = 0; 1095 int dispatched = 0;
1096 struct cfq_queue *cfqq, *next; 1096 struct cfq_queue *cfqq, *next;
1097 struct cfq_rq *crq; 1097 struct cfq_rq *crq;
1098 1098
1099 list_for_each_entry_safe(cfqq, next, list, cfq_list) { 1099 list_for_each_entry_safe(cfqq, next, list, cfq_list) {
1100 while ((crq = cfqq->next_crq)) { 1100 while ((crq = cfqq->next_crq)) {
1101 cfq_dispatch_insert(cfqq->cfqd->queue, crq); 1101 cfq_dispatch_insert(cfqq->cfqd->queue, crq);
1102 dispatched++; 1102 dispatched++;
1103 } 1103 }
1104 BUG_ON(!list_empty(&cfqq->fifo)); 1104 BUG_ON(!list_empty(&cfqq->fifo));
1105 } 1105 }
1106 return dispatched; 1106 return dispatched;
1107 } 1107 }
1108 1108
1109 static int 1109 static int
1110 cfq_forced_dispatch(struct cfq_data *cfqd) 1110 cfq_forced_dispatch(struct cfq_data *cfqd)
1111 { 1111 {
1112 int i, dispatched = 0; 1112 int i, dispatched = 0;
1113 1113
1114 for (i = 0; i < CFQ_PRIO_LISTS; i++) 1114 for (i = 0; i < CFQ_PRIO_LISTS; i++)
1115 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]); 1115 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]);
1116 1116
1117 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->busy_rr); 1117 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->busy_rr);
1118 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr); 1118 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr);
1119 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr); 1119 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr);
1120 1120
1121 cfq_slice_expired(cfqd, 0); 1121 cfq_slice_expired(cfqd, 0);
1122 1122
1123 BUG_ON(cfqd->busy_queues); 1123 BUG_ON(cfqd->busy_queues);
1124 1124
1125 return dispatched; 1125 return dispatched;
1126 } 1126 }
1127 1127
1128 static int 1128 static int
1129 cfq_dispatch_requests(request_queue_t *q, int force) 1129 cfq_dispatch_requests(request_queue_t *q, int force)
1130 { 1130 {
1131 struct cfq_data *cfqd = q->elevator->elevator_data; 1131 struct cfq_data *cfqd = q->elevator->elevator_data;
1132 struct cfq_queue *cfqq; 1132 struct cfq_queue *cfqq;
1133 1133
1134 if (!cfqd->busy_queues) 1134 if (!cfqd->busy_queues)
1135 return 0; 1135 return 0;
1136 1136
1137 if (unlikely(force)) 1137 if (unlikely(force))
1138 return cfq_forced_dispatch(cfqd); 1138 return cfq_forced_dispatch(cfqd);
1139 1139
1140 cfqq = cfq_select_queue(cfqd); 1140 cfqq = cfq_select_queue(cfqd);
1141 if (cfqq) { 1141 if (cfqq) {
1142 int max_dispatch; 1142 int max_dispatch;
1143 1143
1144 /* 1144 /*
1145 * if idle window is disabled, allow queue buildup 1145 * if idle window is disabled, allow queue buildup
1146 */ 1146 */
1147 if (!cfq_cfqq_idle_window(cfqq) && 1147 if (!cfq_cfqq_idle_window(cfqq) &&
1148 cfqd->rq_in_driver >= cfqd->cfq_max_depth) 1148 cfqd->rq_in_driver >= cfqd->cfq_max_depth)
1149 return 0; 1149 return 0;
1150 1150
1151 cfq_clear_cfqq_must_dispatch(cfqq); 1151 cfq_clear_cfqq_must_dispatch(cfqq);
1152 cfq_clear_cfqq_wait_request(cfqq); 1152 cfq_clear_cfqq_wait_request(cfqq);
1153 del_timer(&cfqd->idle_slice_timer); 1153 del_timer(&cfqd->idle_slice_timer);
1154 1154
1155 max_dispatch = cfqd->cfq_quantum; 1155 max_dispatch = cfqd->cfq_quantum;
1156 if (cfq_class_idle(cfqq)) 1156 if (cfq_class_idle(cfqq))
1157 max_dispatch = 1; 1157 max_dispatch = 1;
1158 1158
1159 return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); 1159 return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
1160 } 1160 }
1161 1161
1162 return 0; 1162 return 0;
1163 } 1163 }
1164 1164
1165 /* 1165 /*
1166 * task holds one reference to the queue, dropped when task exits. each crq 1166 * task holds one reference to the queue, dropped when task exits. each crq
1167 * in-flight on this queue also holds a reference, dropped when crq is freed. 1167 * in-flight on this queue also holds a reference, dropped when crq is freed.
1168 * 1168 *
1169 * queue lock must be held here. 1169 * queue lock must be held here.
1170 */ 1170 */
1171 static void cfq_put_queue(struct cfq_queue *cfqq) 1171 static void cfq_put_queue(struct cfq_queue *cfqq)
1172 { 1172 {
1173 struct cfq_data *cfqd = cfqq->cfqd; 1173 struct cfq_data *cfqd = cfqq->cfqd;
1174 1174
1175 BUG_ON(atomic_read(&cfqq->ref) <= 0); 1175 BUG_ON(atomic_read(&cfqq->ref) <= 0);
1176 1176
1177 if (!atomic_dec_and_test(&cfqq->ref)) 1177 if (!atomic_dec_and_test(&cfqq->ref))
1178 return; 1178 return;
1179 1179
1180 BUG_ON(rb_first(&cfqq->sort_list)); 1180 BUG_ON(rb_first(&cfqq->sort_list));
1181 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); 1181 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
1182 BUG_ON(cfq_cfqq_on_rr(cfqq)); 1182 BUG_ON(cfq_cfqq_on_rr(cfqq));
1183 1183
1184 if (unlikely(cfqd->active_queue == cfqq)) { 1184 if (unlikely(cfqd->active_queue == cfqq)) {
1185 __cfq_slice_expired(cfqd, cfqq, 0); 1185 __cfq_slice_expired(cfqd, cfqq, 0);
1186 cfq_schedule_dispatch(cfqd); 1186 cfq_schedule_dispatch(cfqd);
1187 } 1187 }
1188 1188
1189 cfq_put_cfqd(cfqq->cfqd); 1189 cfq_put_cfqd(cfqq->cfqd);
1190 1190
1191 /* 1191 /*
1192 * it's on the empty list and still hashed 1192 * it's on the empty list and still hashed
1193 */ 1193 */
1194 list_del(&cfqq->cfq_list); 1194 list_del(&cfqq->cfq_list);
1195 hlist_del(&cfqq->cfq_hash); 1195 hlist_del(&cfqq->cfq_hash);
1196 kmem_cache_free(cfq_pool, cfqq); 1196 kmem_cache_free(cfq_pool, cfqq);
1197 } 1197 }
1198 1198
1199 static inline struct cfq_queue * 1199 static inline struct cfq_queue *
1200 __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio, 1200 __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
1201 const int hashval) 1201 const int hashval)
1202 { 1202 {
1203 struct hlist_head *hash_list = &cfqd->cfq_hash[hashval]; 1203 struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
1204 struct hlist_node *entry, *next; 1204 struct hlist_node *entry, *next;
1205 1205
1206 hlist_for_each_safe(entry, next, hash_list) { 1206 hlist_for_each_safe(entry, next, hash_list) {
1207 struct cfq_queue *__cfqq = list_entry_qhash(entry); 1207 struct cfq_queue *__cfqq = list_entry_qhash(entry);
1208 const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->ioprio_class, __cfqq->ioprio); 1208 const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->ioprio_class, __cfqq->ioprio);
1209 1209
1210 if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY)) 1210 if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY))
1211 return __cfqq; 1211 return __cfqq;
1212 } 1212 }
1213 1213
1214 return NULL; 1214 return NULL;
1215 } 1215 }
1216 1216
1217 static struct cfq_queue * 1217 static struct cfq_queue *
1218 cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio) 1218 cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio)
1219 { 1219 {
1220 return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT)); 1220 return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT));
1221 } 1221 }
1222 1222
1223 static void cfq_free_io_context(struct cfq_io_context *cic) 1223 static void cfq_free_io_context(struct cfq_io_context *cic)
1224 { 1224 {
1225 struct cfq_io_context *__cic; 1225 struct cfq_io_context *__cic;
1226 struct list_head *entry, *next; 1226 struct list_head *entry, *next;
1227 1227
1228 list_for_each_safe(entry, next, &cic->list) { 1228 list_for_each_safe(entry, next, &cic->list) {
1229 __cic = list_entry(entry, struct cfq_io_context, list); 1229 __cic = list_entry(entry, struct cfq_io_context, list);
1230 kmem_cache_free(cfq_ioc_pool, __cic); 1230 kmem_cache_free(cfq_ioc_pool, __cic);
1231 } 1231 }
1232 1232
1233 kmem_cache_free(cfq_ioc_pool, cic); 1233 kmem_cache_free(cfq_ioc_pool, cic);
1234 } 1234 }
1235 1235
1236 /* 1236 /*
1237 * Called with interrupts disabled 1237 * Called with interrupts disabled
1238 */ 1238 */
1239 static void cfq_exit_single_io_context(struct cfq_io_context *cic) 1239 static void cfq_exit_single_io_context(struct cfq_io_context *cic)
1240 { 1240 {
1241 struct cfq_data *cfqd = cic->cfqq->cfqd; 1241 struct cfq_data *cfqd = cic->cfqq->cfqd;
1242 request_queue_t *q = cfqd->queue; 1242 request_queue_t *q = cfqd->queue;
1243 1243
1244 WARN_ON(!irqs_disabled()); 1244 WARN_ON(!irqs_disabled());
1245 1245
1246 spin_lock(q->queue_lock); 1246 spin_lock(q->queue_lock);
1247 1247
1248 if (unlikely(cic->cfqq == cfqd->active_queue)) { 1248 if (unlikely(cic->cfqq == cfqd->active_queue)) {
1249 __cfq_slice_expired(cfqd, cic->cfqq, 0); 1249 __cfq_slice_expired(cfqd, cic->cfqq, 0);
1250 cfq_schedule_dispatch(cfqd); 1250 cfq_schedule_dispatch(cfqd);
1251 } 1251 }
1252 1252
1253 cfq_put_queue(cic->cfqq); 1253 cfq_put_queue(cic->cfqq);
1254 cic->cfqq = NULL; 1254 cic->cfqq = NULL;
1255 spin_unlock(q->queue_lock); 1255 spin_unlock(q->queue_lock);
1256 } 1256 }
1257 1257
1258 /* 1258 /*
1259 * Another task may update the task cic list, if it is doing a queue lookup 1259 * Another task may update the task cic list, if it is doing a queue lookup
1260 * on its behalf. cfq_cic_lock excludes such concurrent updates 1260 * on its behalf. cfq_cic_lock excludes such concurrent updates
1261 */ 1261 */
1262 static void cfq_exit_io_context(struct cfq_io_context *cic) 1262 static void cfq_exit_io_context(struct cfq_io_context *cic)
1263 { 1263 {
1264 struct cfq_io_context *__cic; 1264 struct cfq_io_context *__cic;
1265 struct list_head *entry; 1265 struct list_head *entry;
1266 unsigned long flags; 1266 unsigned long flags;
1267 1267
1268 local_irq_save(flags); 1268 local_irq_save(flags);
1269 1269
1270 /* 1270 /*
1271 * put the reference this task is holding to the various queues 1271 * put the reference this task is holding to the various queues
1272 */ 1272 */
1273 list_for_each(entry, &cic->list) { 1273 list_for_each(entry, &cic->list) {
1274 __cic = list_entry(entry, struct cfq_io_context, list); 1274 __cic = list_entry(entry, struct cfq_io_context, list);
1275 cfq_exit_single_io_context(__cic); 1275 cfq_exit_single_io_context(__cic);
1276 } 1276 }
1277 1277
1278 cfq_exit_single_io_context(cic); 1278 cfq_exit_single_io_context(cic);
1279 local_irq_restore(flags); 1279 local_irq_restore(flags);
1280 } 1280 }
1281 1281
1282 static struct cfq_io_context * 1282 static struct cfq_io_context *
1283 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 1283 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
1284 { 1284 {
1285 struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); 1285 struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
1286 1286
1287 if (cic) { 1287 if (cic) {
1288 INIT_LIST_HEAD(&cic->list); 1288 INIT_LIST_HEAD(&cic->list);
1289 cic->cfqq = NULL; 1289 cic->cfqq = NULL;
1290 cic->key = NULL; 1290 cic->key = NULL;
1291 cic->last_end_request = jiffies; 1291 cic->last_end_request = jiffies;
1292 cic->ttime_total = 0; 1292 cic->ttime_total = 0;
1293 cic->ttime_samples = 0; 1293 cic->ttime_samples = 0;
1294 cic->ttime_mean = 0; 1294 cic->ttime_mean = 0;
1295 cic->dtor = cfq_free_io_context; 1295 cic->dtor = cfq_free_io_context;
1296 cic->exit = cfq_exit_io_context; 1296 cic->exit = cfq_exit_io_context;
1297 } 1297 }
1298 1298
1299 return cic; 1299 return cic;
1300 } 1300 }
1301 1301
1302 static void cfq_init_prio_data(struct cfq_queue *cfqq) 1302 static void cfq_init_prio_data(struct cfq_queue *cfqq)
1303 { 1303 {
1304 struct task_struct *tsk = current; 1304 struct task_struct *tsk = current;
1305 int ioprio_class; 1305 int ioprio_class;
1306 1306
1307 if (!cfq_cfqq_prio_changed(cfqq)) 1307 if (!cfq_cfqq_prio_changed(cfqq))
1308 return; 1308 return;
1309 1309
1310 ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio); 1310 ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio);
1311 switch (ioprio_class) { 1311 switch (ioprio_class) {
1312 default: 1312 default:
1313 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); 1313 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
1314 case IOPRIO_CLASS_NONE: 1314 case IOPRIO_CLASS_NONE:
1315 /* 1315 /*
1316 * no prio set, place us in the middle of the BE classes 1316 * no prio set, place us in the middle of the BE classes
1317 */ 1317 */
1318 cfqq->ioprio = task_nice_ioprio(tsk); 1318 cfqq->ioprio = task_nice_ioprio(tsk);
1319 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1319 cfqq->ioprio_class = IOPRIO_CLASS_BE;
1320 break; 1320 break;
1321 case IOPRIO_CLASS_RT: 1321 case IOPRIO_CLASS_RT:
1322 cfqq->ioprio = task_ioprio(tsk); 1322 cfqq->ioprio = task_ioprio(tsk);
1323 cfqq->ioprio_class = IOPRIO_CLASS_RT; 1323 cfqq->ioprio_class = IOPRIO_CLASS_RT;
1324 break; 1324 break;
1325 case IOPRIO_CLASS_BE: 1325 case IOPRIO_CLASS_BE:
1326 cfqq->ioprio = task_ioprio(tsk); 1326 cfqq->ioprio = task_ioprio(tsk);
1327 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1327 cfqq->ioprio_class = IOPRIO_CLASS_BE;
1328 break; 1328 break;
1329 case IOPRIO_CLASS_IDLE: 1329 case IOPRIO_CLASS_IDLE:
1330 cfqq->ioprio_class = IOPRIO_CLASS_IDLE; 1330 cfqq->ioprio_class = IOPRIO_CLASS_IDLE;
1331 cfqq->ioprio = 7; 1331 cfqq->ioprio = 7;
1332 cfq_clear_cfqq_idle_window(cfqq); 1332 cfq_clear_cfqq_idle_window(cfqq);
1333 break; 1333 break;
1334 } 1334 }
1335 1335
1336 /* 1336 /*
1337 * keep track of original prio settings in case we have to temporarily 1337 * keep track of original prio settings in case we have to temporarily
1338 * elevate the priority of this queue 1338 * elevate the priority of this queue
1339 */ 1339 */
1340 cfqq->org_ioprio = cfqq->ioprio; 1340 cfqq->org_ioprio = cfqq->ioprio;
1341 cfqq->org_ioprio_class = cfqq->ioprio_class; 1341 cfqq->org_ioprio_class = cfqq->ioprio_class;
1342 1342
1343 if (cfq_cfqq_on_rr(cfqq)) 1343 if (cfq_cfqq_on_rr(cfqq))
1344 cfq_resort_rr_list(cfqq, 0); 1344 cfq_resort_rr_list(cfqq, 0);
1345 1345
1346 cfq_clear_cfqq_prio_changed(cfqq); 1346 cfq_clear_cfqq_prio_changed(cfqq);
1347 } 1347 }
1348 1348
1349 static inline void changed_ioprio(struct cfq_queue *cfqq) 1349 static inline void changed_ioprio(struct cfq_queue *cfqq)
1350 { 1350 {
1351 if (cfqq) { 1351 if (cfqq) {
1352 struct cfq_data *cfqd = cfqq->cfqd; 1352 struct cfq_data *cfqd = cfqq->cfqd;
1353 1353
1354 spin_lock(cfqd->queue->queue_lock); 1354 spin_lock(cfqd->queue->queue_lock);
1355 cfq_mark_cfqq_prio_changed(cfqq); 1355 cfq_mark_cfqq_prio_changed(cfqq);
1356 cfq_init_prio_data(cfqq); 1356 cfq_init_prio_data(cfqq);
1357 spin_unlock(cfqd->queue->queue_lock); 1357 spin_unlock(cfqd->queue->queue_lock);
1358 } 1358 }
1359 } 1359 }
1360 1360
1361 /* 1361 /*
1362 * callback from sys_ioprio_set, irqs are disabled 1362 * callback from sys_ioprio_set, irqs are disabled
1363 */ 1363 */
1364 static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) 1364 static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
1365 { 1365 {
1366 struct cfq_io_context *cic = ioc->cic; 1366 struct cfq_io_context *cic = ioc->cic;
1367 1367
1368 changed_ioprio(cic->cfqq); 1368 changed_ioprio(cic->cfqq);
1369 1369
1370 list_for_each_entry(cic, &cic->list, list) 1370 list_for_each_entry(cic, &cic->list, list)
1371 changed_ioprio(cic->cfqq); 1371 changed_ioprio(cic->cfqq);
1372 1372
1373 return 0; 1373 return 0;
1374 } 1374 }
1375 1375
1376 static struct cfq_queue * 1376 static struct cfq_queue *
1377 cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio, 1377 cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio,
1378 gfp_t gfp_mask) 1378 gfp_t gfp_mask)
1379 { 1379 {
1380 const int hashval = hash_long(key, CFQ_QHASH_SHIFT); 1380 const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
1381 struct cfq_queue *cfqq, *new_cfqq = NULL; 1381 struct cfq_queue *cfqq, *new_cfqq = NULL;
1382 1382
1383 retry: 1383 retry:
1384 cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval); 1384 cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval);
1385 1385
1386 if (!cfqq) { 1386 if (!cfqq) {
1387 if (new_cfqq) { 1387 if (new_cfqq) {
1388 cfqq = new_cfqq; 1388 cfqq = new_cfqq;
1389 new_cfqq = NULL; 1389 new_cfqq = NULL;
1390 } else if (gfp_mask & __GFP_WAIT) { 1390 } else if (gfp_mask & __GFP_WAIT) {
1391 spin_unlock_irq(cfqd->queue->queue_lock); 1391 spin_unlock_irq(cfqd->queue->queue_lock);
1392 new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); 1392 new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
1393 spin_lock_irq(cfqd->queue->queue_lock); 1393 spin_lock_irq(cfqd->queue->queue_lock);
1394 goto retry; 1394 goto retry;
1395 } else { 1395 } else {
1396 cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); 1396 cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
1397 if (!cfqq) 1397 if (!cfqq)
1398 goto out; 1398 goto out;
1399 } 1399 }
1400 1400
1401 memset(cfqq, 0, sizeof(*cfqq)); 1401 memset(cfqq, 0, sizeof(*cfqq));
1402 1402
1403 INIT_HLIST_NODE(&cfqq->cfq_hash); 1403 INIT_HLIST_NODE(&cfqq->cfq_hash);
1404 INIT_LIST_HEAD(&cfqq->cfq_list); 1404 INIT_LIST_HEAD(&cfqq->cfq_list);
1405 RB_CLEAR_ROOT(&cfqq->sort_list); 1405 RB_CLEAR_ROOT(&cfqq->sort_list);
1406 INIT_LIST_HEAD(&cfqq->fifo); 1406 INIT_LIST_HEAD(&cfqq->fifo);
1407 1407
1408 cfqq->key = key; 1408 cfqq->key = key;
1409 hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); 1409 hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
1410 atomic_set(&cfqq->ref, 0); 1410 atomic_set(&cfqq->ref, 0);
1411 cfqq->cfqd = cfqd; 1411 cfqq->cfqd = cfqd;
1412 atomic_inc(&cfqd->ref); 1412 atomic_inc(&cfqd->ref);
1413 cfqq->service_last = 0; 1413 cfqq->service_last = 0;
1414 /* 1414 /*
1415 * set ->slice_left to allow preemption for a new process 1415 * set ->slice_left to allow preemption for a new process
1416 */ 1416 */
1417 cfqq->slice_left = 2 * cfqd->cfq_slice_idle; 1417 cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
1418 cfq_mark_cfqq_idle_window(cfqq); 1418 cfq_mark_cfqq_idle_window(cfqq);
1419 cfq_mark_cfqq_prio_changed(cfqq); 1419 cfq_mark_cfqq_prio_changed(cfqq);
1420 cfq_init_prio_data(cfqq); 1420 cfq_init_prio_data(cfqq);
1421 } 1421 }
1422 1422
1423 if (new_cfqq) 1423 if (new_cfqq)
1424 kmem_cache_free(cfq_pool, new_cfqq); 1424 kmem_cache_free(cfq_pool, new_cfqq);
1425 1425
1426 atomic_inc(&cfqq->ref); 1426 atomic_inc(&cfqq->ref);
1427 out: 1427 out:
1428 WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); 1428 WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
1429 return cfqq; 1429 return cfqq;
1430 } 1430 }
1431 1431
1432 /* 1432 /*
1433 * Setup general io context and cfq io context. There can be several cfq 1433 * Setup general io context and cfq io context. There can be several cfq
1434 * io contexts per general io context, if this process is doing io to more 1434 * io contexts per general io context, if this process is doing io to more
1435 * than one device managed by cfq. Note that caller is holding a reference to 1435 * than one device managed by cfq. Note that caller is holding a reference to
1436 * cfqq, so we don't need to worry about it disappearing 1436 * cfqq, so we don't need to worry about it disappearing
1437 */ 1437 */
1438 static struct cfq_io_context * 1438 static struct cfq_io_context *
1439 cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) 1439 cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask)
1440 { 1440 {
1441 struct io_context *ioc = NULL; 1441 struct io_context *ioc = NULL;
1442 struct cfq_io_context *cic; 1442 struct cfq_io_context *cic;
1443 1443
1444 might_sleep_if(gfp_mask & __GFP_WAIT); 1444 might_sleep_if(gfp_mask & __GFP_WAIT);
1445 1445
1446 ioc = get_io_context(gfp_mask); 1446 ioc = get_io_context(gfp_mask);
1447 if (!ioc) 1447 if (!ioc)
1448 return NULL; 1448 return NULL;
1449 1449
1450 if ((cic = ioc->cic) == NULL) { 1450 if ((cic = ioc->cic) == NULL) {
1451 cic = cfq_alloc_io_context(cfqd, gfp_mask); 1451 cic = cfq_alloc_io_context(cfqd, gfp_mask);
1452 1452
1453 if (cic == NULL) 1453 if (cic == NULL)
1454 goto err; 1454 goto err;
1455 1455
1456 /* 1456 /*
1457 * manually increment generic io_context usage count, it 1457 * manually increment generic io_context usage count, it
1458 * cannot go away since we are already holding one ref to it 1458 * cannot go away since we are already holding one ref to it
1459 */ 1459 */
1460 ioc->cic = cic; 1460 ioc->cic = cic;
1461 ioc->set_ioprio = cfq_ioc_set_ioprio; 1461 ioc->set_ioprio = cfq_ioc_set_ioprio;
1462 cic->ioc = ioc; 1462 cic->ioc = ioc;
1463 cic->key = cfqd; 1463 cic->key = cfqd;
1464 atomic_inc(&cfqd->ref); 1464 atomic_inc(&cfqd->ref);
1465 } else { 1465 } else {
1466 struct cfq_io_context *__cic; 1466 struct cfq_io_context *__cic;
1467 1467
1468 /* 1468 /*
1469 * the first cic on the list is actually the head itself 1469 * the first cic on the list is actually the head itself
1470 */ 1470 */
1471 if (cic->key == cfqd) 1471 if (cic->key == cfqd)
1472 goto out; 1472 goto out;
1473 1473
1474 /* 1474 /*
1475 * cic exists, check if we already are there. linear search 1475 * cic exists, check if we already are there. linear search
1476 * should be ok here, the list will usually not be more than 1476 * should be ok here, the list will usually not be more than
1477 * 1 or a few entries long 1477 * 1 or a few entries long
1478 */ 1478 */
1479 list_for_each_entry(__cic, &cic->list, list) { 1479 list_for_each_entry(__cic, &cic->list, list) {
1480 /* 1480 /*
1481 * this process is already holding a reference to 1481 * this process is already holding a reference to
1482 * this queue, so no need to get one more 1482 * this queue, so no need to get one more
1483 */ 1483 */
1484 if (__cic->key == cfqd) { 1484 if (__cic->key == cfqd) {
1485 cic = __cic; 1485 cic = __cic;
1486 goto out; 1486 goto out;
1487 } 1487 }
1488 } 1488 }
1489 1489
1490 /* 1490 /*
1491 * nope, process doesn't have a cic associated with this 1491 * nope, process doesn't have a cic associated with this
1492 * cfqq yet. get a new one and add to list 1492 * cfqq yet. get a new one and add to list
1493 */ 1493 */
1494 __cic = cfq_alloc_io_context(cfqd, gfp_mask); 1494 __cic = cfq_alloc_io_context(cfqd, gfp_mask);
1495 if (__cic == NULL) 1495 if (__cic == NULL)
1496 goto err; 1496 goto err;
1497 1497
1498 __cic->ioc = ioc; 1498 __cic->ioc = ioc;
1499 __cic->key = cfqd; 1499 __cic->key = cfqd;
1500 atomic_inc(&cfqd->ref); 1500 atomic_inc(&cfqd->ref);
1501 list_add(&__cic->list, &cic->list); 1501 list_add(&__cic->list, &cic->list);
1502 cic = __cic; 1502 cic = __cic;
1503 } 1503 }
1504 1504
1505 out: 1505 out:
1506 return cic; 1506 return cic;
1507 err: 1507 err:
1508 put_io_context(ioc); 1508 put_io_context(ioc);
1509 return NULL; 1509 return NULL;
1510 } 1510 }
1511 1511
1512 static void 1512 static void
1513 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) 1513 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
1514 { 1514 {
1515 unsigned long elapsed, ttime; 1515 unsigned long elapsed, ttime;
1516 1516
1517 /* 1517 /*
1518 * if this context already has stuff queued, thinktime is from 1518 * if this context already has stuff queued, thinktime is from
1519 * last queue not last end 1519 * last queue not last end
1520 */ 1520 */
1521 #if 0 1521 #if 0
1522 if (time_after(cic->last_end_request, cic->last_queue)) 1522 if (time_after(cic->last_end_request, cic->last_queue))
1523 elapsed = jiffies - cic->last_end_request; 1523 elapsed = jiffies - cic->last_end_request;
1524 else 1524 else
1525 elapsed = jiffies - cic->last_queue; 1525 elapsed = jiffies - cic->last_queue;
1526 #else 1526 #else
1527 elapsed = jiffies - cic->last_end_request; 1527 elapsed = jiffies - cic->last_end_request;
1528 #endif 1528 #endif
1529 1529
1530 ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); 1530 ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
1531 1531
1532 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; 1532 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
1533 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; 1533 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
1534 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; 1534 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
1535 } 1535 }
1536 1536
1537 #define sample_valid(samples) ((samples) > 80) 1537 #define sample_valid(samples) ((samples) > 80)
1538 1538
1539 /* 1539 /*
1540 * Disable idle window if the process thinks too long or seeks so much that 1540 * Disable idle window if the process thinks too long or seeks so much that
1541 * it doesn't matter 1541 * it doesn't matter
1542 */ 1542 */
1543 static void 1543 static void
1544 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1544 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1545 struct cfq_io_context *cic) 1545 struct cfq_io_context *cic)
1546 { 1546 {
1547 int enable_idle = cfq_cfqq_idle_window(cfqq); 1547 int enable_idle = cfq_cfqq_idle_window(cfqq);
1548 1548
1549 if (!cic->ioc->task || !cfqd->cfq_slice_idle) 1549 if (!cic->ioc->task || !cfqd->cfq_slice_idle)
1550 enable_idle = 0; 1550 enable_idle = 0;
1551 else if (sample_valid(cic->ttime_samples)) { 1551 else if (sample_valid(cic->ttime_samples)) {
1552 if (cic->ttime_mean > cfqd->cfq_slice_idle) 1552 if (cic->ttime_mean > cfqd->cfq_slice_idle)
1553 enable_idle = 0; 1553 enable_idle = 0;
1554 else 1554 else
1555 enable_idle = 1; 1555 enable_idle = 1;
1556 } 1556 }
1557 1557
1558 if (enable_idle) 1558 if (enable_idle)
1559 cfq_mark_cfqq_idle_window(cfqq); 1559 cfq_mark_cfqq_idle_window(cfqq);
1560 else 1560 else
1561 cfq_clear_cfqq_idle_window(cfqq); 1561 cfq_clear_cfqq_idle_window(cfqq);
1562 } 1562 }
1563 1563
1564 1564
1565 /* 1565 /*
1566 * Check if new_cfqq should preempt the currently active queue. Return 0 for 1566 * Check if new_cfqq should preempt the currently active queue. Return 0 for
1567 * no or if we aren't sure, a 1 will cause a preempt. 1567 * no or if we aren't sure, a 1 will cause a preempt.
1568 */ 1568 */
1569 static int 1569 static int
1570 cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, 1570 cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1571 struct cfq_rq *crq) 1571 struct cfq_rq *crq)
1572 { 1572 {
1573 struct cfq_queue *cfqq = cfqd->active_queue; 1573 struct cfq_queue *cfqq = cfqd->active_queue;
1574 1574
1575 if (cfq_class_idle(new_cfqq)) 1575 if (cfq_class_idle(new_cfqq))
1576 return 0; 1576 return 0;
1577 1577
1578 if (!cfqq) 1578 if (!cfqq)
1579 return 1; 1579 return 1;
1580 1580
1581 if (cfq_class_idle(cfqq)) 1581 if (cfq_class_idle(cfqq))
1582 return 1; 1582 return 1;
1583 if (!cfq_cfqq_wait_request(new_cfqq)) 1583 if (!cfq_cfqq_wait_request(new_cfqq))
1584 return 0; 1584 return 0;
1585 /* 1585 /*
1586 * if it doesn't have slice left, forget it 1586 * if it doesn't have slice left, forget it
1587 */ 1587 */
1588 if (new_cfqq->slice_left < cfqd->cfq_slice_idle) 1588 if (new_cfqq->slice_left < cfqd->cfq_slice_idle)
1589 return 0; 1589 return 0;
1590 if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq)) 1590 if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq))
1591 return 1; 1591 return 1;
1592 1592
1593 return 0; 1593 return 0;
1594 } 1594 }
1595 1595
1596 /* 1596 /*
1597 * cfqq preempts the active queue. if we allowed preempt with no slice left, 1597 * cfqq preempts the active queue. if we allowed preempt with no slice left,
1598 * let it have half of its nominal slice. 1598 * let it have half of its nominal slice.
1599 */ 1599 */
1600 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1600 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1601 { 1601 {
1602 struct cfq_queue *__cfqq, *next; 1602 struct cfq_queue *__cfqq, *next;
1603 1603
1604 list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list) 1604 list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list)
1605 cfq_resort_rr_list(__cfqq, 1); 1605 cfq_resort_rr_list(__cfqq, 1);
1606 1606
1607 if (!cfqq->slice_left) 1607 if (!cfqq->slice_left)
1608 cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2; 1608 cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2;
1609 1609
1610 cfqq->slice_end = cfqq->slice_left + jiffies; 1610 cfqq->slice_end = cfqq->slice_left + jiffies;
1611 __cfq_slice_expired(cfqd, cfqq, 1); 1611 __cfq_slice_expired(cfqd, cfqq, 1);
1612 __cfq_set_active_queue(cfqd, cfqq); 1612 __cfq_set_active_queue(cfqd, cfqq);
1613 } 1613 }
1614 1614
1615 /* 1615 /*
1616 * should really be a ll_rw_blk.c helper 1616 * should really be a ll_rw_blk.c helper
1617 */ 1617 */
1618 static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1618 static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1619 { 1619 {
1620 request_queue_t *q = cfqd->queue; 1620 request_queue_t *q = cfqd->queue;
1621 1621
1622 if (!blk_queue_plugged(q)) 1622 if (!blk_queue_plugged(q))
1623 q->request_fn(q); 1623 q->request_fn(q);
1624 else 1624 else
1625 __generic_unplug_device(q); 1625 __generic_unplug_device(q);
1626 } 1626 }
1627 1627
1628 /* 1628 /*
1629 * Called when a new fs request (crq) is added (to cfqq). Check if there's 1629 * Called when a new fs request (crq) is added (to cfqq). Check if there's
1630 * something we should do about it 1630 * something we should do about it
1631 */ 1631 */
1632 static void 1632 static void
1633 cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1633 cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1634 struct cfq_rq *crq) 1634 struct cfq_rq *crq)
1635 { 1635 {
1636 struct cfq_io_context *cic; 1636 struct cfq_io_context *cic;
1637 1637
1638 cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); 1638 cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
1639 1639
1640 /* 1640 /*
1641 * we never wait for an async request and we don't allow preemption 1641 * we never wait for an async request and we don't allow preemption
1642 * of an async request. so just return early 1642 * of an async request. so just return early
1643 */ 1643 */
1644 if (!cfq_crq_is_sync(crq)) 1644 if (!cfq_crq_is_sync(crq))
1645 return; 1645 return;
1646 1646
1647 cic = crq->io_context; 1647 cic = crq->io_context;
1648 1648
1649 cfq_update_io_thinktime(cfqd, cic); 1649 cfq_update_io_thinktime(cfqd, cic);
1650 cfq_update_idle_window(cfqd, cfqq, cic); 1650 cfq_update_idle_window(cfqd, cfqq, cic);
1651 1651
1652 cic->last_queue = jiffies; 1652 cic->last_queue = jiffies;
1653 1653
1654 if (cfqq == cfqd->active_queue) { 1654 if (cfqq == cfqd->active_queue) {
1655 /* 1655 /*
1656 * if we are waiting for a request for this queue, let it rip 1656 * if we are waiting for a request for this queue, let it rip
1657 * immediately and flag that we must not expire this queue 1657 * immediately and flag that we must not expire this queue
1658 * just now 1658 * just now
1659 */ 1659 */
1660 if (cfq_cfqq_wait_request(cfqq)) { 1660 if (cfq_cfqq_wait_request(cfqq)) {
1661 cfq_mark_cfqq_must_dispatch(cfqq); 1661 cfq_mark_cfqq_must_dispatch(cfqq);
1662 del_timer(&cfqd->idle_slice_timer); 1662 del_timer(&cfqd->idle_slice_timer);
1663 cfq_start_queueing(cfqd, cfqq); 1663 cfq_start_queueing(cfqd, cfqq);
1664 } 1664 }
1665 } else if (cfq_should_preempt(cfqd, cfqq, crq)) { 1665 } else if (cfq_should_preempt(cfqd, cfqq, crq)) {
1666 /* 1666 /*
1667 * not the active queue - expire current slice if it is 1667 * not the active queue - expire current slice if it is
1668 * idle and has expired its mean thinktime or this new queue 1668 * idle and has expired its mean thinktime or this new queue
1669 * has some old slice time left and is of higher priority 1669 * has some old slice time left and is of higher priority
1670 */ 1670 */
1671 cfq_preempt_queue(cfqd, cfqq); 1671 cfq_preempt_queue(cfqd, cfqq);
1672 cfq_mark_cfqq_must_dispatch(cfqq); 1672 cfq_mark_cfqq_must_dispatch(cfqq);
1673 cfq_start_queueing(cfqd, cfqq); 1673 cfq_start_queueing(cfqd, cfqq);
1674 } 1674 }
1675 } 1675 }
1676 1676
1677 static void cfq_insert_request(request_queue_t *q, struct request *rq) 1677 static void cfq_insert_request(request_queue_t *q, struct request *rq)
1678 { 1678 {
1679 struct cfq_data *cfqd = q->elevator->elevator_data; 1679 struct cfq_data *cfqd = q->elevator->elevator_data;
1680 struct cfq_rq *crq = RQ_DATA(rq); 1680 struct cfq_rq *crq = RQ_DATA(rq);
1681 struct cfq_queue *cfqq = crq->cfq_queue; 1681 struct cfq_queue *cfqq = crq->cfq_queue;
1682 1682
1683 cfq_init_prio_data(cfqq); 1683 cfq_init_prio_data(cfqq);
1684 1684
1685 cfq_add_crq_rb(crq); 1685 cfq_add_crq_rb(crq);
1686 1686
1687 list_add_tail(&rq->queuelist, &cfqq->fifo); 1687 list_add_tail(&rq->queuelist, &cfqq->fifo);
1688 1688
1689 if (rq_mergeable(rq)) 1689 if (rq_mergeable(rq))
1690 cfq_add_crq_hash(cfqd, crq); 1690 cfq_add_crq_hash(cfqd, crq);
1691 1691
1692 cfq_crq_enqueued(cfqd, cfqq, crq); 1692 cfq_crq_enqueued(cfqd, cfqq, crq);
1693 } 1693 }
1694 1694
1695 static void cfq_completed_request(request_queue_t *q, struct request *rq) 1695 static void cfq_completed_request(request_queue_t *q, struct request *rq)
1696 { 1696 {
1697 struct cfq_rq *crq = RQ_DATA(rq); 1697 struct cfq_rq *crq = RQ_DATA(rq);
1698 struct cfq_queue *cfqq = crq->cfq_queue; 1698 struct cfq_queue *cfqq = crq->cfq_queue;
1699 struct cfq_data *cfqd = cfqq->cfqd; 1699 struct cfq_data *cfqd = cfqq->cfqd;
1700 const int sync = cfq_crq_is_sync(crq); 1700 const int sync = cfq_crq_is_sync(crq);
1701 unsigned long now; 1701 unsigned long now;
1702 1702
1703 now = jiffies; 1703 now = jiffies;
1704 1704
1705 WARN_ON(!cfqd->rq_in_driver); 1705 WARN_ON(!cfqd->rq_in_driver);
1706 WARN_ON(!cfqq->on_dispatch[sync]); 1706 WARN_ON(!cfqq->on_dispatch[sync]);
1707 cfqd->rq_in_driver--; 1707 cfqd->rq_in_driver--;
1708 cfqq->on_dispatch[sync]--; 1708 cfqq->on_dispatch[sync]--;
1709 1709
1710 if (!cfq_class_idle(cfqq)) 1710 if (!cfq_class_idle(cfqq))
1711 cfqd->last_end_request = now; 1711 cfqd->last_end_request = now;
1712 1712
1713 if (!cfq_cfqq_dispatched(cfqq)) { 1713 if (!cfq_cfqq_dispatched(cfqq)) {
1714 if (cfq_cfqq_on_rr(cfqq)) { 1714 if (cfq_cfqq_on_rr(cfqq)) {
1715 cfqq->service_last = now; 1715 cfqq->service_last = now;
1716 cfq_resort_rr_list(cfqq, 0); 1716 cfq_resort_rr_list(cfqq, 0);
1717 } 1717 }
1718 if (cfq_cfqq_expired(cfqq)) { 1718 if (cfq_cfqq_expired(cfqq)) {
1719 __cfq_slice_expired(cfqd, cfqq, 0); 1719 __cfq_slice_expired(cfqd, cfqq, 0);
1720 cfq_schedule_dispatch(cfqd); 1720 cfq_schedule_dispatch(cfqd);
1721 } 1721 }
1722 } 1722 }
1723 1723
1724 if (cfq_crq_is_sync(crq)) 1724 if (cfq_crq_is_sync(crq))
1725 crq->io_context->last_end_request = now; 1725 crq->io_context->last_end_request = now;
1726 } 1726 }
1727 1727
1728 static struct request * 1728 static struct request *
1729 cfq_former_request(request_queue_t *q, struct request *rq) 1729 cfq_former_request(request_queue_t *q, struct request *rq)
1730 { 1730 {
1731 struct cfq_rq *crq = RQ_DATA(rq); 1731 struct cfq_rq *crq = RQ_DATA(rq);
1732 struct rb_node *rbprev = rb_prev(&crq->rb_node); 1732 struct rb_node *rbprev = rb_prev(&crq->rb_node);
1733 1733
1734 if (rbprev) 1734 if (rbprev)
1735 return rb_entry_crq(rbprev)->request; 1735 return rb_entry_crq(rbprev)->request;
1736 1736
1737 return NULL; 1737 return NULL;
1738 } 1738 }
1739 1739
1740 static struct request * 1740 static struct request *
1741 cfq_latter_request(request_queue_t *q, struct request *rq) 1741 cfq_latter_request(request_queue_t *q, struct request *rq)
1742 { 1742 {
1743 struct cfq_rq *crq = RQ_DATA(rq); 1743 struct cfq_rq *crq = RQ_DATA(rq);
1744 struct rb_node *rbnext = rb_next(&crq->rb_node); 1744 struct rb_node *rbnext = rb_next(&crq->rb_node);
1745 1745
1746 if (rbnext) 1746 if (rbnext)
1747 return rb_entry_crq(rbnext)->request; 1747 return rb_entry_crq(rbnext)->request;
1748 1748
1749 return NULL; 1749 return NULL;
1750 } 1750 }
1751 1751
1752 /* 1752 /*
1753 * we temporarily boost lower priority queues if they are holding fs exclusive 1753 * we temporarily boost lower priority queues if they are holding fs exclusive
1754 * resources. they are boosted to normal prio (CLASS_BE/4) 1754 * resources. they are boosted to normal prio (CLASS_BE/4)
1755 */ 1755 */
1756 static void cfq_prio_boost(struct cfq_queue *cfqq) 1756 static void cfq_prio_boost(struct cfq_queue *cfqq)
1757 { 1757 {
1758 const int ioprio_class = cfqq->ioprio_class; 1758 const int ioprio_class = cfqq->ioprio_class;
1759 const int ioprio = cfqq->ioprio; 1759 const int ioprio = cfqq->ioprio;
1760 1760
1761 if (has_fs_excl()) { 1761 if (has_fs_excl()) {
1762 /* 1762 /*
1763 * boost idle prio on transactions that would lock out other 1763 * boost idle prio on transactions that would lock out other
1764 * users of the filesystem 1764 * users of the filesystem
1765 */ 1765 */
1766 if (cfq_class_idle(cfqq)) 1766 if (cfq_class_idle(cfqq))
1767 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1767 cfqq->ioprio_class = IOPRIO_CLASS_BE;
1768 if (cfqq->ioprio > IOPRIO_NORM) 1768 if (cfqq->ioprio > IOPRIO_NORM)
1769 cfqq->ioprio = IOPRIO_NORM; 1769 cfqq->ioprio = IOPRIO_NORM;
1770 } else { 1770 } else {
1771 /* 1771 /*
1772 * check if we need to unboost the queue 1772 * check if we need to unboost the queue
1773 */ 1773 */
1774 if (cfqq->ioprio_class != cfqq->org_ioprio_class) 1774 if (cfqq->ioprio_class != cfqq->org_ioprio_class)
1775 cfqq->ioprio_class = cfqq->org_ioprio_class; 1775 cfqq->ioprio_class = cfqq->org_ioprio_class;
1776 if (cfqq->ioprio != cfqq->org_ioprio) 1776 if (cfqq->ioprio != cfqq->org_ioprio)
1777 cfqq->ioprio = cfqq->org_ioprio; 1777 cfqq->ioprio = cfqq->org_ioprio;
1778 } 1778 }
1779 1779
1780 /* 1780 /*
1781 * refile between round-robin lists if we moved the priority class 1781 * refile between round-robin lists if we moved the priority class
1782 */ 1782 */
1783 if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) && 1783 if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) &&
1784 cfq_cfqq_on_rr(cfqq)) 1784 cfq_cfqq_on_rr(cfqq))
1785 cfq_resort_rr_list(cfqq, 0); 1785 cfq_resort_rr_list(cfqq, 0);
1786 } 1786 }
1787 1787
1788 static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) 1788 static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
1789 { 1789 {
1790 if (rw == READ || process_sync(task)) 1790 if (rw == READ || process_sync(task))
1791 return task->pid; 1791 return task->pid;
1792 1792
1793 return CFQ_KEY_ASYNC; 1793 return CFQ_KEY_ASYNC;
1794 } 1794 }
1795 1795
1796 static inline int 1796 static inline int
1797 __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1797 __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1798 struct task_struct *task, int rw) 1798 struct task_struct *task, int rw)
1799 { 1799 {
1800 #if 1 1800 #if 1
1801 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && 1801 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
1802 !cfq_cfqq_must_alloc_slice(cfqq)) { 1802 !cfq_cfqq_must_alloc_slice(cfqq)) {
1803 cfq_mark_cfqq_must_alloc_slice(cfqq); 1803 cfq_mark_cfqq_must_alloc_slice(cfqq);
1804 return ELV_MQUEUE_MUST; 1804 return ELV_MQUEUE_MUST;
1805 } 1805 }
1806 1806
1807 return ELV_MQUEUE_MAY; 1807 return ELV_MQUEUE_MAY;
1808 #else 1808 #else
1809 if (!cfqq || task->flags & PF_MEMALLOC) 1809 if (!cfqq || task->flags & PF_MEMALLOC)
1810 return ELV_MQUEUE_MAY; 1810 return ELV_MQUEUE_MAY;
1811 if (!cfqq->allocated[rw] || cfq_cfqq_must_alloc(cfqq)) { 1811 if (!cfqq->allocated[rw] || cfq_cfqq_must_alloc(cfqq)) {
1812 if (cfq_cfqq_wait_request(cfqq)) 1812 if (cfq_cfqq_wait_request(cfqq))
1813 return ELV_MQUEUE_MUST; 1813 return ELV_MQUEUE_MUST;
1814 1814
1815 /* 1815 /*
1816 * only allow 1 ELV_MQUEUE_MUST per slice, otherwise we 1816 * only allow 1 ELV_MQUEUE_MUST per slice, otherwise we
1817 * can quickly flood the queue with writes from a single task 1817 * can quickly flood the queue with writes from a single task
1818 */ 1818 */
1819 if (rw == READ || !cfq_cfqq_must_alloc_slice(cfqq)) { 1819 if (rw == READ || !cfq_cfqq_must_alloc_slice(cfqq)) {
1820 cfq_mark_cfqq_must_alloc_slice(cfqq); 1820 cfq_mark_cfqq_must_alloc_slice(cfqq);
1821 return ELV_MQUEUE_MUST; 1821 return ELV_MQUEUE_MUST;
1822 } 1822 }
1823 1823
1824 return ELV_MQUEUE_MAY; 1824 return ELV_MQUEUE_MAY;
1825 } 1825 }
1826 if (cfq_class_idle(cfqq)) 1826 if (cfq_class_idle(cfqq))
1827 return ELV_MQUEUE_NO; 1827 return ELV_MQUEUE_NO;
1828 if (cfqq->allocated[rw] >= cfqd->max_queued) { 1828 if (cfqq->allocated[rw] >= cfqd->max_queued) {
1829 struct io_context *ioc = get_io_context(GFP_ATOMIC); 1829 struct io_context *ioc = get_io_context(GFP_ATOMIC);
1830 int ret = ELV_MQUEUE_NO; 1830 int ret = ELV_MQUEUE_NO;
1831 1831
1832 if (ioc && ioc->nr_batch_requests) 1832 if (ioc && ioc->nr_batch_requests)
1833 ret = ELV_MQUEUE_MAY; 1833 ret = ELV_MQUEUE_MAY;
1834 1834
1835 put_io_context(ioc); 1835 put_io_context(ioc);
1836 return ret; 1836 return ret;
1837 } 1837 }
1838 1838
1839 return ELV_MQUEUE_MAY; 1839 return ELV_MQUEUE_MAY;
1840 #endif 1840 #endif
1841 } 1841 }
1842 1842
1843 static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) 1843 static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
1844 { 1844 {
1845 struct cfq_data *cfqd = q->elevator->elevator_data; 1845 struct cfq_data *cfqd = q->elevator->elevator_data;
1846 struct task_struct *tsk = current; 1846 struct task_struct *tsk = current;
1847 struct cfq_queue *cfqq; 1847 struct cfq_queue *cfqq;
1848 1848
1849 /* 1849 /*
1850 * don't force setup of a queue from here, as a call to may_queue 1850 * don't force setup of a queue from here, as a call to may_queue
1851 * does not necessarily imply that a request actually will be queued. 1851 * does not necessarily imply that a request actually will be queued.
1852 * so just lookup a possibly existing queue, or return 'may queue' 1852 * so just lookup a possibly existing queue, or return 'may queue'
1853 * if that fails 1853 * if that fails
1854 */ 1854 */
1855 cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio); 1855 cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio);
1856 if (cfqq) { 1856 if (cfqq) {
1857 cfq_init_prio_data(cfqq); 1857 cfq_init_prio_data(cfqq);
1858 cfq_prio_boost(cfqq); 1858 cfq_prio_boost(cfqq);
1859 1859
1860 return __cfq_may_queue(cfqd, cfqq, tsk, rw); 1860 return __cfq_may_queue(cfqd, cfqq, tsk, rw);
1861 } 1861 }
1862 1862
1863 return ELV_MQUEUE_MAY; 1863 return ELV_MQUEUE_MAY;
1864 } 1864 }
1865 1865
1866 static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq) 1866 static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq)
1867 { 1867 {
1868 struct cfq_data *cfqd = q->elevator->elevator_data; 1868 struct cfq_data *cfqd = q->elevator->elevator_data;
1869 struct request_list *rl = &q->rq; 1869 struct request_list *rl = &q->rq;
1870 1870
1871 if (cfqq->allocated[READ] <= cfqd->max_queued || cfqd->rq_starved) { 1871 if (cfqq->allocated[READ] <= cfqd->max_queued || cfqd->rq_starved) {
1872 smp_mb(); 1872 smp_mb();
1873 if (waitqueue_active(&rl->wait[READ])) 1873 if (waitqueue_active(&rl->wait[READ]))
1874 wake_up(&rl->wait[READ]); 1874 wake_up(&rl->wait[READ]);
1875 } 1875 }
1876 1876
1877 if (cfqq->allocated[WRITE] <= cfqd->max_queued || cfqd->rq_starved) { 1877 if (cfqq->allocated[WRITE] <= cfqd->max_queued || cfqd->rq_starved) {
1878 smp_mb(); 1878 smp_mb();
1879 if (waitqueue_active(&rl->wait[WRITE])) 1879 if (waitqueue_active(&rl->wait[WRITE]))
1880 wake_up(&rl->wait[WRITE]); 1880 wake_up(&rl->wait[WRITE]);
1881 } 1881 }
1882 } 1882 }
1883 1883
1884 /* 1884 /*
1885 * queue lock held here 1885 * queue lock held here
1886 */ 1886 */
1887 static void cfq_put_request(request_queue_t *q, struct request *rq) 1887 static void cfq_put_request(request_queue_t *q, struct request *rq)
1888 { 1888 {
1889 struct cfq_data *cfqd = q->elevator->elevator_data; 1889 struct cfq_data *cfqd = q->elevator->elevator_data;
1890 struct cfq_rq *crq = RQ_DATA(rq); 1890 struct cfq_rq *crq = RQ_DATA(rq);
1891 1891
1892 if (crq) { 1892 if (crq) {
1893 struct cfq_queue *cfqq = crq->cfq_queue; 1893 struct cfq_queue *cfqq = crq->cfq_queue;
1894 const int rw = rq_data_dir(rq); 1894 const int rw = rq_data_dir(rq);
1895 1895
1896 BUG_ON(!cfqq->allocated[rw]); 1896 BUG_ON(!cfqq->allocated[rw]);
1897 cfqq->allocated[rw]--; 1897 cfqq->allocated[rw]--;
1898 1898
1899 put_io_context(crq->io_context->ioc); 1899 put_io_context(crq->io_context->ioc);
1900 1900
1901 mempool_free(crq, cfqd->crq_pool); 1901 mempool_free(crq, cfqd->crq_pool);
1902 rq->elevator_private = NULL; 1902 rq->elevator_private = NULL;
1903 1903
1904 cfq_check_waiters(q, cfqq); 1904 cfq_check_waiters(q, cfqq);
1905 cfq_put_queue(cfqq); 1905 cfq_put_queue(cfqq);
1906 } 1906 }
1907 } 1907 }
1908 1908
1909 /* 1909 /*
1910 * Allocate cfq data structures associated with this request. 1910 * Allocate cfq data structures associated with this request.
1911 */ 1911 */
1912 static int 1912 static int
1913 cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, 1913 cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
1914 gfp_t gfp_mask) 1914 gfp_t gfp_mask)
1915 { 1915 {
1916 struct cfq_data *cfqd = q->elevator->elevator_data; 1916 struct cfq_data *cfqd = q->elevator->elevator_data;
1917 struct task_struct *tsk = current; 1917 struct task_struct *tsk = current;
1918 struct cfq_io_context *cic; 1918 struct cfq_io_context *cic;
1919 const int rw = rq_data_dir(rq); 1919 const int rw = rq_data_dir(rq);
1920 pid_t key = cfq_queue_pid(tsk, rw); 1920 pid_t key = cfq_queue_pid(tsk, rw);
1921 struct cfq_queue *cfqq; 1921 struct cfq_queue *cfqq;
1922 struct cfq_rq *crq; 1922 struct cfq_rq *crq;
1923 unsigned long flags; 1923 unsigned long flags;
1924 1924
1925 might_sleep_if(gfp_mask & __GFP_WAIT); 1925 might_sleep_if(gfp_mask & __GFP_WAIT);
1926 1926
1927 cic = cfq_get_io_context(cfqd, key, gfp_mask); 1927 cic = cfq_get_io_context(cfqd, key, gfp_mask);
1928 1928
1929 spin_lock_irqsave(q->queue_lock, flags); 1929 spin_lock_irqsave(q->queue_lock, flags);
1930 1930
1931 if (!cic) 1931 if (!cic)
1932 goto queue_fail; 1932 goto queue_fail;
1933 1933
1934 if (!cic->cfqq) { 1934 if (!cic->cfqq) {
1935 cfqq = cfq_get_queue(cfqd, key, tsk->ioprio, gfp_mask); 1935 cfqq = cfq_get_queue(cfqd, key, tsk->ioprio, gfp_mask);
1936 if (!cfqq) 1936 if (!cfqq)
1937 goto queue_fail; 1937 goto queue_fail;
1938 1938
1939 cic->cfqq = cfqq; 1939 cic->cfqq = cfqq;
1940 } else 1940 } else
1941 cfqq = cic->cfqq; 1941 cfqq = cic->cfqq;
1942 1942
1943 cfqq->allocated[rw]++; 1943 cfqq->allocated[rw]++;
1944 cfq_clear_cfqq_must_alloc(cfqq); 1944 cfq_clear_cfqq_must_alloc(cfqq);
1945 cfqd->rq_starved = 0; 1945 cfqd->rq_starved = 0;
1946 atomic_inc(&cfqq->ref); 1946 atomic_inc(&cfqq->ref);
1947 spin_unlock_irqrestore(q->queue_lock, flags); 1947 spin_unlock_irqrestore(q->queue_lock, flags);
1948 1948
1949 crq = mempool_alloc(cfqd->crq_pool, gfp_mask); 1949 crq = mempool_alloc(cfqd->crq_pool, gfp_mask);
1950 if (crq) { 1950 if (crq) {
1951 RB_CLEAR(&crq->rb_node); 1951 RB_CLEAR(&crq->rb_node);
1952 crq->rb_key = 0; 1952 crq->rb_key = 0;
1953 crq->request = rq; 1953 crq->request = rq;
1954 INIT_HLIST_NODE(&crq->hash); 1954 INIT_HLIST_NODE(&crq->hash);
1955 crq->cfq_queue = cfqq; 1955 crq->cfq_queue = cfqq;
1956 crq->io_context = cic; 1956 crq->io_context = cic;
1957 1957
1958 if (rw == READ || process_sync(tsk)) 1958 if (rw == READ || process_sync(tsk))
1959 cfq_mark_crq_is_sync(crq); 1959 cfq_mark_crq_is_sync(crq);
1960 else 1960 else
1961 cfq_clear_crq_is_sync(crq); 1961 cfq_clear_crq_is_sync(crq);
1962 1962
1963 rq->elevator_private = crq; 1963 rq->elevator_private = crq;
1964 return 0; 1964 return 0;
1965 } 1965 }
1966 1966
1967 spin_lock_irqsave(q->queue_lock, flags); 1967 spin_lock_irqsave(q->queue_lock, flags);
1968 cfqq->allocated[rw]--; 1968 cfqq->allocated[rw]--;
1969 if (!(cfqq->allocated[0] + cfqq->allocated[1])) 1969 if (!(cfqq->allocated[0] + cfqq->allocated[1]))
1970 cfq_mark_cfqq_must_alloc(cfqq); 1970 cfq_mark_cfqq_must_alloc(cfqq);
1971 cfq_put_queue(cfqq); 1971 cfq_put_queue(cfqq);
1972 queue_fail: 1972 queue_fail:
1973 if (cic) 1973 if (cic)
1974 put_io_context(cic->ioc); 1974 put_io_context(cic->ioc);
1975 /* 1975 /*
1976 * mark us rq allocation starved. we need to kickstart the process 1976 * mark us rq allocation starved. we need to kickstart the process
1977 * ourselves if there are no pending requests that can do it for us. 1977 * ourselves if there are no pending requests that can do it for us.
1978 * that would be an extremely rare OOM situation 1978 * that would be an extremely rare OOM situation
1979 */ 1979 */
1980 cfqd->rq_starved = 1; 1980 cfqd->rq_starved = 1;
1981 cfq_schedule_dispatch(cfqd); 1981 cfq_schedule_dispatch(cfqd);
1982 spin_unlock_irqrestore(q->queue_lock, flags); 1982 spin_unlock_irqrestore(q->queue_lock, flags);
1983 return 1; 1983 return 1;
1984 } 1984 }
1985 1985
1986 static void cfq_kick_queue(void *data) 1986 static void cfq_kick_queue(void *data)
1987 { 1987 {
1988 request_queue_t *q = data; 1988 request_queue_t *q = data;
1989 struct cfq_data *cfqd = q->elevator->elevator_data; 1989 struct cfq_data *cfqd = q->elevator->elevator_data;
1990 unsigned long flags; 1990 unsigned long flags;
1991 1991
1992 spin_lock_irqsave(q->queue_lock, flags); 1992 spin_lock_irqsave(q->queue_lock, flags);
1993 1993
1994 if (cfqd->rq_starved) { 1994 if (cfqd->rq_starved) {
1995 struct request_list *rl = &q->rq; 1995 struct request_list *rl = &q->rq;
1996 1996
1997 /* 1997 /*
1998 * we aren't guaranteed to get a request after this, but we 1998 * we aren't guaranteed to get a request after this, but we
1999 * have to be opportunistic 1999 * have to be opportunistic
2000 */ 2000 */
2001 smp_mb(); 2001 smp_mb();
2002 if (waitqueue_active(&rl->wait[READ])) 2002 if (waitqueue_active(&rl->wait[READ]))
2003 wake_up(&rl->wait[READ]); 2003 wake_up(&rl->wait[READ]);
2004 if (waitqueue_active(&rl->wait[WRITE])) 2004 if (waitqueue_active(&rl->wait[WRITE]))
2005 wake_up(&rl->wait[WRITE]); 2005 wake_up(&rl->wait[WRITE]);
2006 } 2006 }
2007 2007
2008 blk_remove_plug(q); 2008 blk_remove_plug(q);
2009 q->request_fn(q); 2009 q->request_fn(q);
2010 spin_unlock_irqrestore(q->queue_lock, flags); 2010 spin_unlock_irqrestore(q->queue_lock, flags);
2011 } 2011 }
2012 2012
2013 /* 2013 /*
2014 * Timer running if the active_queue is currently idling inside its time slice 2014 * Timer running if the active_queue is currently idling inside its time slice
2015 */ 2015 */
2016 static void cfq_idle_slice_timer(unsigned long data) 2016 static void cfq_idle_slice_timer(unsigned long data)
2017 { 2017 {
2018 struct cfq_data *cfqd = (struct cfq_data *) data; 2018 struct cfq_data *cfqd = (struct cfq_data *) data;
2019 struct cfq_queue *cfqq; 2019 struct cfq_queue *cfqq;
2020 unsigned long flags; 2020 unsigned long flags;
2021 2021
2022 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 2022 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
2023 2023
2024 if ((cfqq = cfqd->active_queue) != NULL) { 2024 if ((cfqq = cfqd->active_queue) != NULL) {
2025 unsigned long now = jiffies; 2025 unsigned long now = jiffies;
2026 2026
2027 /* 2027 /*
2028 * expired 2028 * expired
2029 */ 2029 */
2030 if (time_after(now, cfqq->slice_end)) 2030 if (time_after(now, cfqq->slice_end))
2031 goto expire; 2031 goto expire;
2032 2032
2033 /* 2033 /*
2034 * only expire and reinvoke request handler if there are 2034 * only expire and reinvoke request handler if there are
2035 * other queues with pending requests 2035 * other queues with pending requests
2036 */ 2036 */
2037 if (!cfqd->busy_queues) { 2037 if (!cfqd->busy_queues) {
2038 cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end); 2038 cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end);
2039 add_timer(&cfqd->idle_slice_timer); 2039 add_timer(&cfqd->idle_slice_timer);
2040 goto out_cont; 2040 goto out_cont;
2041 } 2041 }
2042 2042
2043 /* 2043 /*
2044 * not expired and it has a request pending, let it dispatch 2044 * not expired and it has a request pending, let it dispatch
2045 */ 2045 */
2046 if (!RB_EMPTY(&cfqq->sort_list)) { 2046 if (!RB_EMPTY(&cfqq->sort_list)) {
2047 cfq_mark_cfqq_must_dispatch(cfqq); 2047 cfq_mark_cfqq_must_dispatch(cfqq);
2048 goto out_kick; 2048 goto out_kick;
2049 } 2049 }
2050 } 2050 }
2051 expire: 2051 expire:
2052 cfq_slice_expired(cfqd, 0); 2052 cfq_slice_expired(cfqd, 0);
2053 out_kick: 2053 out_kick:
2054 cfq_schedule_dispatch(cfqd); 2054 cfq_schedule_dispatch(cfqd);
2055 out_cont: 2055 out_cont:
2056 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 2056 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
2057 } 2057 }
2058 2058
2059 /* 2059 /*
2060 * Timer running if an idle class queue is waiting for service 2060 * Timer running if an idle class queue is waiting for service
2061 */ 2061 */
2062 static void cfq_idle_class_timer(unsigned long data) 2062 static void cfq_idle_class_timer(unsigned long data)
2063 { 2063 {
2064 struct cfq_data *cfqd = (struct cfq_data *) data; 2064 struct cfq_data *cfqd = (struct cfq_data *) data;
2065 unsigned long flags, end; 2065 unsigned long flags, end;
2066 2066
2067 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 2067 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
2068 2068
2069 /* 2069 /*
2070 * race with a non-idle queue, reset timer 2070 * race with a non-idle queue, reset timer
2071 */ 2071 */
2072 end = cfqd->last_end_request + CFQ_IDLE_GRACE; 2072 end = cfqd->last_end_request + CFQ_IDLE_GRACE;
2073 if (!time_after_eq(jiffies, end)) { 2073 if (!time_after_eq(jiffies, end)) {
2074 cfqd->idle_class_timer.expires = end; 2074 cfqd->idle_class_timer.expires = end;
2075 add_timer(&cfqd->idle_class_timer); 2075 add_timer(&cfqd->idle_class_timer);
2076 } else 2076 } else
2077 cfq_schedule_dispatch(cfqd); 2077 cfq_schedule_dispatch(cfqd);
2078 2078
2079 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 2079 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
2080 } 2080 }
2081 2081
2082 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) 2082 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
2083 { 2083 {
2084 del_timer_sync(&cfqd->idle_slice_timer); 2084 del_timer_sync(&cfqd->idle_slice_timer);
2085 del_timer_sync(&cfqd->idle_class_timer); 2085 del_timer_sync(&cfqd->idle_class_timer);
2086 blk_sync_queue(cfqd->queue); 2086 blk_sync_queue(cfqd->queue);
2087 } 2087 }
2088 2088
2089 static void cfq_put_cfqd(struct cfq_data *cfqd) 2089 static void cfq_put_cfqd(struct cfq_data *cfqd)
2090 { 2090 {
2091 request_queue_t *q = cfqd->queue; 2091 request_queue_t *q = cfqd->queue;
2092 2092
2093 if (!atomic_dec_and_test(&cfqd->ref)) 2093 if (!atomic_dec_and_test(&cfqd->ref))
2094 return; 2094 return;
2095 2095
2096 cfq_shutdown_timer_wq(cfqd); 2096 cfq_shutdown_timer_wq(cfqd);
2097 blk_put_queue(q); 2097 blk_put_queue(q);
2098 2098
2099 mempool_destroy(cfqd->crq_pool); 2099 mempool_destroy(cfqd->crq_pool);
2100 kfree(cfqd->crq_hash); 2100 kfree(cfqd->crq_hash);
2101 kfree(cfqd->cfq_hash); 2101 kfree(cfqd->cfq_hash);
2102 kfree(cfqd); 2102 kfree(cfqd);
2103 } 2103 }
2104 2104
2105 static void cfq_exit_queue(elevator_t *e) 2105 static void cfq_exit_queue(elevator_t *e)
2106 { 2106 {
2107 struct cfq_data *cfqd = e->elevator_data; 2107 struct cfq_data *cfqd = e->elevator_data;
2108 2108
2109 cfq_shutdown_timer_wq(cfqd); 2109 cfq_shutdown_timer_wq(cfqd);
2110 cfq_put_cfqd(cfqd); 2110 cfq_put_cfqd(cfqd);
2111 } 2111 }
2112 2112
2113 static int cfq_init_queue(request_queue_t *q, elevator_t *e) 2113 static int cfq_init_queue(request_queue_t *q, elevator_t *e)
2114 { 2114 {
2115 struct cfq_data *cfqd; 2115 struct cfq_data *cfqd;
2116 int i; 2116 int i;
2117 2117
2118 cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); 2118 cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
2119 if (!cfqd) 2119 if (!cfqd)
2120 return -ENOMEM; 2120 return -ENOMEM;
2121 2121
2122 memset(cfqd, 0, sizeof(*cfqd)); 2122 memset(cfqd, 0, sizeof(*cfqd));
2123 2123
2124 for (i = 0; i < CFQ_PRIO_LISTS; i++) 2124 for (i = 0; i < CFQ_PRIO_LISTS; i++)
2125 INIT_LIST_HEAD(&cfqd->rr_list[i]); 2125 INIT_LIST_HEAD(&cfqd->rr_list[i]);
2126 2126
2127 INIT_LIST_HEAD(&cfqd->busy_rr); 2127 INIT_LIST_HEAD(&cfqd->busy_rr);
2128 INIT_LIST_HEAD(&cfqd->cur_rr); 2128 INIT_LIST_HEAD(&cfqd->cur_rr);
2129 INIT_LIST_HEAD(&cfqd->idle_rr); 2129 INIT_LIST_HEAD(&cfqd->idle_rr);
2130 INIT_LIST_HEAD(&cfqd->empty_list); 2130 INIT_LIST_HEAD(&cfqd->empty_list);
2131 2131
2132 cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); 2132 cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
2133 if (!cfqd->crq_hash) 2133 if (!cfqd->crq_hash)
2134 goto out_crqhash; 2134 goto out_crqhash;
2135 2135
2136 cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL); 2136 cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
2137 if (!cfqd->cfq_hash) 2137 if (!cfqd->cfq_hash)
2138 goto out_cfqhash; 2138 goto out_cfqhash;
2139 2139
2140 cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool); 2140 cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool);
2141 if (!cfqd->crq_pool) 2141 if (!cfqd->crq_pool)
2142 goto out_crqpool; 2142 goto out_crqpool;
2143 2143
2144 for (i = 0; i < CFQ_MHASH_ENTRIES; i++) 2144 for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
2145 INIT_HLIST_HEAD(&cfqd->crq_hash[i]); 2145 INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
2146 for (i = 0; i < CFQ_QHASH_ENTRIES; i++) 2146 for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
2147 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); 2147 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
2148 2148
2149 e->elevator_data = cfqd; 2149 e->elevator_data = cfqd;
2150 2150
2151 cfqd->queue = q; 2151 cfqd->queue = q;
2152 atomic_inc(&q->refcnt); 2152 atomic_inc(&q->refcnt);
2153 2153
2154 cfqd->max_queued = q->nr_requests / 4; 2154 cfqd->max_queued = q->nr_requests / 4;
2155 q->nr_batching = cfq_queued; 2155 q->nr_batching = cfq_queued;
2156 2156
2157 init_timer(&cfqd->idle_slice_timer); 2157 init_timer(&cfqd->idle_slice_timer);
2158 cfqd->idle_slice_timer.function = cfq_idle_slice_timer; 2158 cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
2159 cfqd->idle_slice_timer.data = (unsigned long) cfqd; 2159 cfqd->idle_slice_timer.data = (unsigned long) cfqd;
2160 2160
2161 init_timer(&cfqd->idle_class_timer); 2161 init_timer(&cfqd->idle_class_timer);
2162 cfqd->idle_class_timer.function = cfq_idle_class_timer; 2162 cfqd->idle_class_timer.function = cfq_idle_class_timer;
2163 cfqd->idle_class_timer.data = (unsigned long) cfqd; 2163 cfqd->idle_class_timer.data = (unsigned long) cfqd;
2164 2164
2165 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q); 2165 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);
2166 2166
2167 atomic_set(&cfqd->ref, 1); 2167 atomic_set(&cfqd->ref, 1);
2168 2168
2169 cfqd->cfq_queued = cfq_queued; 2169 cfqd->cfq_queued = cfq_queued;
2170 cfqd->cfq_quantum = cfq_quantum; 2170 cfqd->cfq_quantum = cfq_quantum;
2171 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; 2171 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
2172 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; 2172 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
2173 cfqd->cfq_back_max = cfq_back_max; 2173 cfqd->cfq_back_max = cfq_back_max;
2174 cfqd->cfq_back_penalty = cfq_back_penalty; 2174 cfqd->cfq_back_penalty = cfq_back_penalty;
2175 cfqd->cfq_slice[0] = cfq_slice_async; 2175 cfqd->cfq_slice[0] = cfq_slice_async;
2176 cfqd->cfq_slice[1] = cfq_slice_sync; 2176 cfqd->cfq_slice[1] = cfq_slice_sync;
2177 cfqd->cfq_slice_async_rq = cfq_slice_async_rq; 2177 cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
2178 cfqd->cfq_slice_idle = cfq_slice_idle; 2178 cfqd->cfq_slice_idle = cfq_slice_idle;
2179 cfqd->cfq_max_depth = cfq_max_depth; 2179 cfqd->cfq_max_depth = cfq_max_depth;
2180 2180
2181 return 0; 2181 return 0;
2182 out_crqpool: 2182 out_crqpool:
2183 kfree(cfqd->cfq_hash); 2183 kfree(cfqd->cfq_hash);
2184 out_cfqhash: 2184 out_cfqhash:
2185 kfree(cfqd->crq_hash); 2185 kfree(cfqd->crq_hash);
2186 out_crqhash: 2186 out_crqhash:
2187 kfree(cfqd); 2187 kfree(cfqd);
2188 return -ENOMEM; 2188 return -ENOMEM;
2189 } 2189 }
2190 2190
2191 static void cfq_slab_kill(void) 2191 static void cfq_slab_kill(void)
2192 { 2192 {
2193 if (crq_pool) 2193 if (crq_pool)
2194 kmem_cache_destroy(crq_pool); 2194 kmem_cache_destroy(crq_pool);
2195 if (cfq_pool) 2195 if (cfq_pool)
2196 kmem_cache_destroy(cfq_pool); 2196 kmem_cache_destroy(cfq_pool);
2197 if (cfq_ioc_pool) 2197 if (cfq_ioc_pool)
2198 kmem_cache_destroy(cfq_ioc_pool); 2198 kmem_cache_destroy(cfq_ioc_pool);
2199 } 2199 }
2200 2200
2201 static int __init cfq_slab_setup(void) 2201 static int __init cfq_slab_setup(void)
2202 { 2202 {
2203 crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0, 2203 crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
2204 NULL, NULL); 2204 NULL, NULL);
2205 if (!crq_pool) 2205 if (!crq_pool)
2206 goto fail; 2206 goto fail;
2207 2207
2208 cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0, 2208 cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
2209 NULL, NULL); 2209 NULL, NULL);
2210 if (!cfq_pool) 2210 if (!cfq_pool)
2211 goto fail; 2211 goto fail;
2212 2212
2213 cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool", 2213 cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool",
2214 sizeof(struct cfq_io_context), 0, 0, NULL, NULL); 2214 sizeof(struct cfq_io_context), 0, 0, NULL, NULL);
2215 if (!cfq_ioc_pool) 2215 if (!cfq_ioc_pool)
2216 goto fail; 2216 goto fail;
2217 2217
2218 return 0; 2218 return 0;
2219 fail: 2219 fail:
2220 cfq_slab_kill(); 2220 cfq_slab_kill();
2221 return -ENOMEM; 2221 return -ENOMEM;
2222 } 2222 }
2223 2223
2224 /* 2224 /*
2225 * sysfs parts below --> 2225 * sysfs parts below -->
2226 */ 2226 */
2227 struct cfq_fs_entry { 2227 struct cfq_fs_entry {
2228 struct attribute attr; 2228 struct attribute attr;
2229 ssize_t (*show)(struct cfq_data *, char *); 2229 ssize_t (*show)(struct cfq_data *, char *);
2230 ssize_t (*store)(struct cfq_data *, const char *, size_t); 2230 ssize_t (*store)(struct cfq_data *, const char *, size_t);
2231 }; 2231 };
2232 2232
2233 static ssize_t 2233 static ssize_t
2234 cfq_var_show(unsigned int var, char *page) 2234 cfq_var_show(unsigned int var, char *page)
2235 { 2235 {
2236 return sprintf(page, "%d\n", var); 2236 return sprintf(page, "%d\n", var);
2237 } 2237 }
2238 2238
2239 static ssize_t 2239 static ssize_t
2240 cfq_var_store(unsigned int *var, const char *page, size_t count) 2240 cfq_var_store(unsigned int *var, const char *page, size_t count)
2241 { 2241 {
2242 char *p = (char *) page; 2242 char *p = (char *) page;
2243 2243
2244 *var = simple_strtoul(p, &p, 10); 2244 *var = simple_strtoul(p, &p, 10);
2245 return count; 2245 return count;
2246 } 2246 }
2247 2247
2248 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ 2248 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
2249 static ssize_t __FUNC(struct cfq_data *cfqd, char *page) \ 2249 static ssize_t __FUNC(struct cfq_data *cfqd, char *page) \
2250 { \ 2250 { \
2251 unsigned int __data = __VAR; \ 2251 unsigned int __data = __VAR; \
2252 if (__CONV) \ 2252 if (__CONV) \
2253 __data = jiffies_to_msecs(__data); \ 2253 __data = jiffies_to_msecs(__data); \
2254 return cfq_var_show(__data, (page)); \ 2254 return cfq_var_show(__data, (page)); \
2255 } 2255 }
2256 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); 2256 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
2257 SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0); 2257 SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0);
2258 SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); 2258 SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
2259 SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); 2259 SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
2260 SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0); 2260 SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0);
2261 SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0); 2261 SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0);
2262 SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); 2262 SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
2263 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); 2263 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
2264 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); 2264 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
2265 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); 2265 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
2266 SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0); 2266 SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0);
2267 #undef SHOW_FUNCTION 2267 #undef SHOW_FUNCTION
2268 2268
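Editor's note: the SHOW_FUNCTION() macro above stamps out one sysfs read handler per tunable, with __CONV selecting whether the stored jiffies value is converted to milliseconds before it is printed. As an illustration only (this code is not part of the patch), the expansion for cfq_slice_idle_show comes out roughly like this after preprocessing:

/* sketch of SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1) expanded */
static ssize_t cfq_slice_idle_show(struct cfq_data *cfqd, char *page)
{
        unsigned int __data = cfqd->cfq_slice_idle;     /* __VAR */
        if (1)                                          /* __CONV: jiffies -> msecs */
                __data = jiffies_to_msecs(__data);
        return cfq_var_show(__data, (page));
}
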
2269 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ 2269 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
2270 static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count) \ 2270 static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count) \
2271 { \ 2271 { \
2272 unsigned int __data; \ 2272 unsigned int __data; \
2273 int ret = cfq_var_store(&__data, (page), count); \ 2273 int ret = cfq_var_store(&__data, (page), count); \
2274 if (__data < (MIN)) \ 2274 if (__data < (MIN)) \
2275 __data = (MIN); \ 2275 __data = (MIN); \
2276 else if (__data > (MAX)) \ 2276 else if (__data > (MAX)) \
2277 __data = (MAX); \ 2277 __data = (MAX); \
2278 if (__CONV) \ 2278 if (__CONV) \
2279 *(__PTR) = msecs_to_jiffies(__data); \ 2279 *(__PTR) = msecs_to_jiffies(__data); \
2280 else \ 2280 else \
2281 *(__PTR) = __data; \ 2281 *(__PTR) = __data; \
2282 return ret; \ 2282 return ret; \
2283 } 2283 }
2284 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); 2284 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
2285 STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0); 2285 STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0);
2286 STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1); 2286 STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1);
2287 STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1); 2287 STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1);
2288 STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); 2288 STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
2289 STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0); 2289 STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0);
2290 STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); 2290 STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
2291 STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); 2291 STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
2292 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); 2292 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
2293 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); 2293 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0);
2294 STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0); 2294 STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0);
2295 #undef STORE_FUNCTION 2295 #undef STORE_FUNCTION
2296 2296
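Editor's note: STORE_FUNCTION() generates the matching write handlers, clamping the parsed value to [MIN, MAX] and converting milliseconds back to jiffies when __CONV is set. Again purely as an illustration of the macro, not as an addition to the commit, the expansion for cfq_slice_sync_store is roughly:

/* sketch of STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1) expanded */
static ssize_t cfq_slice_sync_store(struct cfq_data *cfqd, const char *page, size_t count)
{
        unsigned int __data;
        int ret = cfq_var_store(&__data, (page), count);
        if (__data < (1))                       /* clamp to MIN */
                __data = (1);
        else if (__data > (UINT_MAX))           /* clamp to MAX (never true for unsigned int) */
                __data = (UINT_MAX);
        if (1)                                  /* __CONV: sysfs value is in msecs */
                *(&cfqd->cfq_slice[1]) = msecs_to_jiffies(__data);
        else
                *(&cfqd->cfq_slice[1]) = __data;
        return ret;
}
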
2297 static struct cfq_fs_entry cfq_quantum_entry = { 2297 static struct cfq_fs_entry cfq_quantum_entry = {
2298 .attr = {.name = "quantum", .mode = S_IRUGO | S_IWUSR }, 2298 .attr = {.name = "quantum", .mode = S_IRUGO | S_IWUSR },
2299 .show = cfq_quantum_show, 2299 .show = cfq_quantum_show,
2300 .store = cfq_quantum_store, 2300 .store = cfq_quantum_store,
2301 }; 2301 };
2302 static struct cfq_fs_entry cfq_queued_entry = { 2302 static struct cfq_fs_entry cfq_queued_entry = {
2303 .attr = {.name = "queued", .mode = S_IRUGO | S_IWUSR }, 2303 .attr = {.name = "queued", .mode = S_IRUGO | S_IWUSR },
2304 .show = cfq_queued_show, 2304 .show = cfq_queued_show,
2305 .store = cfq_queued_store, 2305 .store = cfq_queued_store,
2306 }; 2306 };
2307 static struct cfq_fs_entry cfq_fifo_expire_sync_entry = { 2307 static struct cfq_fs_entry cfq_fifo_expire_sync_entry = {
2308 .attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR }, 2308 .attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR },
2309 .show = cfq_fifo_expire_sync_show, 2309 .show = cfq_fifo_expire_sync_show,
2310 .store = cfq_fifo_expire_sync_store, 2310 .store = cfq_fifo_expire_sync_store,
2311 }; 2311 };
2312 static struct cfq_fs_entry cfq_fifo_expire_async_entry = { 2312 static struct cfq_fs_entry cfq_fifo_expire_async_entry = {
2313 .attr = {.name = "fifo_expire_async", .mode = S_IRUGO | S_IWUSR }, 2313 .attr = {.name = "fifo_expire_async", .mode = S_IRUGO | S_IWUSR },
2314 .show = cfq_fifo_expire_async_show, 2314 .show = cfq_fifo_expire_async_show,
2315 .store = cfq_fifo_expire_async_store, 2315 .store = cfq_fifo_expire_async_store,
2316 }; 2316 };
2317 static struct cfq_fs_entry cfq_back_max_entry = { 2317 static struct cfq_fs_entry cfq_back_max_entry = {
2318 .attr = {.name = "back_seek_max", .mode = S_IRUGO | S_IWUSR }, 2318 .attr = {.name = "back_seek_max", .mode = S_IRUGO | S_IWUSR },
2319 .show = cfq_back_max_show, 2319 .show = cfq_back_max_show,
2320 .store = cfq_back_max_store, 2320 .store = cfq_back_max_store,
2321 }; 2321 };
2322 static struct cfq_fs_entry cfq_back_penalty_entry = { 2322 static struct cfq_fs_entry cfq_back_penalty_entry = {
2323 .attr = {.name = "back_seek_penalty", .mode = S_IRUGO | S_IWUSR }, 2323 .attr = {.name = "back_seek_penalty", .mode = S_IRUGO | S_IWUSR },
2324 .show = cfq_back_penalty_show, 2324 .show = cfq_back_penalty_show,
2325 .store = cfq_back_penalty_store, 2325 .store = cfq_back_penalty_store,
2326 }; 2326 };
2327 static struct cfq_fs_entry cfq_slice_sync_entry = { 2327 static struct cfq_fs_entry cfq_slice_sync_entry = {
2328 .attr = {.name = "slice_sync", .mode = S_IRUGO | S_IWUSR }, 2328 .attr = {.name = "slice_sync", .mode = S_IRUGO | S_IWUSR },
2329 .show = cfq_slice_sync_show, 2329 .show = cfq_slice_sync_show,
2330 .store = cfq_slice_sync_store, 2330 .store = cfq_slice_sync_store,
2331 }; 2331 };
2332 static struct cfq_fs_entry cfq_slice_async_entry = { 2332 static struct cfq_fs_entry cfq_slice_async_entry = {
2333 .attr = {.name = "slice_async", .mode = S_IRUGO | S_IWUSR }, 2333 .attr = {.name = "slice_async", .mode = S_IRUGO | S_IWUSR },
2334 .show = cfq_slice_async_show, 2334 .show = cfq_slice_async_show,
2335 .store = cfq_slice_async_store, 2335 .store = cfq_slice_async_store,
2336 }; 2336 };
2337 static struct cfq_fs_entry cfq_slice_async_rq_entry = { 2337 static struct cfq_fs_entry cfq_slice_async_rq_entry = {
2338 .attr = {.name = "slice_async_rq", .mode = S_IRUGO | S_IWUSR }, 2338 .attr = {.name = "slice_async_rq", .mode = S_IRUGO | S_IWUSR },
2339 .show = cfq_slice_async_rq_show, 2339 .show = cfq_slice_async_rq_show,
2340 .store = cfq_slice_async_rq_store, 2340 .store = cfq_slice_async_rq_store,
2341 }; 2341 };
2342 static struct cfq_fs_entry cfq_slice_idle_entry = { 2342 static struct cfq_fs_entry cfq_slice_idle_entry = {
2343 .attr = {.name = "slice_idle", .mode = S_IRUGO | S_IWUSR }, 2343 .attr = {.name = "slice_idle", .mode = S_IRUGO | S_IWUSR },
2344 .show = cfq_slice_idle_show, 2344 .show = cfq_slice_idle_show,
2345 .store = cfq_slice_idle_store, 2345 .store = cfq_slice_idle_store,
2346 }; 2346 };
2347 static struct cfq_fs_entry cfq_max_depth_entry = { 2347 static struct cfq_fs_entry cfq_max_depth_entry = {
2348 .attr = {.name = "max_depth", .mode = S_IRUGO | S_IWUSR }, 2348 .attr = {.name = "max_depth", .mode = S_IRUGO | S_IWUSR },
2349 .show = cfq_max_depth_show, 2349 .show = cfq_max_depth_show,
2350 .store = cfq_max_depth_store, 2350 .store = cfq_max_depth_store,
2351 }; 2351 };
2352 2352
2353 static struct attribute *default_attrs[] = { 2353 static struct attribute *default_attrs[] = {
2354 &cfq_quantum_entry.attr, 2354 &cfq_quantum_entry.attr,
2355 &cfq_queued_entry.attr, 2355 &cfq_queued_entry.attr,
2356 &cfq_fifo_expire_sync_entry.attr, 2356 &cfq_fifo_expire_sync_entry.attr,
2357 &cfq_fifo_expire_async_entry.attr, 2357 &cfq_fifo_expire_async_entry.attr,
2358 &cfq_back_max_entry.attr, 2358 &cfq_back_max_entry.attr,
2359 &cfq_back_penalty_entry.attr, 2359 &cfq_back_penalty_entry.attr,
2360 &cfq_slice_sync_entry.attr, 2360 &cfq_slice_sync_entry.attr,
2361 &cfq_slice_async_entry.attr, 2361 &cfq_slice_async_entry.attr,
2362 &cfq_slice_async_rq_entry.attr, 2362 &cfq_slice_async_rq_entry.attr,
2363 &cfq_slice_idle_entry.attr, 2363 &cfq_slice_idle_entry.attr,
2364 &cfq_max_depth_entry.attr, 2364 &cfq_max_depth_entry.attr,
2365 NULL, 2365 NULL,
2366 }; 2366 };
2367 2367
2368 #define to_cfq(atr) container_of((atr), struct cfq_fs_entry, attr) 2368 #define to_cfq(atr) container_of((atr), struct cfq_fs_entry, attr)
2369 2369
2370 static ssize_t 2370 static ssize_t
2371 cfq_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 2371 cfq_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
2372 { 2372 {
2373 elevator_t *e = container_of(kobj, elevator_t, kobj); 2373 elevator_t *e = container_of(kobj, elevator_t, kobj);
2374 struct cfq_fs_entry *entry = to_cfq(attr); 2374 struct cfq_fs_entry *entry = to_cfq(attr);
2375 2375
2376 if (!entry->show) 2376 if (!entry->show)
2377 return -EIO; 2377 return -EIO;
2378 2378
2379 return entry->show(e->elevator_data, page); 2379 return entry->show(e->elevator_data, page);
2380 } 2380 }
2381 2381
2382 static ssize_t 2382 static ssize_t
2383 cfq_attr_store(struct kobject *kobj, struct attribute *attr, 2383 cfq_attr_store(struct kobject *kobj, struct attribute *attr,
2384 const char *page, size_t length) 2384 const char *page, size_t length)
2385 { 2385 {
2386 elevator_t *e = container_of(kobj, elevator_t, kobj); 2386 elevator_t *e = container_of(kobj, elevator_t, kobj);
2387 struct cfq_fs_entry *entry = to_cfq(attr); 2387 struct cfq_fs_entry *entry = to_cfq(attr);
2388 2388
2389 if (!entry->store) 2389 if (!entry->store)
2390 return -EIO; 2390 return -EIO;
2391 2391
2392 return entry->store(e->elevator_data, page, length); 2392 return entry->store(e->elevator_data, page, length);
2393 } 2393 }
2394 2394
2395 static struct sysfs_ops cfq_sysfs_ops = { 2395 static struct sysfs_ops cfq_sysfs_ops = {
2396 .show = cfq_attr_show, 2396 .show = cfq_attr_show,
2397 .store = cfq_attr_store, 2397 .store = cfq_attr_store,
2398 }; 2398 };
2399 2399
2400 static struct kobj_type cfq_ktype = { 2400 static struct kobj_type cfq_ktype = {
2401 .sysfs_ops = &cfq_sysfs_ops, 2401 .sysfs_ops = &cfq_sysfs_ops,
2402 .default_attrs = default_attrs, 2402 .default_attrs = default_attrs,
2403 }; 2403 };
2404 2404
2405 static struct elevator_type iosched_cfq = { 2405 static struct elevator_type iosched_cfq = {
2406 .ops = { 2406 .ops = {
2407 .elevator_merge_fn = cfq_merge, 2407 .elevator_merge_fn = cfq_merge,
2408 .elevator_merged_fn = cfq_merged_request, 2408 .elevator_merged_fn = cfq_merged_request,
2409 .elevator_merge_req_fn = cfq_merged_requests, 2409 .elevator_merge_req_fn = cfq_merged_requests,
2410 .elevator_dispatch_fn = cfq_dispatch_requests, 2410 .elevator_dispatch_fn = cfq_dispatch_requests,
2411 .elevator_add_req_fn = cfq_insert_request, 2411 .elevator_add_req_fn = cfq_insert_request,
2412 .elevator_activate_req_fn = cfq_activate_request, 2412 .elevator_activate_req_fn = cfq_activate_request,
2413 .elevator_deactivate_req_fn = cfq_deactivate_request, 2413 .elevator_deactivate_req_fn = cfq_deactivate_request,
2414 .elevator_queue_empty_fn = cfq_queue_empty, 2414 .elevator_queue_empty_fn = cfq_queue_empty,
2415 .elevator_completed_req_fn = cfq_completed_request, 2415 .elevator_completed_req_fn = cfq_completed_request,
2416 .elevator_former_req_fn = cfq_former_request, 2416 .elevator_former_req_fn = cfq_former_request,
2417 .elevator_latter_req_fn = cfq_latter_request, 2417 .elevator_latter_req_fn = cfq_latter_request,
2418 .elevator_set_req_fn = cfq_set_request, 2418 .elevator_set_req_fn = cfq_set_request,
2419 .elevator_put_req_fn = cfq_put_request, 2419 .elevator_put_req_fn = cfq_put_request,
2420 .elevator_may_queue_fn = cfq_may_queue, 2420 .elevator_may_queue_fn = cfq_may_queue,
2421 .elevator_init_fn = cfq_init_queue, 2421 .elevator_init_fn = cfq_init_queue,
2422 .elevator_exit_fn = cfq_exit_queue, 2422 .elevator_exit_fn = cfq_exit_queue,
2423 }, 2423 },
2424 .elevator_ktype = &cfq_ktype, 2424 .elevator_ktype = &cfq_ktype,
2425 .elevator_name = "cfq", 2425 .elevator_name = "cfq",
2426 .elevator_owner = THIS_MODULE, 2426 .elevator_owner = THIS_MODULE,
2427 }; 2427 };
2428 2428
2429 static int __init cfq_init(void) 2429 static int __init cfq_init(void)
2430 { 2430 {
2431 int ret; 2431 int ret;
2432 2432
2433 /* 2433 /*
2434 * could be 0 on HZ < 1000 setups 2434 * could be 0 on HZ < 1000 setups
2435 */ 2435 */
2436 if (!cfq_slice_async) 2436 if (!cfq_slice_async)
2437 cfq_slice_async = 1; 2437 cfq_slice_async = 1;
2438 if (!cfq_slice_idle) 2438 if (!cfq_slice_idle)
2439 cfq_slice_idle = 1; 2439 cfq_slice_idle = 1;
2440 2440
2441 if (cfq_slab_setup()) 2441 if (cfq_slab_setup())
2442 return -ENOMEM; 2442 return -ENOMEM;
2443 2443
2444 ret = elv_register(&iosched_cfq); 2444 ret = elv_register(&iosched_cfq);
2445 if (ret) 2445 if (ret)
2446 cfq_slab_kill(); 2446 cfq_slab_kill();
2447 2447
2448 return ret; 2448 return ret;
2449 } 2449 }
2450 2450
2451 static void __exit cfq_exit(void) 2451 static void __exit cfq_exit(void)
2452 { 2452 {
2453 elv_unregister(&iosched_cfq); 2453 elv_unregister(&iosched_cfq);
2454 cfq_slab_kill(); 2454 cfq_slab_kill();
2455 } 2455 }
2456 2456
2457 module_init(cfq_init); 2457 module_init(cfq_init);
2458 module_exit(cfq_exit); 2458 module_exit(cfq_exit);
2459 2459
2460 MODULE_AUTHOR("Jens Axboe"); 2460 MODULE_AUTHOR("Jens Axboe");
2461 MODULE_LICENSE("GPL"); 2461 MODULE_LICENSE("GPL");
2462 MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler"); 2462 MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");
2463 2463
block/deadline-iosched.c
1 /* 1 /*
2 * Deadline i/o scheduler. 2 * Deadline i/o scheduler.
3 * 3 *
4 * Copyright (C) 2002 Jens Axboe <axboe@suse.de> 4 * Copyright (C) 2002 Jens Axboe <axboe@suse.de>
5 */ 5 */
6 #include <linux/kernel.h> 6 #include <linux/kernel.h>
7 #include <linux/fs.h> 7 #include <linux/fs.h>
8 #include <linux/blkdev.h> 8 #include <linux/blkdev.h>
9 #include <linux/elevator.h> 9 #include <linux/elevator.h>
10 #include <linux/bio.h> 10 #include <linux/bio.h>
11 #include <linux/config.h> 11 #include <linux/config.h>
12 #include <linux/module.h> 12 #include <linux/module.h>
13 #include <linux/slab.h> 13 #include <linux/slab.h>
14 #include <linux/init.h> 14 #include <linux/init.h>
15 #include <linux/compiler.h> 15 #include <linux/compiler.h>
16 #include <linux/hash.h> 16 #include <linux/hash.h>
17 #include <linux/rbtree.h> 17 #include <linux/rbtree.h>
18 18
19 /* 19 /*
20 * See Documentation/block/deadline-iosched.txt 20 * See Documentation/block/deadline-iosched.txt
21 */ 21 */
22 static int read_expire = HZ / 2; /* max time before a read is submitted. */ 22 static const int read_expire = HZ / 2; /* max time before a read is submitted. */
23 static int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ 23 static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
24 static int writes_starved = 2; /* max times reads can starve a write */ 24 static const int writes_starved = 2; /* max times reads can starve a write */
25 static int fifo_batch = 16; /* # of sequential requests treated as one 25 static const int fifo_batch = 16; /* # of sequential requests treated as one
26 by the above parameters. For throughput. */ 26 by the above parameters. For throughput. */
27 27
28 static const int deadline_hash_shift = 5; 28 static const int deadline_hash_shift = 5;
29 #define DL_HASH_BLOCK(sec) ((sec) >> 3) 29 #define DL_HASH_BLOCK(sec) ((sec) >> 3)
30 #define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift)) 30 #define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
31 #define DL_HASH_ENTRIES (1 << deadline_hash_shift) 31 #define DL_HASH_ENTRIES (1 << deadline_hash_shift)
32 #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) 32 #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
33 #define list_entry_hash(ptr) list_entry((ptr), struct deadline_rq, hash) 33 #define list_entry_hash(ptr) list_entry((ptr), struct deadline_rq, hash)
34 #define ON_HASH(drq) (drq)->on_hash 34 #define ON_HASH(drq) (drq)->on_hash
35 35
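Editor's note: the macros above implement deadline's back-merge hash. A request is keyed by its end sector (rq_hash_key), end sectors are grouped into blocks of eight (DL_HASH_BLOCK), and hash_long() folds the block number into one of DL_HASH_ENTRIES (1 << 5 == 32) buckets. The following stand-alone sketch, which is not part of the patch and assumes the usual kernel headers (<linux/types.h> for sector_t, <linux/hash.h> for hash_long), shows how a hypothetical request ending at sector 1008 would be bucketed:

/* editorial illustration only */
static unsigned long example_dl_hash_bucket(void)
{
        sector_t end = 1000 + 8;        /* rq_hash_key(): rq->sector + rq->nr_sectors */
        sector_t block = end >> 3;      /* DL_HASH_BLOCK(): 126 */

        /* DL_HASH_FN(): fold into one of the 32 hash buckets */
        return hash_long(block, deadline_hash_shift);
}
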
36 struct deadline_data { 36 struct deadline_data {
37 /* 37 /*
38 * run time data 38 * run time data
39 */ 39 */
40 40
41 /* 41 /*
42 * requests (deadline_rq s) are present on both sort_list and fifo_list 42 * requests (deadline_rq s) are present on both sort_list and fifo_list
43 */ 43 */
44 struct rb_root sort_list[2]; 44 struct rb_root sort_list[2];
45 struct list_head fifo_list[2]; 45 struct list_head fifo_list[2];
46 46
47 /* 47 /*
48 * next in sort order. read, write or both are NULL 48 * next in sort order. read, write or both are NULL
49 */ 49 */
50 struct deadline_rq *next_drq[2]; 50 struct deadline_rq *next_drq[2];
51 struct list_head *hash; /* request hash */ 51 struct list_head *hash; /* request hash */
52 unsigned int batching; /* number of sequential requests made */ 52 unsigned int batching; /* number of sequential requests made */
53 sector_t last_sector; /* head position */ 53 sector_t last_sector; /* head position */
54 unsigned int starved; /* times reads have starved writes */ 54 unsigned int starved; /* times reads have starved writes */
55 55
56 /* 56 /*
57 * settings that change how the i/o scheduler behaves 57 * settings that change how the i/o scheduler behaves
58 */ 58 */
59 int fifo_expire[2]; 59 int fifo_expire[2];
60 int fifo_batch; 60 int fifo_batch;
61 int writes_starved; 61 int writes_starved;
62 int front_merges; 62 int front_merges;
63 63
64 mempool_t *drq_pool; 64 mempool_t *drq_pool;
65 }; 65 };
66 66
67 /* 67 /*
68 * pre-request data. 68 * pre-request data.
69 */ 69 */
70 struct deadline_rq { 70 struct deadline_rq {
71 /* 71 /*
72 * rbtree index, key is the starting offset 72 * rbtree index, key is the starting offset
73 */ 73 */
74 struct rb_node rb_node; 74 struct rb_node rb_node;
75 sector_t rb_key; 75 sector_t rb_key;
76 76
77 struct request *request; 77 struct request *request;
78 78
79 /* 79 /*
80 * request hash, key is the ending offset (for back merge lookup) 80 * request hash, key is the ending offset (for back merge lookup)
81 */ 81 */
82 struct list_head hash; 82 struct list_head hash;
83 char on_hash; 83 char on_hash;
84 84
85 /* 85 /*
86 * expire fifo 86 * expire fifo
87 */ 87 */
88 struct list_head fifo; 88 struct list_head fifo;
89 unsigned long expires; 89 unsigned long expires;
90 }; 90 };
91 91
92 static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq); 92 static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq);
93 93
94 static kmem_cache_t *drq_pool; 94 static kmem_cache_t *drq_pool;
95 95
96 #define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private) 96 #define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private)
97 97
98 /* 98 /*
99 * the back merge hash support functions 99 * the back merge hash support functions
100 */ 100 */
101 static inline void __deadline_del_drq_hash(struct deadline_rq *drq) 101 static inline void __deadline_del_drq_hash(struct deadline_rq *drq)
102 { 102 {
103 drq->on_hash = 0; 103 drq->on_hash = 0;
104 list_del_init(&drq->hash); 104 list_del_init(&drq->hash);
105 } 105 }
106 106
107 static inline void deadline_del_drq_hash(struct deadline_rq *drq) 107 static inline void deadline_del_drq_hash(struct deadline_rq *drq)
108 { 108 {
109 if (ON_HASH(drq)) 109 if (ON_HASH(drq))
110 __deadline_del_drq_hash(drq); 110 __deadline_del_drq_hash(drq);
111 } 111 }
112 112
113 static inline void 113 static inline void
114 deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) 114 deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
115 { 115 {
116 struct request *rq = drq->request; 116 struct request *rq = drq->request;
117 117
118 BUG_ON(ON_HASH(drq)); 118 BUG_ON(ON_HASH(drq));
119 119
120 drq->on_hash = 1; 120 drq->on_hash = 1;
121 list_add(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]); 121 list_add(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]);
122 } 122 }
123 123
124 /* 124 /*
125 * move hot entry to front of chain 125 * move hot entry to front of chain
126 */ 126 */
127 static inline void 127 static inline void
128 deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) 128 deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
129 { 129 {
130 struct request *rq = drq->request; 130 struct request *rq = drq->request;
131 struct list_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))]; 131 struct list_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))];
132 132
133 if (ON_HASH(drq) && drq->hash.prev != head) { 133 if (ON_HASH(drq) && drq->hash.prev != head) {
134 list_del(&drq->hash); 134 list_del(&drq->hash);
135 list_add(&drq->hash, head); 135 list_add(&drq->hash, head);
136 } 136 }
137 } 137 }
138 138
139 static struct request * 139 static struct request *
140 deadline_find_drq_hash(struct deadline_data *dd, sector_t offset) 140 deadline_find_drq_hash(struct deadline_data *dd, sector_t offset)
141 { 141 {
142 struct list_head *hash_list = &dd->hash[DL_HASH_FN(offset)]; 142 struct list_head *hash_list = &dd->hash[DL_HASH_FN(offset)];
143 struct list_head *entry, *next = hash_list->next; 143 struct list_head *entry, *next = hash_list->next;
144 144
145 while ((entry = next) != hash_list) { 145 while ((entry = next) != hash_list) {
146 struct deadline_rq *drq = list_entry_hash(entry); 146 struct deadline_rq *drq = list_entry_hash(entry);
147 struct request *__rq = drq->request; 147 struct request *__rq = drq->request;
148 148
149 next = entry->next; 149 next = entry->next;
150 150
151 BUG_ON(!ON_HASH(drq)); 151 BUG_ON(!ON_HASH(drq));
152 152
153 if (!rq_mergeable(__rq)) { 153 if (!rq_mergeable(__rq)) {
154 __deadline_del_drq_hash(drq); 154 __deadline_del_drq_hash(drq);
155 continue; 155 continue;
156 } 156 }
157 157
158 if (rq_hash_key(__rq) == offset) 158 if (rq_hash_key(__rq) == offset)
159 return __rq; 159 return __rq;
160 } 160 }
161 161
162 return NULL; 162 return NULL;
163 } 163 }
164 164
165 /* 165 /*
166 * rb tree support functions 166 * rb tree support functions
167 */ 167 */
168 #define RB_NONE (2) 168 #define RB_NONE (2)
169 #define RB_EMPTY(root) ((root)->rb_node == NULL) 169 #define RB_EMPTY(root) ((root)->rb_node == NULL)
170 #define ON_RB(node) ((node)->rb_color != RB_NONE) 170 #define ON_RB(node) ((node)->rb_color != RB_NONE)
171 #define RB_CLEAR(node) ((node)->rb_color = RB_NONE) 171 #define RB_CLEAR(node) ((node)->rb_color = RB_NONE)
172 #define rb_entry_drq(node) rb_entry((node), struct deadline_rq, rb_node) 172 #define rb_entry_drq(node) rb_entry((node), struct deadline_rq, rb_node)
173 #define DRQ_RB_ROOT(dd, drq) (&(dd)->sort_list[rq_data_dir((drq)->request)]) 173 #define DRQ_RB_ROOT(dd, drq) (&(dd)->sort_list[rq_data_dir((drq)->request)])
174 #define rq_rb_key(rq) (rq)->sector 174 #define rq_rb_key(rq) (rq)->sector
175 175
176 static struct deadline_rq * 176 static struct deadline_rq *
177 __deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) 177 __deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
178 { 178 {
179 struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node; 179 struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node;
180 struct rb_node *parent = NULL; 180 struct rb_node *parent = NULL;
181 struct deadline_rq *__drq; 181 struct deadline_rq *__drq;
182 182
183 while (*p) { 183 while (*p) {
184 parent = *p; 184 parent = *p;
185 __drq = rb_entry_drq(parent); 185 __drq = rb_entry_drq(parent);
186 186
187 if (drq->rb_key < __drq->rb_key) 187 if (drq->rb_key < __drq->rb_key)
188 p = &(*p)->rb_left; 188 p = &(*p)->rb_left;
189 else if (drq->rb_key > __drq->rb_key) 189 else if (drq->rb_key > __drq->rb_key)
190 p = &(*p)->rb_right; 190 p = &(*p)->rb_right;
191 else 191 else
192 return __drq; 192 return __drq;
193 } 193 }
194 194
195 rb_link_node(&drq->rb_node, parent, p); 195 rb_link_node(&drq->rb_node, parent, p);
196 return NULL; 196 return NULL;
197 } 197 }
198 198
199 static void 199 static void
200 deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) 200 deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
201 { 201 {
202 struct deadline_rq *__alias; 202 struct deadline_rq *__alias;
203 203
204 drq->rb_key = rq_rb_key(drq->request); 204 drq->rb_key = rq_rb_key(drq->request);
205 205
206 retry: 206 retry:
207 __alias = __deadline_add_drq_rb(dd, drq); 207 __alias = __deadline_add_drq_rb(dd, drq);
208 if (!__alias) { 208 if (!__alias) {
209 rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); 209 rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
210 return; 210 return;
211 } 211 }
212 212
213 deadline_move_request(dd, __alias); 213 deadline_move_request(dd, __alias);
214 goto retry; 214 goto retry;
215 } 215 }
216 216
217 static inline void 217 static inline void
218 deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) 218 deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
219 { 219 {
220 const int data_dir = rq_data_dir(drq->request); 220 const int data_dir = rq_data_dir(drq->request);
221 221
222 if (dd->next_drq[data_dir] == drq) { 222 if (dd->next_drq[data_dir] == drq) {
223 struct rb_node *rbnext = rb_next(&drq->rb_node); 223 struct rb_node *rbnext = rb_next(&drq->rb_node);
224 224
225 dd->next_drq[data_dir] = NULL; 225 dd->next_drq[data_dir] = NULL;
226 if (rbnext) 226 if (rbnext)
227 dd->next_drq[data_dir] = rb_entry_drq(rbnext); 227 dd->next_drq[data_dir] = rb_entry_drq(rbnext);
228 } 228 }
229 229
230 BUG_ON(!ON_RB(&drq->rb_node)); 230 BUG_ON(!ON_RB(&drq->rb_node));
231 rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); 231 rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
232 RB_CLEAR(&drq->rb_node); 232 RB_CLEAR(&drq->rb_node);
233 } 233 }
234 234
235 static struct request * 235 static struct request *
236 deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir) 236 deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir)
237 { 237 {
238 struct rb_node *n = dd->sort_list[data_dir].rb_node; 238 struct rb_node *n = dd->sort_list[data_dir].rb_node;
239 struct deadline_rq *drq; 239 struct deadline_rq *drq;
240 240
241 while (n) { 241 while (n) {
242 drq = rb_entry_drq(n); 242 drq = rb_entry_drq(n);
243 243
244 if (sector < drq->rb_key) 244 if (sector < drq->rb_key)
245 n = n->rb_left; 245 n = n->rb_left;
246 else if (sector > drq->rb_key) 246 else if (sector > drq->rb_key)
247 n = n->rb_right; 247 n = n->rb_right;
248 else 248 else
249 return drq->request; 249 return drq->request;
250 } 250 }
251 251
252 return NULL; 252 return NULL;
253 } 253 }
254 254
255 /* 255 /*
256 * deadline_find_first_drq finds the first (lowest sector numbered) request 256 * deadline_find_first_drq finds the first (lowest sector numbered) request
257 * for the specified data_dir. Used to sweep back to the start of the disk 257 * for the specified data_dir. Used to sweep back to the start of the disk
258 * (1-way elevator) after we process the last (highest sector) request. 258 * (1-way elevator) after we process the last (highest sector) request.
259 */ 259 */
260 static struct deadline_rq * 260 static struct deadline_rq *
261 deadline_find_first_drq(struct deadline_data *dd, int data_dir) 261 deadline_find_first_drq(struct deadline_data *dd, int data_dir)
262 { 262 {
263 struct rb_node *n = dd->sort_list[data_dir].rb_node; 263 struct rb_node *n = dd->sort_list[data_dir].rb_node;
264 264
265 for (;;) { 265 for (;;) {
266 if (n->rb_left == NULL) 266 if (n->rb_left == NULL)
267 return rb_entry_drq(n); 267 return rb_entry_drq(n);
268 268
269 n = n->rb_left; 269 n = n->rb_left;
270 } 270 }
271 } 271 }
272 272
273 /* 273 /*
274 * add drq to rbtree and fifo 274 * add drq to rbtree and fifo
275 */ 275 */
276 static void 276 static void
277 deadline_add_request(struct request_queue *q, struct request *rq) 277 deadline_add_request(struct request_queue *q, struct request *rq)
278 { 278 {
279 struct deadline_data *dd = q->elevator->elevator_data; 279 struct deadline_data *dd = q->elevator->elevator_data;
280 struct deadline_rq *drq = RQ_DATA(rq); 280 struct deadline_rq *drq = RQ_DATA(rq);
281 281
282 const int data_dir = rq_data_dir(drq->request); 282 const int data_dir = rq_data_dir(drq->request);
283 283
284 deadline_add_drq_rb(dd, drq); 284 deadline_add_drq_rb(dd, drq);
285 /* 285 /*
286 * set expire time (only used for reads) and add to fifo list 286 * set expire time (only used for reads) and add to fifo list
287 */ 287 */
288 drq->expires = jiffies + dd->fifo_expire[data_dir]; 288 drq->expires = jiffies + dd->fifo_expire[data_dir];
289 list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]); 289 list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
290 290
291 if (rq_mergeable(rq)) 291 if (rq_mergeable(rq))
292 deadline_add_drq_hash(dd, drq); 292 deadline_add_drq_hash(dd, drq);
293 } 293 }
294 294
295 /* 295 /*
296 * remove rq from rbtree, fifo, and hash 296 * remove rq from rbtree, fifo, and hash
297 */ 297 */
298 static void deadline_remove_request(request_queue_t *q, struct request *rq) 298 static void deadline_remove_request(request_queue_t *q, struct request *rq)
299 { 299 {
300 struct deadline_rq *drq = RQ_DATA(rq); 300 struct deadline_rq *drq = RQ_DATA(rq);
301 struct deadline_data *dd = q->elevator->elevator_data; 301 struct deadline_data *dd = q->elevator->elevator_data;
302 302
303 list_del_init(&drq->fifo); 303 list_del_init(&drq->fifo);
304 deadline_del_drq_rb(dd, drq); 304 deadline_del_drq_rb(dd, drq);
305 deadline_del_drq_hash(drq); 305 deadline_del_drq_hash(drq);
306 } 306 }
307 307
308 static int 308 static int
309 deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) 309 deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
310 { 310 {
311 struct deadline_data *dd = q->elevator->elevator_data; 311 struct deadline_data *dd = q->elevator->elevator_data;
312 struct request *__rq; 312 struct request *__rq;
313 int ret; 313 int ret;
314 314
315 /* 315 /*
316 * see if the merge hash can satisfy a back merge 316 * see if the merge hash can satisfy a back merge
317 */ 317 */
318 __rq = deadline_find_drq_hash(dd, bio->bi_sector); 318 __rq = deadline_find_drq_hash(dd, bio->bi_sector);
319 if (__rq) { 319 if (__rq) {
320 BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); 320 BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
321 321
322 if (elv_rq_merge_ok(__rq, bio)) { 322 if (elv_rq_merge_ok(__rq, bio)) {
323 ret = ELEVATOR_BACK_MERGE; 323 ret = ELEVATOR_BACK_MERGE;
324 goto out; 324 goto out;
325 } 325 }
326 } 326 }
327 327
328 /* 328 /*
329 * check for front merge 329 * check for front merge
330 */ 330 */
331 if (dd->front_merges) { 331 if (dd->front_merges) {
332 sector_t rb_key = bio->bi_sector + bio_sectors(bio); 332 sector_t rb_key = bio->bi_sector + bio_sectors(bio);
333 333
334 __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio)); 334 __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio));
335 if (__rq) { 335 if (__rq) {
336 BUG_ON(rb_key != rq_rb_key(__rq)); 336 BUG_ON(rb_key != rq_rb_key(__rq));
337 337
338 if (elv_rq_merge_ok(__rq, bio)) { 338 if (elv_rq_merge_ok(__rq, bio)) {
339 ret = ELEVATOR_FRONT_MERGE; 339 ret = ELEVATOR_FRONT_MERGE;
340 goto out; 340 goto out;
341 } 341 }
342 } 342 }
343 } 343 }
344 344
345 return ELEVATOR_NO_MERGE; 345 return ELEVATOR_NO_MERGE;
346 out: 346 out:
347 if (ret) 347 if (ret)
348 deadline_hot_drq_hash(dd, RQ_DATA(__rq)); 348 deadline_hot_drq_hash(dd, RQ_DATA(__rq));
349 *req = __rq; 349 *req = __rq;
350 return ret; 350 return ret;
351 } 351 }
352 352
353 static void deadline_merged_request(request_queue_t *q, struct request *req) 353 static void deadline_merged_request(request_queue_t *q, struct request *req)
354 { 354 {
355 struct deadline_data *dd = q->elevator->elevator_data; 355 struct deadline_data *dd = q->elevator->elevator_data;
356 struct deadline_rq *drq = RQ_DATA(req); 356 struct deadline_rq *drq = RQ_DATA(req);
357 357
358 /* 358 /*
359 * hash always needs to be repositioned, key is end sector 359 * hash always needs to be repositioned, key is end sector
360 */ 360 */
361 deadline_del_drq_hash(drq); 361 deadline_del_drq_hash(drq);
362 deadline_add_drq_hash(dd, drq); 362 deadline_add_drq_hash(dd, drq);
363 363
364 /* 364 /*
365 * if the merge was a front merge, we need to reposition request 365 * if the merge was a front merge, we need to reposition request
366 */ 366 */
367 if (rq_rb_key(req) != drq->rb_key) { 367 if (rq_rb_key(req) != drq->rb_key) {
368 deadline_del_drq_rb(dd, drq); 368 deadline_del_drq_rb(dd, drq);
369 deadline_add_drq_rb(dd, drq); 369 deadline_add_drq_rb(dd, drq);
370 } 370 }
371 } 371 }
372 372
373 static void 373 static void
374 deadline_merged_requests(request_queue_t *q, struct request *req, 374 deadline_merged_requests(request_queue_t *q, struct request *req,
375 struct request *next) 375 struct request *next)
376 { 376 {
377 struct deadline_data *dd = q->elevator->elevator_data; 377 struct deadline_data *dd = q->elevator->elevator_data;
378 struct deadline_rq *drq = RQ_DATA(req); 378 struct deadline_rq *drq = RQ_DATA(req);
379 struct deadline_rq *dnext = RQ_DATA(next); 379 struct deadline_rq *dnext = RQ_DATA(next);
380 380
381 BUG_ON(!drq); 381 BUG_ON(!drq);
382 BUG_ON(!dnext); 382 BUG_ON(!dnext);
383 383
384 /* 384 /*
385 * reposition drq (this is the merged request) in hash, and in rbtree 385 * reposition drq (this is the merged request) in hash, and in rbtree
386 * in case of a front merge 386 * in case of a front merge
387 */ 387 */
388 deadline_del_drq_hash(drq); 388 deadline_del_drq_hash(drq);
389 deadline_add_drq_hash(dd, drq); 389 deadline_add_drq_hash(dd, drq);
390 390
391 if (rq_rb_key(req) != drq->rb_key) { 391 if (rq_rb_key(req) != drq->rb_key) {
392 deadline_del_drq_rb(dd, drq); 392 deadline_del_drq_rb(dd, drq);
393 deadline_add_drq_rb(dd, drq); 393 deadline_add_drq_rb(dd, drq);
394 } 394 }
395 395
396 /* 396 /*
397 * if dnext expires before drq, assign its expire time to drq 397 * if dnext expires before drq, assign its expire time to drq
398 * and move into dnext position (dnext will be deleted) in fifo 398 * and move into dnext position (dnext will be deleted) in fifo
399 */ 399 */
400 if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) { 400 if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) {
401 if (time_before(dnext->expires, drq->expires)) { 401 if (time_before(dnext->expires, drq->expires)) {
402 list_move(&drq->fifo, &dnext->fifo); 402 list_move(&drq->fifo, &dnext->fifo);
403 drq->expires = dnext->expires; 403 drq->expires = dnext->expires;
404 } 404 }
405 } 405 }
406 406
407 /* 407 /*
408 * kill knowledge of next, this one is a goner 408 * kill knowledge of next, this one is a goner
409 */ 409 */
410 deadline_remove_request(q, next); 410 deadline_remove_request(q, next);
411 } 411 }
412 412
413 /* 413 /*
414 * move request from sort list to dispatch queue. 414 * move request from sort list to dispatch queue.
415 */ 415 */
416 static inline void 416 static inline void
417 deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq) 417 deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq)
418 { 418 {
419 request_queue_t *q = drq->request->q; 419 request_queue_t *q = drq->request->q;
420 420
421 deadline_remove_request(q, drq->request); 421 deadline_remove_request(q, drq->request);
422 elv_dispatch_add_tail(q, drq->request); 422 elv_dispatch_add_tail(q, drq->request);
423 } 423 }
424 424
425 /* 425 /*
426 * move an entry to dispatch queue 426 * move an entry to dispatch queue
427 */ 427 */
428 static void 428 static void
429 deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq) 429 deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq)
430 { 430 {
431 const int data_dir = rq_data_dir(drq->request); 431 const int data_dir = rq_data_dir(drq->request);
432 struct rb_node *rbnext = rb_next(&drq->rb_node); 432 struct rb_node *rbnext = rb_next(&drq->rb_node);
433 433
434 dd->next_drq[READ] = NULL; 434 dd->next_drq[READ] = NULL;
435 dd->next_drq[WRITE] = NULL; 435 dd->next_drq[WRITE] = NULL;
436 436
437 if (rbnext) 437 if (rbnext)
438 dd->next_drq[data_dir] = rb_entry_drq(rbnext); 438 dd->next_drq[data_dir] = rb_entry_drq(rbnext);
439 439
440 dd->last_sector = drq->request->sector + drq->request->nr_sectors; 440 dd->last_sector = drq->request->sector + drq->request->nr_sectors;
441 441
442 /* 442 /*
443 * take it off the sort and fifo list, move 443 * take it off the sort and fifo list, move
444 * to dispatch queue 444 * to dispatch queue
445 */ 445 */
446 deadline_move_to_dispatch(dd, drq); 446 deadline_move_to_dispatch(dd, drq);
447 } 447 }
448 448
449 #define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo) 449 #define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo)
450 450
451 /* 451 /*
452 * deadline_check_fifo returns 0 if there are no expired requests on the fifo, 452 * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
453 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) 453 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
454 */ 454 */
455 static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) 455 static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
456 { 456 {
457 struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next); 457 struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next);
458 458
459 /* 459 /*
460 * drq is expired! 460 * drq is expired!
461 */ 461 */
462 if (time_after(jiffies, drq->expires)) 462 if (time_after(jiffies, drq->expires))
463 return 1; 463 return 1;
464 464
465 return 0; 465 return 0;
466 } 466 }
467 467
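Editor's note: drq->expires is set to jiffies + dd->fifo_expire[data_dir] when the request is queued (see deadline_add_request() above), and deadline_check_fifo() tests it with time_after() rather than a plain '>', which keeps the comparison correct across a jiffies wraparound. Stripped of its argument type checking, the helper from <linux/jiffies.h> behaves roughly like the sketch below (shown only as a reading aid, not as part of the patch):

/* simplified sketch of the wraparound-safe jiffies comparison */
#define example_time_after(a, b)        ((long)((b) - (a)) < 0)
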
468 /* 468 /*
469 * deadline_dispatch_requests selects the best request according to 469 * deadline_dispatch_requests selects the best request according to
470 * read/write expire, fifo_batch, etc 470 * read/write expire, fifo_batch, etc
471 */ 471 */
472 static int deadline_dispatch_requests(request_queue_t *q, int force) 472 static int deadline_dispatch_requests(request_queue_t *q, int force)
473 { 473 {
474 struct deadline_data *dd = q->elevator->elevator_data; 474 struct deadline_data *dd = q->elevator->elevator_data;
475 const int reads = !list_empty(&dd->fifo_list[READ]); 475 const int reads = !list_empty(&dd->fifo_list[READ]);
476 const int writes = !list_empty(&dd->fifo_list[WRITE]); 476 const int writes = !list_empty(&dd->fifo_list[WRITE]);
477 struct deadline_rq *drq; 477 struct deadline_rq *drq;
478 int data_dir; 478 int data_dir;
479 479
480 /* 480 /*
481 * batches are currently reads XOR writes 481 * batches are currently reads XOR writes
482 */ 482 */
483 if (dd->next_drq[WRITE]) 483 if (dd->next_drq[WRITE])
484 drq = dd->next_drq[WRITE]; 484 drq = dd->next_drq[WRITE];
485 else 485 else
486 drq = dd->next_drq[READ]; 486 drq = dd->next_drq[READ];
487 487
488 if (drq) { 488 if (drq) {
489 /* we have a "next request" */ 489 /* we have a "next request" */
490 490
491 if (dd->last_sector != drq->request->sector) 491 if (dd->last_sector != drq->request->sector)
492 /* end the batch on a non sequential request */ 492 /* end the batch on a non sequential request */
493 dd->batching += dd->fifo_batch; 493 dd->batching += dd->fifo_batch;
494 494
495 if (dd->batching < dd->fifo_batch) 495 if (dd->batching < dd->fifo_batch)
496 /* we are still entitled to batch */ 496 /* we are still entitled to batch */
497 goto dispatch_request; 497 goto dispatch_request;
498 } 498 }
499 499
500 /* 500 /*
501 * at this point we are not running a batch. select the appropriate 501 * at this point we are not running a batch. select the appropriate
502 * data direction (read / write) 502 * data direction (read / write)
503 */ 503 */
504 504
505 if (reads) { 505 if (reads) {
506 BUG_ON(RB_EMPTY(&dd->sort_list[READ])); 506 BUG_ON(RB_EMPTY(&dd->sort_list[READ]));
507 507
508 if (writes && (dd->starved++ >= dd->writes_starved)) 508 if (writes && (dd->starved++ >= dd->writes_starved))
509 goto dispatch_writes; 509 goto dispatch_writes;
510 510
511 data_dir = READ; 511 data_dir = READ;
512 512
513 goto dispatch_find_request; 513 goto dispatch_find_request;
514 } 514 }
515 515
516 /* 516 /*
517 * either there are no reads, or writes have been starved 517 * either there are no reads, or writes have been starved
518 */ 518 */
519 519
520 if (writes) { 520 if (writes) {
521 dispatch_writes: 521 dispatch_writes:
522 BUG_ON(RB_EMPTY(&dd->sort_list[WRITE])); 522 BUG_ON(RB_EMPTY(&dd->sort_list[WRITE]));
523 523
524 dd->starved = 0; 524 dd->starved = 0;
525 525
526 data_dir = WRITE; 526 data_dir = WRITE;
527 527
528 goto dispatch_find_request; 528 goto dispatch_find_request;
529 } 529 }
530 530
531 return 0; 531 return 0;
532 532
533 dispatch_find_request: 533 dispatch_find_request:
534 /* 534 /*
535 * we are not running a batch, find best request for selected data_dir 535 * we are not running a batch, find best request for selected data_dir
536 */ 536 */
537 if (deadline_check_fifo(dd, data_dir)) { 537 if (deadline_check_fifo(dd, data_dir)) {
538 /* An expired request exists - satisfy it */ 538 /* An expired request exists - satisfy it */
539 dd->batching = 0; 539 dd->batching = 0;
540 drq = list_entry_fifo(dd->fifo_list[data_dir].next); 540 drq = list_entry_fifo(dd->fifo_list[data_dir].next);
541 541
542 } else if (dd->next_drq[data_dir]) { 542 } else if (dd->next_drq[data_dir]) {
543 /* 543 /*
544 * The last req was the same dir and we have a next request in 544 * The last req was the same dir and we have a next request in
545 * sort order. No expired requests so continue on from here. 545 * sort order. No expired requests so continue on from here.
546 */ 546 */
547 drq = dd->next_drq[data_dir]; 547 drq = dd->next_drq[data_dir];
548 } else { 548 } else {
549 /* 549 /*
550 * The last req was the other direction or we have run out of 550 * The last req was the other direction or we have run out of
551 * higher-sectored requests. Go back to the lowest sectored 551 * higher-sectored requests. Go back to the lowest sectored
552 * request (1 way elevator) and start a new batch. 552 * request (1 way elevator) and start a new batch.
553 */ 553 */
554 dd->batching = 0; 554 dd->batching = 0;
555 drq = deadline_find_first_drq(dd, data_dir); 555 drq = deadline_find_first_drq(dd, data_dir);
556 } 556 }
557 557
558 dispatch_request: 558 dispatch_request:
559 /* 559 /*
560 * drq is the selected appropriate request. 560 * drq is the selected appropriate request.
561 */ 561 */
562 dd->batching++; 562 dd->batching++;
563 deadline_move_request(dd, drq); 563 deadline_move_request(dd, drq);
564 564
565 return 1; 565 return 1;
566 } 566 }
567 567
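Editor's note: the goto-based control flow above implements a compact policy: keep extending the current sequential batch until fifo_batch requests have been issued; otherwise pick a direction (reads, unless writes have already been passed over writes_starved times), and within that direction serve an expired FIFO head first, then the next request in sector order, then wrap back to the lowest sector. The side-effect-free sketch below restates only that selection logic; the function name is hypothetical, the batching/starved counter updates are omitted, and none of this code is part of the patch.

/* editorial restatement of the request selection in deadline_dispatch_requests() */
static struct deadline_rq *example_deadline_pick(struct deadline_data *dd)
{
        const int reads = !list_empty(&dd->fifo_list[READ]);
        const int writes = !list_empty(&dd->fifo_list[WRITE]);
        struct deadline_rq *drq = dd->next_drq[WRITE] ?
                        dd->next_drq[WRITE] : dd->next_drq[READ];
        int data_dir;

        /* 1) still inside a sequential batch: keep issuing in sector order */
        if (drq && dd->last_sector == drq->request->sector &&
            dd->batching < dd->fifo_batch)
                return drq;

        /* 2) pick a direction: reads win unless writes have been starved */
        if (reads && !(writes && dd->starved >= dd->writes_starved))
                data_dir = READ;
        else if (writes)
                data_dir = WRITE;
        else
                return NULL;

        /* 3) expired FIFO head first, then sector order, then wrap around */
        if (deadline_check_fifo(dd, data_dir))
                return list_entry_fifo(dd->fifo_list[data_dir].next);
        if (dd->next_drq[data_dir])
                return dd->next_drq[data_dir];
        return deadline_find_first_drq(dd, data_dir);
}
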
568 static int deadline_queue_empty(request_queue_t *q) 568 static int deadline_queue_empty(request_queue_t *q)
569 { 569 {
570 struct deadline_data *dd = q->elevator->elevator_data; 570 struct deadline_data *dd = q->elevator->elevator_data;
571 571
572 return list_empty(&dd->fifo_list[WRITE]) 572 return list_empty(&dd->fifo_list[WRITE])
573 && list_empty(&dd->fifo_list[READ]); 573 && list_empty(&dd->fifo_list[READ]);
574 } 574 }
575 575
576 static struct request * 576 static struct request *
577 deadline_former_request(request_queue_t *q, struct request *rq) 577 deadline_former_request(request_queue_t *q, struct request *rq)
578 { 578 {
579 struct deadline_rq *drq = RQ_DATA(rq); 579 struct deadline_rq *drq = RQ_DATA(rq);
580 struct rb_node *rbprev = rb_prev(&drq->rb_node); 580 struct rb_node *rbprev = rb_prev(&drq->rb_node);
581 581
582 if (rbprev) 582 if (rbprev)
583 return rb_entry_drq(rbprev)->request; 583 return rb_entry_drq(rbprev)->request;
584 584
585 return NULL; 585 return NULL;
586 } 586 }
587 587
588 static struct request * 588 static struct request *
589 deadline_latter_request(request_queue_t *q, struct request *rq) 589 deadline_latter_request(request_queue_t *q, struct request *rq)
590 { 590 {
591 struct deadline_rq *drq = RQ_DATA(rq); 591 struct deadline_rq *drq = RQ_DATA(rq);
592 struct rb_node *rbnext = rb_next(&drq->rb_node); 592 struct rb_node *rbnext = rb_next(&drq->rb_node);
593 593
594 if (rbnext) 594 if (rbnext)
595 return rb_entry_drq(rbnext)->request; 595 return rb_entry_drq(rbnext)->request;
596 596
597 return NULL; 597 return NULL;
598 } 598 }
599 599
600 static void deadline_exit_queue(elevator_t *e) 600 static void deadline_exit_queue(elevator_t *e)
601 { 601 {
602 struct deadline_data *dd = e->elevator_data; 602 struct deadline_data *dd = e->elevator_data;
603 603
604 BUG_ON(!list_empty(&dd->fifo_list[READ])); 604 BUG_ON(!list_empty(&dd->fifo_list[READ]));
605 BUG_ON(!list_empty(&dd->fifo_list[WRITE])); 605 BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
606 606
607 mempool_destroy(dd->drq_pool); 607 mempool_destroy(dd->drq_pool);
608 kfree(dd->hash); 608 kfree(dd->hash);
609 kfree(dd); 609 kfree(dd);
610 } 610 }
611 611
612 /* 612 /*
613 * initialize elevator private data (deadline_data), and alloc a drq for 613 * initialize elevator private data (deadline_data), and alloc a drq for
614 * each request on the free lists 614 * each request on the free lists
615 */ 615 */
616 static int deadline_init_queue(request_queue_t *q, elevator_t *e) 616 static int deadline_init_queue(request_queue_t *q, elevator_t *e)
617 { 617 {
618 struct deadline_data *dd; 618 struct deadline_data *dd;
619 int i; 619 int i;
620 620
621 if (!drq_pool) 621 if (!drq_pool)
622 return -ENOMEM; 622 return -ENOMEM;
623 623
624 dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node); 624 dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
625 if (!dd) 625 if (!dd)
626 return -ENOMEM; 626 return -ENOMEM;
627 memset(dd, 0, sizeof(*dd)); 627 memset(dd, 0, sizeof(*dd));
628 628
629 dd->hash = kmalloc_node(sizeof(struct list_head)*DL_HASH_ENTRIES, 629 dd->hash = kmalloc_node(sizeof(struct list_head)*DL_HASH_ENTRIES,
630 GFP_KERNEL, q->node); 630 GFP_KERNEL, q->node);
631 if (!dd->hash) { 631 if (!dd->hash) {
632 kfree(dd); 632 kfree(dd);
633 return -ENOMEM; 633 return -ENOMEM;
634 } 634 }
635 635
636 dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 636 dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
637 mempool_free_slab, drq_pool, q->node); 637 mempool_free_slab, drq_pool, q->node);
638 if (!dd->drq_pool) { 638 if (!dd->drq_pool) {
639 kfree(dd->hash); 639 kfree(dd->hash);
640 kfree(dd); 640 kfree(dd);
641 return -ENOMEM; 641 return -ENOMEM;
642 } 642 }
643 643
644 for (i = 0; i < DL_HASH_ENTRIES; i++) 644 for (i = 0; i < DL_HASH_ENTRIES; i++)
645 INIT_LIST_HEAD(&dd->hash[i]); 645 INIT_LIST_HEAD(&dd->hash[i]);
646 646
647 INIT_LIST_HEAD(&dd->fifo_list[READ]); 647 INIT_LIST_HEAD(&dd->fifo_list[READ]);
648 INIT_LIST_HEAD(&dd->fifo_list[WRITE]); 648 INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
649 dd->sort_list[READ] = RB_ROOT; 649 dd->sort_list[READ] = RB_ROOT;
650 dd->sort_list[WRITE] = RB_ROOT; 650 dd->sort_list[WRITE] = RB_ROOT;
651 dd->fifo_expire[READ] = read_expire; 651 dd->fifo_expire[READ] = read_expire;
652 dd->fifo_expire[WRITE] = write_expire; 652 dd->fifo_expire[WRITE] = write_expire;
653 dd->writes_starved = writes_starved; 653 dd->writes_starved = writes_starved;
654 dd->front_merges = 1; 654 dd->front_merges = 1;
655 dd->fifo_batch = fifo_batch; 655 dd->fifo_batch = fifo_batch;
656 e->elevator_data = dd; 656 e->elevator_data = dd;
657 return 0; 657 return 0;
658 } 658 }
659 659
660 static void deadline_put_request(request_queue_t *q, struct request *rq) 660 static void deadline_put_request(request_queue_t *q, struct request *rq)
661 { 661 {
662 struct deadline_data *dd = q->elevator->elevator_data; 662 struct deadline_data *dd = q->elevator->elevator_data;
663 struct deadline_rq *drq = RQ_DATA(rq); 663 struct deadline_rq *drq = RQ_DATA(rq);
664 664
665 mempool_free(drq, dd->drq_pool); 665 mempool_free(drq, dd->drq_pool);
666 rq->elevator_private = NULL; 666 rq->elevator_private = NULL;
667 } 667 }
668 668
669 static int 669 static int
670 deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio, 670 deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
671 gfp_t gfp_mask) 671 gfp_t gfp_mask)
672 { 672 {
673 struct deadline_data *dd = q->elevator->elevator_data; 673 struct deadline_data *dd = q->elevator->elevator_data;
674 struct deadline_rq *drq; 674 struct deadline_rq *drq;
675 675
676 drq = mempool_alloc(dd->drq_pool, gfp_mask); 676 drq = mempool_alloc(dd->drq_pool, gfp_mask);
677 if (drq) { 677 if (drq) {
678 memset(drq, 0, sizeof(*drq)); 678 memset(drq, 0, sizeof(*drq));
679 RB_CLEAR(&drq->rb_node); 679 RB_CLEAR(&drq->rb_node);
680 drq->request = rq; 680 drq->request = rq;
681 681
682 INIT_LIST_HEAD(&drq->hash); 682 INIT_LIST_HEAD(&drq->hash);
683 drq->on_hash = 0; 683 drq->on_hash = 0;
684 684
685 INIT_LIST_HEAD(&drq->fifo); 685 INIT_LIST_HEAD(&drq->fifo);
686 686
687 rq->elevator_private = drq; 687 rq->elevator_private = drq;
688 return 0; 688 return 0;
689 } 689 }
690 690
691 return 1; 691 return 1;
692 } 692 }
693 693
694 /* 694 /*
695 * sysfs parts below 695 * sysfs parts below
696 */ 696 */
697 struct deadline_fs_entry { 697 struct deadline_fs_entry {
698 struct attribute attr; 698 struct attribute attr;
699 ssize_t (*show)(struct deadline_data *, char *); 699 ssize_t (*show)(struct deadline_data *, char *);
700 ssize_t (*store)(struct deadline_data *, const char *, size_t); 700 ssize_t (*store)(struct deadline_data *, const char *, size_t);
701 }; 701 };
702 702
703 static ssize_t 703 static ssize_t
704 deadline_var_show(int var, char *page) 704 deadline_var_show(int var, char *page)
705 { 705 {
706 return sprintf(page, "%d\n", var); 706 return sprintf(page, "%d\n", var);
707 } 707 }
708 708
709 static ssize_t 709 static ssize_t
710 deadline_var_store(int *var, const char *page, size_t count) 710 deadline_var_store(int *var, const char *page, size_t count)
711 { 711 {
712 char *p = (char *) page; 712 char *p = (char *) page;
713 713
714 *var = simple_strtol(p, &p, 10); 714 *var = simple_strtol(p, &p, 10);
715 return count; 715 return count;
716 } 716 }
717 717
718 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ 718 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
719 static ssize_t __FUNC(struct deadline_data *dd, char *page) \ 719 static ssize_t __FUNC(struct deadline_data *dd, char *page) \
720 { \ 720 { \
721 int __data = __VAR; \ 721 int __data = __VAR; \
722 if (__CONV) \ 722 if (__CONV) \
723 __data = jiffies_to_msecs(__data); \ 723 __data = jiffies_to_msecs(__data); \
724 return deadline_var_show(__data, (page)); \ 724 return deadline_var_show(__data, (page)); \
725 } 725 }
726 SHOW_FUNCTION(deadline_readexpire_show, dd->fifo_expire[READ], 1); 726 SHOW_FUNCTION(deadline_readexpire_show, dd->fifo_expire[READ], 1);
727 SHOW_FUNCTION(deadline_writeexpire_show, dd->fifo_expire[WRITE], 1); 727 SHOW_FUNCTION(deadline_writeexpire_show, dd->fifo_expire[WRITE], 1);
728 SHOW_FUNCTION(deadline_writesstarved_show, dd->writes_starved, 0); 728 SHOW_FUNCTION(deadline_writesstarved_show, dd->writes_starved, 0);
729 SHOW_FUNCTION(deadline_frontmerges_show, dd->front_merges, 0); 729 SHOW_FUNCTION(deadline_frontmerges_show, dd->front_merges, 0);
730 SHOW_FUNCTION(deadline_fifobatch_show, dd->fifo_batch, 0); 730 SHOW_FUNCTION(deadline_fifobatch_show, dd->fifo_batch, 0);
731 #undef SHOW_FUNCTION 731 #undef SHOW_FUNCTION
732 732
733 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ 733 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
734 static ssize_t __FUNC(struct deadline_data *dd, const char *page, size_t count) \ 734 static ssize_t __FUNC(struct deadline_data *dd, const char *page, size_t count) \
735 { \ 735 { \
736 int __data; \ 736 int __data; \
737 int ret = deadline_var_store(&__data, (page), count); \ 737 int ret = deadline_var_store(&__data, (page), count); \
738 if (__data < (MIN)) \ 738 if (__data < (MIN)) \
739 __data = (MIN); \ 739 __data = (MIN); \
740 else if (__data > (MAX)) \ 740 else if (__data > (MAX)) \
741 __data = (MAX); \ 741 __data = (MAX); \
742 if (__CONV) \ 742 if (__CONV) \
743 *(__PTR) = msecs_to_jiffies(__data); \ 743 *(__PTR) = msecs_to_jiffies(__data); \
744 else \ 744 else \
745 *(__PTR) = __data; \ 745 *(__PTR) = __data; \
746 return ret; \ 746 return ret; \
747 } 747 }
748 STORE_FUNCTION(deadline_readexpire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1); 748 STORE_FUNCTION(deadline_readexpire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
749 STORE_FUNCTION(deadline_writeexpire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1); 749 STORE_FUNCTION(deadline_writeexpire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
750 STORE_FUNCTION(deadline_writesstarved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0); 750 STORE_FUNCTION(deadline_writesstarved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
751 STORE_FUNCTION(deadline_frontmerges_store, &dd->front_merges, 0, 1, 0); 751 STORE_FUNCTION(deadline_frontmerges_store, &dd->front_merges, 0, 1, 0);
752 STORE_FUNCTION(deadline_fifobatch_store, &dd->fifo_batch, 0, INT_MAX, 0); 752 STORE_FUNCTION(deadline_fifobatch_store, &dd->fifo_batch, 0, INT_MAX, 0);
753 #undef STORE_FUNCTION 753 #undef STORE_FUNCTION
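/*
 * Editor's note (not part of the kernel source): for reference, the
 * STORE_FUNCTION(deadline_frontmerges_store, &dd->front_merges, 0, 1, 0)
 * line above expands to roughly the following, clamping the sysfs input
 * to 0..1 before storing it (the jiffies conversion branch drops out
 * because __CONV is 0):
 *
 *	static ssize_t deadline_frontmerges_store(struct deadline_data *dd,
 *						  const char *page, size_t count)
 *	{
 *		int __data;
 *		int ret = deadline_var_store(&__data, page, count);
 *		if (__data < 0)
 *			__data = 0;
 *		else if (__data > 1)
 *			__data = 1;
 *		dd->front_merges = __data;
 *		return ret;
 *	}
 */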
754 754
755 static struct deadline_fs_entry deadline_readexpire_entry = { 755 static struct deadline_fs_entry deadline_readexpire_entry = {
756 .attr = {.name = "read_expire", .mode = S_IRUGO | S_IWUSR }, 756 .attr = {.name = "read_expire", .mode = S_IRUGO | S_IWUSR },
757 .show = deadline_readexpire_show, 757 .show = deadline_readexpire_show,
758 .store = deadline_readexpire_store, 758 .store = deadline_readexpire_store,
759 }; 759 };
760 static struct deadline_fs_entry deadline_writeexpire_entry = { 760 static struct deadline_fs_entry deadline_writeexpire_entry = {
761 .attr = {.name = "write_expire", .mode = S_IRUGO | S_IWUSR }, 761 .attr = {.name = "write_expire", .mode = S_IRUGO | S_IWUSR },
762 .show = deadline_writeexpire_show, 762 .show = deadline_writeexpire_show,
763 .store = deadline_writeexpire_store, 763 .store = deadline_writeexpire_store,
764 }; 764 };
765 static struct deadline_fs_entry deadline_writesstarved_entry = { 765 static struct deadline_fs_entry deadline_writesstarved_entry = {
766 .attr = {.name = "writes_starved", .mode = S_IRUGO | S_IWUSR }, 766 .attr = {.name = "writes_starved", .mode = S_IRUGO | S_IWUSR },
767 .show = deadline_writesstarved_show, 767 .show = deadline_writesstarved_show,
768 .store = deadline_writesstarved_store, 768 .store = deadline_writesstarved_store,
769 }; 769 };
770 static struct deadline_fs_entry deadline_frontmerges_entry = { 770 static struct deadline_fs_entry deadline_frontmerges_entry = {
771 .attr = {.name = "front_merges", .mode = S_IRUGO | S_IWUSR }, 771 .attr = {.name = "front_merges", .mode = S_IRUGO | S_IWUSR },
772 .show = deadline_frontmerges_show, 772 .show = deadline_frontmerges_show,
773 .store = deadline_frontmerges_store, 773 .store = deadline_frontmerges_store,
774 }; 774 };
775 static struct deadline_fs_entry deadline_fifobatch_entry = { 775 static struct deadline_fs_entry deadline_fifobatch_entry = {
776 .attr = {.name = "fifo_batch", .mode = S_IRUGO | S_IWUSR }, 776 .attr = {.name = "fifo_batch", .mode = S_IRUGO | S_IWUSR },
777 .show = deadline_fifobatch_show, 777 .show = deadline_fifobatch_show,
778 .store = deadline_fifobatch_store, 778 .store = deadline_fifobatch_store,
779 }; 779 };
780 780
781 static struct attribute *default_attrs[] = { 781 static struct attribute *default_attrs[] = {
782 &deadline_readexpire_entry.attr, 782 &deadline_readexpire_entry.attr,
783 &deadline_writeexpire_entry.attr, 783 &deadline_writeexpire_entry.attr,
784 &deadline_writesstarved_entry.attr, 784 &deadline_writesstarved_entry.attr,
785 &deadline_frontmerges_entry.attr, 785 &deadline_frontmerges_entry.attr,
786 &deadline_fifobatch_entry.attr, 786 &deadline_fifobatch_entry.attr,
787 NULL, 787 NULL,
788 }; 788 };
789 789
790 #define to_deadline(atr) container_of((atr), struct deadline_fs_entry, attr) 790 #define to_deadline(atr) container_of((atr), struct deadline_fs_entry, attr)
791 791
792 static ssize_t 792 static ssize_t
793 deadline_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 793 deadline_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
794 { 794 {
795 elevator_t *e = container_of(kobj, elevator_t, kobj); 795 elevator_t *e = container_of(kobj, elevator_t, kobj);
796 struct deadline_fs_entry *entry = to_deadline(attr); 796 struct deadline_fs_entry *entry = to_deadline(attr);
797 797
798 if (!entry->show) 798 if (!entry->show)
799 return -EIO; 799 return -EIO;
800 800
801 return entry->show(e->elevator_data, page); 801 return entry->show(e->elevator_data, page);
802 } 802 }
803 803
804 static ssize_t 804 static ssize_t
805 deadline_attr_store(struct kobject *kobj, struct attribute *attr, 805 deadline_attr_store(struct kobject *kobj, struct attribute *attr,
806 const char *page, size_t length) 806 const char *page, size_t length)
807 { 807 {
808 elevator_t *e = container_of(kobj, elevator_t, kobj); 808 elevator_t *e = container_of(kobj, elevator_t, kobj);
809 struct deadline_fs_entry *entry = to_deadline(attr); 809 struct deadline_fs_entry *entry = to_deadline(attr);
810 810
811 if (!entry->store) 811 if (!entry->store)
812 return -EIO; 812 return -EIO;
813 813
814 return entry->store(e->elevator_data, page, length); 814 return entry->store(e->elevator_data, page, length);
815 } 815 }
816 816
817 static struct sysfs_ops deadline_sysfs_ops = { 817 static struct sysfs_ops deadline_sysfs_ops = {
818 .show = deadline_attr_show, 818 .show = deadline_attr_show,
819 .store = deadline_attr_store, 819 .store = deadline_attr_store,
820 }; 820 };
821 821
822 static struct kobj_type deadline_ktype = { 822 static struct kobj_type deadline_ktype = {
823 .sysfs_ops = &deadline_sysfs_ops, 823 .sysfs_ops = &deadline_sysfs_ops,
824 .default_attrs = default_attrs, 824 .default_attrs = default_attrs,
825 }; 825 };
826 826
827 static struct elevator_type iosched_deadline = { 827 static struct elevator_type iosched_deadline = {
828 .ops = { 828 .ops = {
829 .elevator_merge_fn = deadline_merge, 829 .elevator_merge_fn = deadline_merge,
830 .elevator_merged_fn = deadline_merged_request, 830 .elevator_merged_fn = deadline_merged_request,
831 .elevator_merge_req_fn = deadline_merged_requests, 831 .elevator_merge_req_fn = deadline_merged_requests,
832 .elevator_dispatch_fn = deadline_dispatch_requests, 832 .elevator_dispatch_fn = deadline_dispatch_requests,
833 .elevator_add_req_fn = deadline_add_request, 833 .elevator_add_req_fn = deadline_add_request,
834 .elevator_queue_empty_fn = deadline_queue_empty, 834 .elevator_queue_empty_fn = deadline_queue_empty,
835 .elevator_former_req_fn = deadline_former_request, 835 .elevator_former_req_fn = deadline_former_request,
836 .elevator_latter_req_fn = deadline_latter_request, 836 .elevator_latter_req_fn = deadline_latter_request,
837 .elevator_set_req_fn = deadline_set_request, 837 .elevator_set_req_fn = deadline_set_request,
838 .elevator_put_req_fn = deadline_put_request, 838 .elevator_put_req_fn = deadline_put_request,
839 .elevator_init_fn = deadline_init_queue, 839 .elevator_init_fn = deadline_init_queue,
840 .elevator_exit_fn = deadline_exit_queue, 840 .elevator_exit_fn = deadline_exit_queue,
841 }, 841 },
842 842
843 .elevator_ktype = &deadline_ktype, 843 .elevator_ktype = &deadline_ktype,
844 .elevator_name = "deadline", 844 .elevator_name = "deadline",
845 .elevator_owner = THIS_MODULE, 845 .elevator_owner = THIS_MODULE,
846 }; 846 };
847 847
848 static int __init deadline_init(void) 848 static int __init deadline_init(void)
849 { 849 {
850 int ret; 850 int ret;
851 851
852 drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq), 852 drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
853 0, 0, NULL, NULL); 853 0, 0, NULL, NULL);
854 854
855 if (!drq_pool) 855 if (!drq_pool)
856 return -ENOMEM; 856 return -ENOMEM;
857 857
858 ret = elv_register(&iosched_deadline); 858 ret = elv_register(&iosched_deadline);
859 if (ret) 859 if (ret)
860 kmem_cache_destroy(drq_pool); 860 kmem_cache_destroy(drq_pool);
861 861
862 return ret; 862 return ret;
863 } 863 }
864 864
865 static void __exit deadline_exit(void) 865 static void __exit deadline_exit(void)
866 { 866 {
867 kmem_cache_destroy(drq_pool); 867 kmem_cache_destroy(drq_pool);
868 elv_unregister(&iosched_deadline); 868 elv_unregister(&iosched_deadline);
869 } 869 }
870 870
871 module_init(deadline_init); 871 module_init(deadline_init);
872 module_exit(deadline_exit); 872 module_exit(deadline_exit);
873 873
874 MODULE_AUTHOR("Jens Axboe"); 874 MODULE_AUTHOR("Jens Axboe");
875 MODULE_LICENSE("GPL"); 875 MODULE_LICENSE("GPL");
876 MODULE_DESCRIPTION("deadline IO scheduler"); 876 MODULE_DESCRIPTION("deadline IO scheduler");
877 877
1 /* 1 /*
2 * Copyright (C) 1991, 1992 Linus Torvalds 2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics 3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> 5 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
6 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000 6 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000
7 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 7 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
8 */ 8 */
9 9
10 /* 10 /*
11 * This handles all read/write requests to block devices 11 * This handles all read/write requests to block devices
12 */ 12 */
13 #include <linux/config.h> 13 #include <linux/config.h>
14 #include <linux/kernel.h> 14 #include <linux/kernel.h>
15 #include <linux/module.h> 15 #include <linux/module.h>
16 #include <linux/backing-dev.h> 16 #include <linux/backing-dev.h>
17 #include <linux/bio.h> 17 #include <linux/bio.h>
18 #include <linux/blkdev.h> 18 #include <linux/blkdev.h>
19 #include <linux/highmem.h> 19 #include <linux/highmem.h>
20 #include <linux/mm.h> 20 #include <linux/mm.h>
21 #include <linux/kernel_stat.h> 21 #include <linux/kernel_stat.h>
22 #include <linux/string.h> 22 #include <linux/string.h>
23 #include <linux/init.h> 23 #include <linux/init.h>
24 #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ 24 #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
25 #include <linux/completion.h> 25 #include <linux/completion.h>
26 #include <linux/slab.h> 26 #include <linux/slab.h>
27 #include <linux/swap.h> 27 #include <linux/swap.h>
28 #include <linux/writeback.h> 28 #include <linux/writeback.h>
29 #include <linux/blkdev.h> 29 #include <linux/blkdev.h>
30 30
31 /* 31 /*
32 * for max sense size 32 * for max sense size
33 */ 33 */
34 #include <scsi/scsi_cmnd.h> 34 #include <scsi/scsi_cmnd.h>
35 35
36 static void blk_unplug_work(void *data); 36 static void blk_unplug_work(void *data);
37 static void blk_unplug_timeout(unsigned long data); 37 static void blk_unplug_timeout(unsigned long data);
38 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); 38 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
39 39
40 /* 40 /*
41 * For the allocated request tables 41 * For the allocated request tables
42 */ 42 */
43 static kmem_cache_t *request_cachep; 43 static kmem_cache_t *request_cachep;
44 44
45 /* 45 /*
46 * For queue allocation 46 * For queue allocation
47 */ 47 */
48 static kmem_cache_t *requestq_cachep; 48 static kmem_cache_t *requestq_cachep;
49 49
50 /* 50 /*
51 * For io context allocations 51 * For io context allocations
52 */ 52 */
53 static kmem_cache_t *iocontext_cachep; 53 static kmem_cache_t *iocontext_cachep;
54 54
55 static wait_queue_head_t congestion_wqh[2] = { 55 static wait_queue_head_t congestion_wqh[2] = {
56 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), 56 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
57 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) 57 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
58 }; 58 };
59 59
60 /* 60 /*
61 * Controlling structure to kblockd 61 * Controlling structure to kblockd
62 */ 62 */
63 static struct workqueue_struct *kblockd_workqueue; 63 static struct workqueue_struct *kblockd_workqueue;
64 64
65 unsigned long blk_max_low_pfn, blk_max_pfn; 65 unsigned long blk_max_low_pfn, blk_max_pfn;
66 66
67 EXPORT_SYMBOL(blk_max_low_pfn); 67 EXPORT_SYMBOL(blk_max_low_pfn);
68 EXPORT_SYMBOL(blk_max_pfn); 68 EXPORT_SYMBOL(blk_max_pfn);
69 69
70 /* Amount of time in which a process may batch requests */ 70 /* Amount of time in which a process may batch requests */
71 #define BLK_BATCH_TIME (HZ/50UL) 71 #define BLK_BATCH_TIME (HZ/50UL)
72 72
73 /* Number of requests a "batching" process may submit */ 73 /* Number of requests a "batching" process may submit */
74 #define BLK_BATCH_REQ 32 74 #define BLK_BATCH_REQ 32
75 75
76 /* 76 /*
77 * Return the threshold (number of used requests) at which the queue is 77 * Return the threshold (number of used requests) at which the queue is
 78 * considered to be congested. It includes a little hysteresis to keep the 78 * considered to be congested. It includes a little hysteresis to keep the
79 * context switch rate down. 79 * context switch rate down.
80 */ 80 */
81 static inline int queue_congestion_on_threshold(struct request_queue *q) 81 static inline int queue_congestion_on_threshold(struct request_queue *q)
82 { 82 {
83 return q->nr_congestion_on; 83 return q->nr_congestion_on;
84 } 84 }
85 85
86 /* 86 /*
87 * The threshold at which a queue is considered to be uncongested 87 * The threshold at which a queue is considered to be uncongested
88 */ 88 */
89 static inline int queue_congestion_off_threshold(struct request_queue *q) 89 static inline int queue_congestion_off_threshold(struct request_queue *q)
90 { 90 {
91 return q->nr_congestion_off; 91 return q->nr_congestion_off;
92 } 92 }
93 93
94 static void blk_queue_congestion_threshold(struct request_queue *q) 94 static void blk_queue_congestion_threshold(struct request_queue *q)
95 { 95 {
96 int nr; 96 int nr;
97 97
98 nr = q->nr_requests - (q->nr_requests / 8) + 1; 98 nr = q->nr_requests - (q->nr_requests / 8) + 1;
99 if (nr > q->nr_requests) 99 if (nr > q->nr_requests)
100 nr = q->nr_requests; 100 nr = q->nr_requests;
101 q->nr_congestion_on = nr; 101 q->nr_congestion_on = nr;
102 102
103 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; 103 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
104 if (nr < 1) 104 if (nr < 1)
105 nr = 1; 105 nr = 1;
106 q->nr_congestion_off = nr; 106 q->nr_congestion_off = nr;
107 } 107 }
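/*
 * Editor's note (not part of the kernel source): a worked example of the
 * hysteresis computed above, assuming the default BLKDEV_MAX_RQ of 128
 * requests:
 *
 *	nr_congestion_on  = 128 - 128/8 + 1          = 113
 *	nr_congestion_off = 128 - 128/8 - 128/16 - 1 = 103
 *
 * so a queue marked congested at 113 used requests is not cleared again
 * until it drains below 103, which keeps tasks from flapping in and out
 * of the congestion wait queues.
 */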
108 108
109 /* 109 /*
 110 * A queue has just exited congestion. Note this in the global counter of 110 * A queue has just exited congestion. Note this in the global counter of
111 * congested queues, and wake up anyone who was waiting for requests to be 111 * congested queues, and wake up anyone who was waiting for requests to be
112 * put back. 112 * put back.
113 */ 113 */
114 static void clear_queue_congested(request_queue_t *q, int rw) 114 static void clear_queue_congested(request_queue_t *q, int rw)
115 { 115 {
116 enum bdi_state bit; 116 enum bdi_state bit;
117 wait_queue_head_t *wqh = &congestion_wqh[rw]; 117 wait_queue_head_t *wqh = &congestion_wqh[rw];
118 118
119 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; 119 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
120 clear_bit(bit, &q->backing_dev_info.state); 120 clear_bit(bit, &q->backing_dev_info.state);
121 smp_mb__after_clear_bit(); 121 smp_mb__after_clear_bit();
122 if (waitqueue_active(wqh)) 122 if (waitqueue_active(wqh))
123 wake_up(wqh); 123 wake_up(wqh);
124 } 124 }
125 125
126 /* 126 /*
127 * A queue has just entered congestion. Flag that in the queue's VM-visible 127 * A queue has just entered congestion. Flag that in the queue's VM-visible
 128 * state flags and increment the global counter of congested queues. 128 * state flags and increment the global counter of congested queues.
129 */ 129 */
130 static void set_queue_congested(request_queue_t *q, int rw) 130 static void set_queue_congested(request_queue_t *q, int rw)
131 { 131 {
132 enum bdi_state bit; 132 enum bdi_state bit;
133 133
134 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; 134 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
135 set_bit(bit, &q->backing_dev_info.state); 135 set_bit(bit, &q->backing_dev_info.state);
136 } 136 }
137 137
138 /** 138 /**
139 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info 139 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
140 * @bdev: device 140 * @bdev: device
141 * 141 *
142 * Locates the passed device's request queue and returns the address of its 142 * Locates the passed device's request queue and returns the address of its
143 * backing_dev_info 143 * backing_dev_info
144 * 144 *
145 * Will return NULL if the request queue cannot be located. 145 * Will return NULL if the request queue cannot be located.
146 */ 146 */
147 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) 147 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
148 { 148 {
149 struct backing_dev_info *ret = NULL; 149 struct backing_dev_info *ret = NULL;
150 request_queue_t *q = bdev_get_queue(bdev); 150 request_queue_t *q = bdev_get_queue(bdev);
151 151
152 if (q) 152 if (q)
153 ret = &q->backing_dev_info; 153 ret = &q->backing_dev_info;
154 return ret; 154 return ret;
155 } 155 }
156 156
157 EXPORT_SYMBOL(blk_get_backing_dev_info); 157 EXPORT_SYMBOL(blk_get_backing_dev_info);
158 158
159 void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data) 159 void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data)
160 { 160 {
161 q->activity_fn = fn; 161 q->activity_fn = fn;
162 q->activity_data = data; 162 q->activity_data = data;
163 } 163 }
164 164
165 EXPORT_SYMBOL(blk_queue_activity_fn); 165 EXPORT_SYMBOL(blk_queue_activity_fn);
166 166
167 /** 167 /**
168 * blk_queue_prep_rq - set a prepare_request function for queue 168 * blk_queue_prep_rq - set a prepare_request function for queue
169 * @q: queue 169 * @q: queue
170 * @pfn: prepare_request function 170 * @pfn: prepare_request function
171 * 171 *
172 * It's possible for a queue to register a prepare_request callback which 172 * It's possible for a queue to register a prepare_request callback which
173 * is invoked before the request is handed to the request_fn. The goal of 173 * is invoked before the request is handed to the request_fn. The goal of
174 * the function is to prepare a request for I/O, it can be used to build a 174 * the function is to prepare a request for I/O, it can be used to build a
175 * cdb from the request data for instance. 175 * cdb from the request data for instance.
176 * 176 *
177 */ 177 */
178 void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn) 178 void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)
179 { 179 {
180 q->prep_rq_fn = pfn; 180 q->prep_rq_fn = pfn;
181 } 181 }
182 182
183 EXPORT_SYMBOL(blk_queue_prep_rq); 183 EXPORT_SYMBOL(blk_queue_prep_rq);
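/*
 * Editor's note (not part of the kernel source): a minimal usage sketch.
 * A driver that needs a command descriptor built before its request_fn
 * sees the request might register a prep_rq_fn like the hypothetical one
 * below; "mydrv_prep_rq" and the command byte layout are illustrative.
 */
static int mydrv_prep_rq(request_queue_t *q, struct request *rq)
{
	if (!blk_fs_request(rq))
		return BLKPREP_OK;

	/* build a 10-byte read/write command from the request */
	memset(rq->cmd, 0, sizeof(rq->cmd));
	rq->cmd[0] = (rq_data_dir(rq) == READ) ? 0x28 : 0x2a;
	rq->cmd[2] = (rq->sector >> 24) & 0xff;
	rq->cmd[3] = (rq->sector >> 16) & 0xff;
	rq->cmd[4] = (rq->sector >>  8) & 0xff;
	rq->cmd[5] = rq->sector & 0xff;
	rq->cmd[7] = (rq->nr_sectors >> 8) & 0xff;
	rq->cmd[8] = rq->nr_sectors & 0xff;

	return BLKPREP_OK;
}
/* registered once at queue setup time: blk_queue_prep_rq(q, mydrv_prep_rq); */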
184 184
185 /** 185 /**
186 * blk_queue_merge_bvec - set a merge_bvec function for queue 186 * blk_queue_merge_bvec - set a merge_bvec function for queue
187 * @q: queue 187 * @q: queue
188 * @mbfn: merge_bvec_fn 188 * @mbfn: merge_bvec_fn
189 * 189 *
190 * Usually queues have static limitations on the max sectors or segments that 190 * Usually queues have static limitations on the max sectors or segments that
191 * we can put in a request. Stacking drivers may have some settings that 191 * we can put in a request. Stacking drivers may have some settings that
192 * are dynamic, and thus we have to query the queue whether it is ok to 192 * are dynamic, and thus we have to query the queue whether it is ok to
193 * add a new bio_vec to a bio at a given offset or not. If the block device 193 * add a new bio_vec to a bio at a given offset or not. If the block device
194 * has such limitations, it needs to register a merge_bvec_fn to control 194 * has such limitations, it needs to register a merge_bvec_fn to control
 195 * the size of bios sent to it. Note that a block device *must* allow a 195 * the size of bios sent to it. Note that a block device *must* allow a
196 * single page to be added to an empty bio. The block device driver may want 196 * single page to be added to an empty bio. The block device driver may want
 197 * to use the bio_split() function to deal with these bios. By default 197 * to use the bio_split() function to deal with these bios. By default
198 * no merge_bvec_fn is defined for a queue, and only the fixed limits are 198 * no merge_bvec_fn is defined for a queue, and only the fixed limits are
199 * honored. 199 * honored.
200 */ 200 */
201 void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn) 201 void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)
202 { 202 {
203 q->merge_bvec_fn = mbfn; 203 q->merge_bvec_fn = mbfn;
204 } 204 }
205 205
206 EXPORT_SYMBOL(blk_queue_merge_bvec); 206 EXPORT_SYMBOL(blk_queue_merge_bvec);
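/*
 * Editor's note (not part of the kernel source): a sketch of a
 * merge_bvec_fn for a hypothetical device that cannot service bios
 * crossing a 64 KiB boundary.  The return value is the number of bytes
 * that may be added at this offset; an empty bio is always granted at
 * least one vec, as the comment above requires.
 */
static int mydrv_merge_bvec(request_queue_t *q, struct bio *bio,
			    struct bio_vec *bvec)
{
	unsigned int boundary = 64 * 1024;
	unsigned int offset = (bio->bi_sector << 9) & (boundary - 1);
	int max = (int)(boundary - offset) - (int)bio->bi_size;

	if (max < 0)
		max = 0;
	/* an empty bio must always be allowed to take one page */
	if (max < bvec->bv_len && !bio->bi_size)
		return bvec->bv_len;
	return max;
}
/* registered with: blk_queue_merge_bvec(q, mydrv_merge_bvec); */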
207 207
208 /** 208 /**
209 * blk_queue_make_request - define an alternate make_request function for a device 209 * blk_queue_make_request - define an alternate make_request function for a device
210 * @q: the request queue for the device to be affected 210 * @q: the request queue for the device to be affected
211 * @mfn: the alternate make_request function 211 * @mfn: the alternate make_request function
212 * 212 *
213 * Description: 213 * Description:
214 * The normal way for &struct bios to be passed to a device 214 * The normal way for &struct bios to be passed to a device
215 * driver is for them to be collected into requests on a request 215 * driver is for them to be collected into requests on a request
216 * queue, and then to allow the device driver to select requests 216 * queue, and then to allow the device driver to select requests
217 * off that queue when it is ready. This works well for many block 217 * off that queue when it is ready. This works well for many block
218 * devices. However some block devices (typically virtual devices 218 * devices. However some block devices (typically virtual devices
219 * such as md or lvm) do not benefit from the processing on the 219 * such as md or lvm) do not benefit from the processing on the
220 * request queue, and are served best by having the requests passed 220 * request queue, and are served best by having the requests passed
221 * directly to them. This can be achieved by providing a function 221 * directly to them. This can be achieved by providing a function
222 * to blk_queue_make_request(). 222 * to blk_queue_make_request().
223 * 223 *
224 * Caveat: 224 * Caveat:
225 * The driver that does this *must* be able to deal appropriately 225 * The driver that does this *must* be able to deal appropriately
226 * with buffers in "highmemory". This can be accomplished by either calling 226 * with buffers in "highmemory". This can be accomplished by either calling
227 * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling 227 * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
228 * blk_queue_bounce() to create a buffer in normal memory. 228 * blk_queue_bounce() to create a buffer in normal memory.
229 **/ 229 **/
230 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) 230 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
231 { 231 {
232 /* 232 /*
233 * set defaults 233 * set defaults
234 */ 234 */
235 q->nr_requests = BLKDEV_MAX_RQ; 235 q->nr_requests = BLKDEV_MAX_RQ;
236 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); 236 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
237 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); 237 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
238 q->make_request_fn = mfn; 238 q->make_request_fn = mfn;
239 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 239 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
240 q->backing_dev_info.state = 0; 240 q->backing_dev_info.state = 0;
241 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; 241 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
242 blk_queue_max_sectors(q, SAFE_MAX_SECTORS); 242 blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
243 blk_queue_hardsect_size(q, 512); 243 blk_queue_hardsect_size(q, 512);
244 blk_queue_dma_alignment(q, 511); 244 blk_queue_dma_alignment(q, 511);
245 blk_queue_congestion_threshold(q); 245 blk_queue_congestion_threshold(q);
246 q->nr_batching = BLK_BATCH_REQ; 246 q->nr_batching = BLK_BATCH_REQ;
247 247
248 q->unplug_thresh = 4; /* hmm */ 248 q->unplug_thresh = 4; /* hmm */
249 q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ 249 q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */
250 if (q->unplug_delay == 0) 250 if (q->unplug_delay == 0)
251 q->unplug_delay = 1; 251 q->unplug_delay = 1;
252 252
253 INIT_WORK(&q->unplug_work, blk_unplug_work, q); 253 INIT_WORK(&q->unplug_work, blk_unplug_work, q);
254 254
255 q->unplug_timer.function = blk_unplug_timeout; 255 q->unplug_timer.function = blk_unplug_timeout;
256 q->unplug_timer.data = (unsigned long)q; 256 q->unplug_timer.data = (unsigned long)q;
257 257
258 /* 258 /*
259 * by default assume old behaviour and bounce for any highmem page 259 * by default assume old behaviour and bounce for any highmem page
260 */ 260 */
261 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); 261 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
262 262
263 blk_queue_activity_fn(q, NULL, NULL); 263 blk_queue_activity_fn(q, NULL, NULL);
264 } 264 }
265 265
266 EXPORT_SYMBOL(blk_queue_make_request); 266 EXPORT_SYMBOL(blk_queue_make_request);
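/*
 * Editor's note (not part of the kernel source): a minimal sketch of the
 * virtual-device pattern described above.  "mydrv_make_request" is
 * hypothetical; it completes every bio immediately, the way a trivial
 * null/ramdisk-style driver might, instead of queueing requests.
 */
static int mydrv_make_request(request_queue_t *q, struct bio *bio)
{
	/* a real driver would transfer the data described by the bio here */
	bio_endio(bio, bio->bi_size, 0);
	return 0;
}

static request_queue_t *mydrv_alloc_queue(void)
{
	request_queue_t *q = blk_alloc_queue(GFP_KERNEL);

	if (q)
		blk_queue_make_request(q, mydrv_make_request);
	return q;
}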
267 267
268 static inline void rq_init(request_queue_t *q, struct request *rq) 268 static inline void rq_init(request_queue_t *q, struct request *rq)
269 { 269 {
270 INIT_LIST_HEAD(&rq->queuelist); 270 INIT_LIST_HEAD(&rq->queuelist);
271 271
272 rq->errors = 0; 272 rq->errors = 0;
273 rq->rq_status = RQ_ACTIVE; 273 rq->rq_status = RQ_ACTIVE;
274 rq->bio = rq->biotail = NULL; 274 rq->bio = rq->biotail = NULL;
275 rq->ioprio = 0; 275 rq->ioprio = 0;
276 rq->buffer = NULL; 276 rq->buffer = NULL;
277 rq->ref_count = 1; 277 rq->ref_count = 1;
278 rq->q = q; 278 rq->q = q;
279 rq->waiting = NULL; 279 rq->waiting = NULL;
280 rq->special = NULL; 280 rq->special = NULL;
281 rq->data_len = 0; 281 rq->data_len = 0;
282 rq->data = NULL; 282 rq->data = NULL;
283 rq->nr_phys_segments = 0; 283 rq->nr_phys_segments = 0;
284 rq->sense = NULL; 284 rq->sense = NULL;
285 rq->end_io = NULL; 285 rq->end_io = NULL;
286 rq->end_io_data = NULL; 286 rq->end_io_data = NULL;
287 } 287 }
288 288
289 /** 289 /**
290 * blk_queue_ordered - does this queue support ordered writes 290 * blk_queue_ordered - does this queue support ordered writes
291 * @q: the request queue 291 * @q: the request queue
292 * @flag: see below 292 * @flag: see below
293 * 293 *
294 * Description: 294 * Description:
295 * For journalled file systems, doing ordered writes on a commit 295 * For journalled file systems, doing ordered writes on a commit
296 * block instead of explicitly doing wait_on_buffer (which is bad 296 * block instead of explicitly doing wait_on_buffer (which is bad
297 * for performance) can be a big win. Block drivers supporting this 297 * for performance) can be a big win. Block drivers supporting this
298 * feature should call this function and indicate so. 298 * feature should call this function and indicate so.
299 * 299 *
300 **/ 300 **/
301 void blk_queue_ordered(request_queue_t *q, int flag) 301 void blk_queue_ordered(request_queue_t *q, int flag)
302 { 302 {
303 switch (flag) { 303 switch (flag) {
304 case QUEUE_ORDERED_NONE: 304 case QUEUE_ORDERED_NONE:
305 if (q->flush_rq) 305 if (q->flush_rq)
306 kmem_cache_free(request_cachep, q->flush_rq); 306 kmem_cache_free(request_cachep, q->flush_rq);
307 q->flush_rq = NULL; 307 q->flush_rq = NULL;
308 q->ordered = flag; 308 q->ordered = flag;
309 break; 309 break;
310 case QUEUE_ORDERED_TAG: 310 case QUEUE_ORDERED_TAG:
311 q->ordered = flag; 311 q->ordered = flag;
312 break; 312 break;
313 case QUEUE_ORDERED_FLUSH: 313 case QUEUE_ORDERED_FLUSH:
314 q->ordered = flag; 314 q->ordered = flag;
315 if (!q->flush_rq) 315 if (!q->flush_rq)
316 q->flush_rq = kmem_cache_alloc(request_cachep, 316 q->flush_rq = kmem_cache_alloc(request_cachep,
317 GFP_KERNEL); 317 GFP_KERNEL);
318 break; 318 break;
319 default: 319 default:
320 printk("blk_queue_ordered: bad value %d\n", flag); 320 printk("blk_queue_ordered: bad value %d\n", flag);
321 break; 321 break;
322 } 322 }
323 } 323 }
324 324
325 EXPORT_SYMBOL(blk_queue_ordered); 325 EXPORT_SYMBOL(blk_queue_ordered);
326 326
327 /** 327 /**
328 * blk_queue_issue_flush_fn - set function for issuing a flush 328 * blk_queue_issue_flush_fn - set function for issuing a flush
329 * @q: the request queue 329 * @q: the request queue
330 * @iff: the function to be called issuing the flush 330 * @iff: the function to be called issuing the flush
331 * 331 *
332 * Description: 332 * Description:
 333 * If a driver supports issuing a flush command, it notifies the block 333 * If a driver supports issuing a flush command, it notifies the block
 334 * layer by registering that function through this call. 334 * layer by registering that function through this call.
335 * 335 *
336 **/ 336 **/
337 void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff) 337 void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)
338 { 338 {
339 q->issue_flush_fn = iff; 339 q->issue_flush_fn = iff;
340 } 340 }
341 341
342 EXPORT_SYMBOL(blk_queue_issue_flush_fn); 342 EXPORT_SYMBOL(blk_queue_issue_flush_fn);
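/*
 * Editor's note (not part of the kernel source): a usage sketch for the
 * two hooks above.  A hypothetical driver whose hardware has a volatile
 * write cache but understands a cache-flush command might advertise
 * flush-based ordering at queue setup time; "mydrv_issue_flush" and the
 * hardware interaction inside it are assumptions.
 */
static int mydrv_issue_flush(request_queue_t *q, struct gendisk *disk,
			     sector_t *error_sector)
{
	/* send a flush to the hardware and wait for it to complete */
	return 0;
}

static void mydrv_init_queue(request_queue_t *q)
{
	blk_queue_ordered(q, QUEUE_ORDERED_FLUSH);
	blk_queue_issue_flush_fn(q, mydrv_issue_flush);
}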
343 343
344 /* 344 /*
345 * Cache flushing for ordered writes handling 345 * Cache flushing for ordered writes handling
346 */ 346 */
347 static void blk_pre_flush_end_io(struct request *flush_rq) 347 static void blk_pre_flush_end_io(struct request *flush_rq)
348 { 348 {
349 struct request *rq = flush_rq->end_io_data; 349 struct request *rq = flush_rq->end_io_data;
350 request_queue_t *q = rq->q; 350 request_queue_t *q = rq->q;
351 351
352 elv_completed_request(q, flush_rq); 352 elv_completed_request(q, flush_rq);
353 353
354 rq->flags |= REQ_BAR_PREFLUSH; 354 rq->flags |= REQ_BAR_PREFLUSH;
355 355
356 if (!flush_rq->errors) 356 if (!flush_rq->errors)
357 elv_requeue_request(q, rq); 357 elv_requeue_request(q, rq);
358 else { 358 else {
359 q->end_flush_fn(q, flush_rq); 359 q->end_flush_fn(q, flush_rq);
360 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); 360 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
361 q->request_fn(q); 361 q->request_fn(q);
362 } 362 }
363 } 363 }
364 364
365 static void blk_post_flush_end_io(struct request *flush_rq) 365 static void blk_post_flush_end_io(struct request *flush_rq)
366 { 366 {
367 struct request *rq = flush_rq->end_io_data; 367 struct request *rq = flush_rq->end_io_data;
368 request_queue_t *q = rq->q; 368 request_queue_t *q = rq->q;
369 369
370 elv_completed_request(q, flush_rq); 370 elv_completed_request(q, flush_rq);
371 371
372 rq->flags |= REQ_BAR_POSTFLUSH; 372 rq->flags |= REQ_BAR_POSTFLUSH;
373 373
374 q->end_flush_fn(q, flush_rq); 374 q->end_flush_fn(q, flush_rq);
375 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); 375 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
376 q->request_fn(q); 376 q->request_fn(q);
377 } 377 }
378 378
379 struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq) 379 struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
380 { 380 {
381 struct request *flush_rq = q->flush_rq; 381 struct request *flush_rq = q->flush_rq;
382 382
383 BUG_ON(!blk_barrier_rq(rq)); 383 BUG_ON(!blk_barrier_rq(rq));
384 384
385 if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags)) 385 if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags))
386 return NULL; 386 return NULL;
387 387
388 rq_init(q, flush_rq); 388 rq_init(q, flush_rq);
389 flush_rq->elevator_private = NULL; 389 flush_rq->elevator_private = NULL;
390 flush_rq->flags = REQ_BAR_FLUSH; 390 flush_rq->flags = REQ_BAR_FLUSH;
391 flush_rq->rq_disk = rq->rq_disk; 391 flush_rq->rq_disk = rq->rq_disk;
392 flush_rq->rl = NULL; 392 flush_rq->rl = NULL;
393 393
394 /* 394 /*
395 * prepare_flush returns 0 if no flush is needed, just mark both 395 * prepare_flush returns 0 if no flush is needed, just mark both
396 * pre and post flush as done in that case 396 * pre and post flush as done in that case
397 */ 397 */
398 if (!q->prepare_flush_fn(q, flush_rq)) { 398 if (!q->prepare_flush_fn(q, flush_rq)) {
399 rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH; 399 rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
400 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); 400 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
401 return rq; 401 return rq;
402 } 402 }
403 403
404 /* 404 /*
405 * some drivers dequeue requests right away, some only after io 405 * some drivers dequeue requests right away, some only after io
406 * completion. make sure the request is dequeued. 406 * completion. make sure the request is dequeued.
407 */ 407 */
408 if (!list_empty(&rq->queuelist)) 408 if (!list_empty(&rq->queuelist))
409 blkdev_dequeue_request(rq); 409 blkdev_dequeue_request(rq);
410 410
411 flush_rq->end_io_data = rq; 411 flush_rq->end_io_data = rq;
412 flush_rq->end_io = blk_pre_flush_end_io; 412 flush_rq->end_io = blk_pre_flush_end_io;
413 413
414 __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); 414 __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
415 return flush_rq; 415 return flush_rq;
416 } 416 }
417 417
418 static void blk_start_post_flush(request_queue_t *q, struct request *rq) 418 static void blk_start_post_flush(request_queue_t *q, struct request *rq)
419 { 419 {
420 struct request *flush_rq = q->flush_rq; 420 struct request *flush_rq = q->flush_rq;
421 421
422 BUG_ON(!blk_barrier_rq(rq)); 422 BUG_ON(!blk_barrier_rq(rq));
423 423
424 rq_init(q, flush_rq); 424 rq_init(q, flush_rq);
425 flush_rq->elevator_private = NULL; 425 flush_rq->elevator_private = NULL;
426 flush_rq->flags = REQ_BAR_FLUSH; 426 flush_rq->flags = REQ_BAR_FLUSH;
427 flush_rq->rq_disk = rq->rq_disk; 427 flush_rq->rq_disk = rq->rq_disk;
428 flush_rq->rl = NULL; 428 flush_rq->rl = NULL;
429 429
430 if (q->prepare_flush_fn(q, flush_rq)) { 430 if (q->prepare_flush_fn(q, flush_rq)) {
431 flush_rq->end_io_data = rq; 431 flush_rq->end_io_data = rq;
432 flush_rq->end_io = blk_post_flush_end_io; 432 flush_rq->end_io = blk_post_flush_end_io;
433 433
434 __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); 434 __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
435 q->request_fn(q); 435 q->request_fn(q);
436 } 436 }
437 } 437 }
438 438
439 static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq, 439 static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq,
440 int sectors) 440 int sectors)
441 { 441 {
442 if (sectors > rq->nr_sectors) 442 if (sectors > rq->nr_sectors)
443 sectors = rq->nr_sectors; 443 sectors = rq->nr_sectors;
444 444
445 rq->nr_sectors -= sectors; 445 rq->nr_sectors -= sectors;
446 return rq->nr_sectors; 446 return rq->nr_sectors;
447 } 447 }
448 448
449 static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq, 449 static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq,
450 int sectors, int queue_locked) 450 int sectors, int queue_locked)
451 { 451 {
452 if (q->ordered != QUEUE_ORDERED_FLUSH) 452 if (q->ordered != QUEUE_ORDERED_FLUSH)
453 return 0; 453 return 0;
454 if (!blk_fs_request(rq) || !blk_barrier_rq(rq)) 454 if (!blk_fs_request(rq) || !blk_barrier_rq(rq))
455 return 0; 455 return 0;
456 if (blk_barrier_postflush(rq)) 456 if (blk_barrier_postflush(rq))
457 return 0; 457 return 0;
458 458
459 if (!blk_check_end_barrier(q, rq, sectors)) { 459 if (!blk_check_end_barrier(q, rq, sectors)) {
460 unsigned long flags = 0; 460 unsigned long flags = 0;
461 461
462 if (!queue_locked) 462 if (!queue_locked)
463 spin_lock_irqsave(q->queue_lock, flags); 463 spin_lock_irqsave(q->queue_lock, flags);
464 464
465 blk_start_post_flush(q, rq); 465 blk_start_post_flush(q, rq);
466 466
467 if (!queue_locked) 467 if (!queue_locked)
468 spin_unlock_irqrestore(q->queue_lock, flags); 468 spin_unlock_irqrestore(q->queue_lock, flags);
469 } 469 }
470 470
471 return 1; 471 return 1;
472 } 472 }
473 473
474 /** 474 /**
475 * blk_complete_barrier_rq - complete possible barrier request 475 * blk_complete_barrier_rq - complete possible barrier request
476 * @q: the request queue for the device 476 * @q: the request queue for the device
477 * @rq: the request 477 * @rq: the request
478 * @sectors: number of sectors to complete 478 * @sectors: number of sectors to complete
479 * 479 *
480 * Description: 480 * Description:
481 * Used in driver end_io handling to determine whether to postpone 481 * Used in driver end_io handling to determine whether to postpone
482 * completion of a barrier request until a post flush has been done. This 482 * completion of a barrier request until a post flush has been done. This
483 * is the unlocked variant, used if the caller doesn't already hold the 483 * is the unlocked variant, used if the caller doesn't already hold the
484 * queue lock. 484 * queue lock.
485 **/ 485 **/
486 int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors) 486 int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)
487 { 487 {
488 return __blk_complete_barrier_rq(q, rq, sectors, 0); 488 return __blk_complete_barrier_rq(q, rq, sectors, 0);
489 } 489 }
490 EXPORT_SYMBOL(blk_complete_barrier_rq); 490 EXPORT_SYMBOL(blk_complete_barrier_rq);
491 491
492 /** 492 /**
493 * blk_complete_barrier_rq_locked - complete possible barrier request 493 * blk_complete_barrier_rq_locked - complete possible barrier request
494 * @q: the request queue for the device 494 * @q: the request queue for the device
495 * @rq: the request 495 * @rq: the request
496 * @sectors: number of sectors to complete 496 * @sectors: number of sectors to complete
497 * 497 *
498 * Description: 498 * Description:
499 * See blk_complete_barrier_rq(). This variant must be used if the caller 499 * See blk_complete_barrier_rq(). This variant must be used if the caller
500 * holds the queue lock. 500 * holds the queue lock.
501 **/ 501 **/
502 int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq, 502 int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,
503 int sectors) 503 int sectors)
504 { 504 {
505 return __blk_complete_barrier_rq(q, rq, sectors, 1); 505 return __blk_complete_barrier_rq(q, rq, sectors, 1);
506 } 506 }
507 EXPORT_SYMBOL(blk_complete_barrier_rq_locked); 507 EXPORT_SYMBOL(blk_complete_barrier_rq_locked);
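/*
 * Editor's note (not part of the kernel source): the intended calling
 * pattern, sketched from the descriptions above, for a hypothetical
 * driver completion handler that does not already hold the queue lock;
 * "nr_sectors" is whatever amount the hardware just finished.
 */
static void mydrv_end_io(request_queue_t *q, struct request *rq, int nr_sectors)
{
	if (blk_complete_barrier_rq(q, rq, nr_sectors))
		return;		/* held back until the post flush completes */

	/* no barrier handling needed: complete the request as usual here */
}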
508 508
509 /** 509 /**
510 * blk_queue_bounce_limit - set bounce buffer limit for queue 510 * blk_queue_bounce_limit - set bounce buffer limit for queue
511 * @q: the request queue for the device 511 * @q: the request queue for the device
512 * @dma_addr: bus address limit 512 * @dma_addr: bus address limit
513 * 513 *
514 * Description: 514 * Description:
515 * Different hardware can have different requirements as to what pages 515 * Different hardware can have different requirements as to what pages
516 * it can do I/O directly to. A low level driver can call 516 * it can do I/O directly to. A low level driver can call
517 * blk_queue_bounce_limit to have lower memory pages allocated as bounce 517 * blk_queue_bounce_limit to have lower memory pages allocated as bounce
 518 * buffers for doing I/O to pages residing above @dma_addr. By default 518 * buffers for doing I/O to pages residing above @dma_addr. By default
519 * the block layer sets this to the highest numbered "low" memory page. 519 * the block layer sets this to the highest numbered "low" memory page.
520 **/ 520 **/
521 void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) 521 void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
522 { 522 {
523 unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; 523 unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
524 524
525 /* 525 /*
526 * set appropriate bounce gfp mask -- unfortunately we don't have a 526 * set appropriate bounce gfp mask -- unfortunately we don't have a
527 * full 4GB zone, so we have to resort to low memory for any bounces. 527 * full 4GB zone, so we have to resort to low memory for any bounces.
528 * ISA has its own < 16MB zone. 528 * ISA has its own < 16MB zone.
529 */ 529 */
530 if (bounce_pfn < blk_max_low_pfn) { 530 if (bounce_pfn < blk_max_low_pfn) {
531 BUG_ON(dma_addr < BLK_BOUNCE_ISA); 531 BUG_ON(dma_addr < BLK_BOUNCE_ISA);
532 init_emergency_isa_pool(); 532 init_emergency_isa_pool();
533 q->bounce_gfp = GFP_NOIO | GFP_DMA; 533 q->bounce_gfp = GFP_NOIO | GFP_DMA;
534 } else 534 } else
535 q->bounce_gfp = GFP_NOIO; 535 q->bounce_gfp = GFP_NOIO;
536 536
537 q->bounce_pfn = bounce_pfn; 537 q->bounce_pfn = bounce_pfn;
538 } 538 }
539 539
540 EXPORT_SYMBOL(blk_queue_bounce_limit); 540 EXPORT_SYMBOL(blk_queue_bounce_limit);
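/*
 * Editor's note (not part of the kernel source): a usage sketch.  A
 * hypothetical PCI driver whose engine can only address the low 4GB
 * might set its bounce limit from the DMA mask it negotiated; "pdev"
 * and the fallback policy are assumptions.
 */
static void mydrv_set_bounce(struct pci_dev *pdev, request_queue_t *q)
{
	if (!pci_set_dma_mask(pdev, 0xffffffffULL))
		blk_queue_bounce_limit(q, 0xffffffffULL);
	else
		blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
}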
541 541
542 /** 542 /**
543 * blk_queue_max_sectors - set max sectors for a request for this queue 543 * blk_queue_max_sectors - set max sectors for a request for this queue
544 * @q: the request queue for the device 544 * @q: the request queue for the device
545 * @max_sectors: max sectors in the usual 512b unit 545 * @max_sectors: max sectors in the usual 512b unit
546 * 546 *
547 * Description: 547 * Description:
548 * Enables a low level driver to set an upper limit on the size of 548 * Enables a low level driver to set an upper limit on the size of
549 * received requests. 549 * received requests.
550 **/ 550 **/
551 void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors) 551 void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors)
552 { 552 {
553 if ((max_sectors << 9) < PAGE_CACHE_SIZE) { 553 if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
554 max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); 554 max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
555 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); 555 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);
556 } 556 }
557 557
558 if (BLK_DEF_MAX_SECTORS > max_sectors) 558 if (BLK_DEF_MAX_SECTORS > max_sectors)
559 q->max_hw_sectors = q->max_sectors = max_sectors; 559 q->max_hw_sectors = q->max_sectors = max_sectors;
560 else { 560 else {
561 q->max_sectors = BLK_DEF_MAX_SECTORS; 561 q->max_sectors = BLK_DEF_MAX_SECTORS;
562 q->max_hw_sectors = max_sectors; 562 q->max_hw_sectors = max_sectors;
563 } 563 }
564 } 564 }
565 565
566 EXPORT_SYMBOL(blk_queue_max_sectors); 566 EXPORT_SYMBOL(blk_queue_max_sectors);
567 567
568 /** 568 /**
569 * blk_queue_max_phys_segments - set max phys segments for a request for this queue 569 * blk_queue_max_phys_segments - set max phys segments for a request for this queue
570 * @q: the request queue for the device 570 * @q: the request queue for the device
571 * @max_segments: max number of segments 571 * @max_segments: max number of segments
572 * 572 *
573 * Description: 573 * Description:
574 * Enables a low level driver to set an upper limit on the number of 574 * Enables a low level driver to set an upper limit on the number of
575 * physical data segments in a request. This would be the largest sized 575 * physical data segments in a request. This would be the largest sized
576 * scatter list the driver could handle. 576 * scatter list the driver could handle.
577 **/ 577 **/
578 void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments) 578 void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments)
579 { 579 {
580 if (!max_segments) { 580 if (!max_segments) {
581 max_segments = 1; 581 max_segments = 1;
582 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); 582 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
583 } 583 }
584 584
585 q->max_phys_segments = max_segments; 585 q->max_phys_segments = max_segments;
586 } 586 }
587 587
588 EXPORT_SYMBOL(blk_queue_max_phys_segments); 588 EXPORT_SYMBOL(blk_queue_max_phys_segments);
589 589
590 /** 590 /**
591 * blk_queue_max_hw_segments - set max hw segments for a request for this queue 591 * blk_queue_max_hw_segments - set max hw segments for a request for this queue
592 * @q: the request queue for the device 592 * @q: the request queue for the device
593 * @max_segments: max number of segments 593 * @max_segments: max number of segments
594 * 594 *
595 * Description: 595 * Description:
596 * Enables a low level driver to set an upper limit on the number of 596 * Enables a low level driver to set an upper limit on the number of
597 * hw data segments in a request. This would be the largest number of 597 * hw data segments in a request. This would be the largest number of
 598 * address/length pairs the host adapter can actually give at once 598 * address/length pairs the host adapter can actually give at once
599 * to the device. 599 * to the device.
600 **/ 600 **/
601 void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments) 601 void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments)
602 { 602 {
603 if (!max_segments) { 603 if (!max_segments) {
604 max_segments = 1; 604 max_segments = 1;
605 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); 605 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
606 } 606 }
607 607
608 q->max_hw_segments = max_segments; 608 q->max_hw_segments = max_segments;
609 } 609 }
610 610
611 EXPORT_SYMBOL(blk_queue_max_hw_segments); 611 EXPORT_SYMBOL(blk_queue_max_hw_segments);
612 612
613 /** 613 /**
614 * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg 614 * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg
615 * @q: the request queue for the device 615 * @q: the request queue for the device
616 * @max_size: max size of segment in bytes 616 * @max_size: max size of segment in bytes
617 * 617 *
618 * Description: 618 * Description:
619 * Enables a low level driver to set an upper limit on the size of a 619 * Enables a low level driver to set an upper limit on the size of a
620 * coalesced segment 620 * coalesced segment
621 **/ 621 **/
622 void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size) 622 void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size)
623 { 623 {
624 if (max_size < PAGE_CACHE_SIZE) { 624 if (max_size < PAGE_CACHE_SIZE) {
625 max_size = PAGE_CACHE_SIZE; 625 max_size = PAGE_CACHE_SIZE;
626 printk("%s: set to minimum %d\n", __FUNCTION__, max_size); 626 printk("%s: set to minimum %d\n", __FUNCTION__, max_size);
627 } 627 }
628 628
629 q->max_segment_size = max_size; 629 q->max_segment_size = max_size;
630 } 630 }
631 631
632 EXPORT_SYMBOL(blk_queue_max_segment_size); 632 EXPORT_SYMBOL(blk_queue_max_segment_size);
633 633
634 /** 634 /**
635 * blk_queue_hardsect_size - set hardware sector size for the queue 635 * blk_queue_hardsect_size - set hardware sector size for the queue
636 * @q: the request queue for the device 636 * @q: the request queue for the device
637 * @size: the hardware sector size, in bytes 637 * @size: the hardware sector size, in bytes
638 * 638 *
639 * Description: 639 * Description:
640 * This should typically be set to the lowest possible sector size 640 * This should typically be set to the lowest possible sector size
 641 * that the hardware can operate on (preferably without resorting to 641 * that the hardware can operate on (preferably without resorting to
 642 * internal read-modify-write operations). Usually the default 642 * internal read-modify-write operations). Usually the default
643 * of 512 covers most hardware. 643 * of 512 covers most hardware.
644 **/ 644 **/
645 void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) 645 void blk_queue_hardsect_size(request_queue_t *q, unsigned short size)
646 { 646 {
647 q->hardsect_size = size; 647 q->hardsect_size = size;
648 } 648 }
649 649
650 EXPORT_SYMBOL(blk_queue_hardsect_size); 650 EXPORT_SYMBOL(blk_queue_hardsect_size);
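/*
 * Editor's note (not part of the kernel source): the setup helpers above
 * are typically called together from a driver's probe routine.  The
 * numbers below describe a hypothetical controller limited to 64 KiB per
 * request and a 32-entry scatter/gather table.
 */
static void mydrv_set_limits(request_queue_t *q)
{
	blk_queue_max_sectors(q, 128);		/* 128 * 512b = 64 KiB */
	blk_queue_max_phys_segments(q, 32);
	blk_queue_max_hw_segments(q, 32);
	blk_queue_max_segment_size(q, 64 * 1024);
	blk_queue_hardsect_size(q, 512);
}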
651 651
652 /* 652 /*
653 * Returns the minimum that is _not_ zero, unless both are zero. 653 * Returns the minimum that is _not_ zero, unless both are zero.
654 */ 654 */
655 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) 655 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
656 656
657 /** 657 /**
658 * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers 658 * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers
659 * @t: the stacking driver (top) 659 * @t: the stacking driver (top)
660 * @b: the underlying device (bottom) 660 * @b: the underlying device (bottom)
661 **/ 661 **/
662 void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) 662 void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)
663 { 663 {
664 /* zero is "infinity" */ 664 /* zero is "infinity" */
665 t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); 665 t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);
666 t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); 666 t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors);
667 667
668 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); 668 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
669 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); 669 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
670 t->max_segment_size = min(t->max_segment_size,b->max_segment_size); 670 t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
671 t->hardsect_size = max(t->hardsect_size,b->hardsect_size); 671 t->hardsect_size = max(t->hardsect_size,b->hardsect_size);
672 } 672 }
673 673
674 EXPORT_SYMBOL(blk_queue_stack_limits); 674 EXPORT_SYMBOL(blk_queue_stack_limits);
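/*
 * Editor's note (not part of the kernel source): min_not_zero(0, 256)
 * evaluates to 256 and min_not_zero(192, 256) to 192, which is how a
 * "zero means unlimited" limit is folded in above.  Since the macro body
 * is not parenthesized around l and r, it is safest with plain variables
 * as arguments.  A hypothetical stacking driver would fold in each
 * member device's queue as it is added:
 */
static void mydrv_add_member(request_queue_t *stacked_q,
			     struct block_device *member)
{
	blk_queue_stack_limits(stacked_q, bdev_get_queue(member));
}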
675 675
676 /** 676 /**
677 * blk_queue_segment_boundary - set boundary rules for segment merging 677 * blk_queue_segment_boundary - set boundary rules for segment merging
678 * @q: the request queue for the device 678 * @q: the request queue for the device
679 * @mask: the memory boundary mask 679 * @mask: the memory boundary mask
680 **/ 680 **/
681 void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask) 681 void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask)
682 { 682 {
683 if (mask < PAGE_CACHE_SIZE - 1) { 683 if (mask < PAGE_CACHE_SIZE - 1) {
684 mask = PAGE_CACHE_SIZE - 1; 684 mask = PAGE_CACHE_SIZE - 1;
685 printk("%s: set to minimum %lx\n", __FUNCTION__, mask); 685 printk("%s: set to minimum %lx\n", __FUNCTION__, mask);
686 } 686 }
687 687
688 q->seg_boundary_mask = mask; 688 q->seg_boundary_mask = mask;
689 } 689 }
690 690
691 EXPORT_SYMBOL(blk_queue_segment_boundary); 691 EXPORT_SYMBOL(blk_queue_segment_boundary);
692 692
693 /** 693 /**
694 * blk_queue_dma_alignment - set dma length and memory alignment 694 * blk_queue_dma_alignment - set dma length and memory alignment
695 * @q: the request queue for the device 695 * @q: the request queue for the device
696 * @mask: alignment mask 696 * @mask: alignment mask
697 * 697 *
698 * description: 698 * description:
 699 * set required memory and length alignment for direct dma transactions. 699 * set required memory and length alignment for direct dma transactions.
 700 * this is used when building direct io requests for the queue. 700 * this is used when building direct io requests for the queue.
701 * 701 *
702 **/ 702 **/
703 void blk_queue_dma_alignment(request_queue_t *q, int mask) 703 void blk_queue_dma_alignment(request_queue_t *q, int mask)
704 { 704 {
705 q->dma_alignment = mask; 705 q->dma_alignment = mask;
706 } 706 }
707 707
708 EXPORT_SYMBOL(blk_queue_dma_alignment); 708 EXPORT_SYMBOL(blk_queue_dma_alignment);
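/*
 * Editor's note (not part of the kernel source): a usage sketch for the
 * two hooks above, describing a hypothetical DMA engine that cannot
 * cross 4 KiB boundaries within a segment and needs 512-byte aligned
 * buffers and lengths for direct I/O.
 */
static void mydrv_set_dma_rules(request_queue_t *q)
{
	blk_queue_segment_boundary(q, 0xfff);	/* 4 KiB - 1 */
	blk_queue_dma_alignment(q, 511);	/* 512b - 1 */
}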
709 709
710 /** 710 /**
711 * blk_queue_find_tag - find a request by its tag and queue 711 * blk_queue_find_tag - find a request by its tag and queue
712 * @q: The request queue for the device 712 * @q: The request queue for the device
713 * @tag: The tag of the request 713 * @tag: The tag of the request
714 * 714 *
715 * Notes: 715 * Notes:
716 * Should be used when a device returns a tag and you want to match 716 * Should be used when a device returns a tag and you want to match
717 * it with a request. 717 * it with a request.
718 * 718 *
719 * no locks need be held. 719 * no locks need be held.
720 **/ 720 **/
721 struct request *blk_queue_find_tag(request_queue_t *q, int tag) 721 struct request *blk_queue_find_tag(request_queue_t *q, int tag)
722 { 722 {
723 struct blk_queue_tag *bqt = q->queue_tags; 723 struct blk_queue_tag *bqt = q->queue_tags;
724 724
725 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) 725 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
726 return NULL; 726 return NULL;
727 727
728 return bqt->tag_index[tag]; 728 return bqt->tag_index[tag];
729 } 729 }
730 730
731 EXPORT_SYMBOL(blk_queue_find_tag); 731 EXPORT_SYMBOL(blk_queue_find_tag);
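
For illustration only (not part of this diff): the usual caller is a driver's completion interrupt handler, mapping the tag reported by the hardware back to the request it belongs to. The exdrv names below are hypothetical.

static void exdrv_tag_complete(request_queue_t *q, int hw_tag)
{
	struct request *rq = blk_queue_find_tag(q, hw_tag);

	if (rq == NULL) {
		printk(KERN_ERR "exdrv: completion for unknown tag %d\n", hw_tag);
		return;
	}

	/* ... finish the transfer described by rq ... */
}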
732 732
733 /** 733 /**
734 * __blk_queue_free_tags - release tag maintenance info 734 * __blk_queue_free_tags - release tag maintenance info
735 * @q: the request queue for the device 735 * @q: the request queue for the device
736 * 736 *
737 * Notes: 737 * Notes:
738 * blk_cleanup_queue() will take care of calling this function, if tagging 738 * blk_cleanup_queue() will take care of calling this function, if tagging
739 * has been used. So there's no need to call this directly. 739 * has been used. So there's no need to call this directly.
740 **/ 740 **/
741 static void __blk_queue_free_tags(request_queue_t *q) 741 static void __blk_queue_free_tags(request_queue_t *q)
742 { 742 {
743 struct blk_queue_tag *bqt = q->queue_tags; 743 struct blk_queue_tag *bqt = q->queue_tags;
744 744
745 if (!bqt) 745 if (!bqt)
746 return; 746 return;
747 747
748 if (atomic_dec_and_test(&bqt->refcnt)) { 748 if (atomic_dec_and_test(&bqt->refcnt)) {
749 BUG_ON(bqt->busy); 749 BUG_ON(bqt->busy);
750 BUG_ON(!list_empty(&bqt->busy_list)); 750 BUG_ON(!list_empty(&bqt->busy_list));
751 751
752 kfree(bqt->tag_index); 752 kfree(bqt->tag_index);
753 bqt->tag_index = NULL; 753 bqt->tag_index = NULL;
754 754
755 kfree(bqt->tag_map); 755 kfree(bqt->tag_map);
756 bqt->tag_map = NULL; 756 bqt->tag_map = NULL;
757 757
758 kfree(bqt); 758 kfree(bqt);
759 } 759 }
760 760
761 q->queue_tags = NULL; 761 q->queue_tags = NULL;
762 q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); 762 q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
763 } 763 }
764 764
765 /** 765 /**
766 * blk_queue_free_tags - release tag maintenance info 766 * blk_queue_free_tags - release tag maintenance info
767 * @q: the request queue for the device 767 * @q: the request queue for the device
768 * 768 *
769 * Notes: 769 * Notes:
770 * This is used to disable tagged queuing on a device, yet leave the 770 * This is used to disable tagged queuing on a device, yet leave the
771 * queue in function. 771 * queue in function.
772 **/ 772 **/
773 void blk_queue_free_tags(request_queue_t *q) 773 void blk_queue_free_tags(request_queue_t *q)
774 { 774 {
775 clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); 775 clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
776 } 776 }
777 777
778 EXPORT_SYMBOL(blk_queue_free_tags); 778 EXPORT_SYMBOL(blk_queue_free_tags);
779 779
780 static int 780 static int
781 init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) 781 init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
782 { 782 {
783 struct request **tag_index; 783 struct request **tag_index;
784 unsigned long *tag_map; 784 unsigned long *tag_map;
785 int nr_ulongs; 785 int nr_ulongs;
786 786
787 if (depth > q->nr_requests * 2) { 787 if (depth > q->nr_requests * 2) {
788 depth = q->nr_requests * 2; 788 depth = q->nr_requests * 2;
789 printk(KERN_ERR "%s: adjusted depth to %d\n", 789 printk(KERN_ERR "%s: adjusted depth to %d\n",
790 __FUNCTION__, depth); 790 __FUNCTION__, depth);
791 } 791 }
792 792
793 tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC); 793 tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC);
794 if (!tag_index) 794 if (!tag_index)
795 goto fail; 795 goto fail;
796 796
797 nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; 797 nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;
798 tag_map = kmalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); 798 tag_map = kmalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC);
799 if (!tag_map) 799 if (!tag_map)
800 goto fail; 800 goto fail;
801 801
802 memset(tag_index, 0, depth * sizeof(struct request *)); 802 memset(tag_index, 0, depth * sizeof(struct request *));
803 memset(tag_map, 0, nr_ulongs * sizeof(unsigned long)); 803 memset(tag_map, 0, nr_ulongs * sizeof(unsigned long));
804 tags->real_max_depth = depth; 804 tags->real_max_depth = depth;
805 tags->max_depth = depth; 805 tags->max_depth = depth;
806 tags->tag_index = tag_index; 806 tags->tag_index = tag_index;
807 tags->tag_map = tag_map; 807 tags->tag_map = tag_map;
808 808
809 return 0; 809 return 0;
810 fail: 810 fail:
811 kfree(tag_index); 811 kfree(tag_index);
812 return -ENOMEM; 812 return -ENOMEM;
813 } 813 }
814 814
815 /** 815 /**
816 * blk_queue_init_tags - initialize the queue tag info 816 * blk_queue_init_tags - initialize the queue tag info
817 * @q: the request queue for the device 817 * @q: the request queue for the device
818 * @depth: the maximum queue depth supported 818 * @depth: the maximum queue depth supported
819 * @tags: the tag to use 819 * @tags: the tag to use
820 **/ 820 **/
821 int blk_queue_init_tags(request_queue_t *q, int depth, 821 int blk_queue_init_tags(request_queue_t *q, int depth,
822 struct blk_queue_tag *tags) 822 struct blk_queue_tag *tags)
823 { 823 {
824 int rc; 824 int rc;
825 825
826 BUG_ON(tags && q->queue_tags && tags != q->queue_tags); 826 BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
827 827
828 if (!tags && !q->queue_tags) { 828 if (!tags && !q->queue_tags) {
829 tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); 829 tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
830 if (!tags) 830 if (!tags)
831 goto fail; 831 goto fail;
832 832
833 if (init_tag_map(q, tags, depth)) 833 if (init_tag_map(q, tags, depth))
834 goto fail; 834 goto fail;
835 835
836 INIT_LIST_HEAD(&tags->busy_list); 836 INIT_LIST_HEAD(&tags->busy_list);
837 tags->busy = 0; 837 tags->busy = 0;
838 atomic_set(&tags->refcnt, 1); 838 atomic_set(&tags->refcnt, 1);
839 } else if (q->queue_tags) { 839 } else if (q->queue_tags) {
840 if ((rc = blk_queue_resize_tags(q, depth))) 840 if ((rc = blk_queue_resize_tags(q, depth)))
841 return rc; 841 return rc;
842 set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); 842 set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
843 return 0; 843 return 0;
844 } else 844 } else
845 atomic_inc(&tags->refcnt); 845 atomic_inc(&tags->refcnt);
846 846
847 /* 847 /*
848 * assign it, all done 848 * assign it, all done
849 */ 849 */
850 q->queue_tags = tags; 850 q->queue_tags = tags;
851 q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); 851 q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);
852 return 0; 852 return 0;
853 fail: 853 fail:
854 kfree(tags); 854 kfree(tags);
855 return -ENOMEM; 855 return -ENOMEM;
856 } 856 }
857 857
858 EXPORT_SYMBOL(blk_queue_init_tags); 858 EXPORT_SYMBOL(blk_queue_init_tags);
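
For illustration only (not part of this diff): a driver that wants the block layer to own the tag map simply passes a NULL @tags. The exdrv name and the depth value below are hypothetical.

static int exdrv_enable_tcq(request_queue_t *q)
{
	/* let the block layer allocate and manage a 32-entry tag map */
	return blk_queue_init_tags(q, 32, NULL);
}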
859 859
860 /** 860 /**
861 * blk_queue_resize_tags - change the queueing depth 861 * blk_queue_resize_tags - change the queueing depth
862 * @q: the request queue for the device 862 * @q: the request queue for the device
863 * @new_depth: the new max command queueing depth 863 * @new_depth: the new max command queueing depth
864 * 864 *
865 * Notes: 865 * Notes:
866 * Must be called with the queue lock held. 866 * Must be called with the queue lock held.
867 **/ 867 **/
868 int blk_queue_resize_tags(request_queue_t *q, int new_depth) 868 int blk_queue_resize_tags(request_queue_t *q, int new_depth)
869 { 869 {
870 struct blk_queue_tag *bqt = q->queue_tags; 870 struct blk_queue_tag *bqt = q->queue_tags;
871 struct request **tag_index; 871 struct request **tag_index;
872 unsigned long *tag_map; 872 unsigned long *tag_map;
873 int max_depth, nr_ulongs; 873 int max_depth, nr_ulongs;
874 874
875 if (!bqt) 875 if (!bqt)
876 return -ENXIO; 876 return -ENXIO;
877 877
878 /* 878 /*
879 * if we already have a large enough real_max_depth, just 879 * if we already have a large enough real_max_depth, just
880 * adjust max_depth. *NOTE* as requests with tag value 880 * adjust max_depth. *NOTE* as requests with tag value
881 * between new_depth and real_max_depth can be in-flight, tag 881 * between new_depth and real_max_depth can be in-flight, tag
882 * map can not be shrunk blindly here. 882 * map can not be shrunk blindly here.
883 */ 883 */
884 if (new_depth <= bqt->real_max_depth) { 884 if (new_depth <= bqt->real_max_depth) {
885 bqt->max_depth = new_depth; 885 bqt->max_depth = new_depth;
886 return 0; 886 return 0;
887 } 887 }
888 888
889 /* 889 /*
890 * save the old state info, so we can copy it back 890 * save the old state info, so we can copy it back
891 */ 891 */
892 tag_index = bqt->tag_index; 892 tag_index = bqt->tag_index;
893 tag_map = bqt->tag_map; 893 tag_map = bqt->tag_map;
894 max_depth = bqt->real_max_depth; 894 max_depth = bqt->real_max_depth;
895 895
896 if (init_tag_map(q, bqt, new_depth)) 896 if (init_tag_map(q, bqt, new_depth))
897 return -ENOMEM; 897 return -ENOMEM;
898 898
899 memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); 899 memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *));
900 nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; 900 nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG;
901 memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); 901 memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long));
902 902
903 kfree(tag_index); 903 kfree(tag_index);
904 kfree(tag_map); 904 kfree(tag_map);
905 return 0; 905 return 0;
906 } 906 }
907 907
908 EXPORT_SYMBOL(blk_queue_resize_tags); 908 EXPORT_SYMBOL(blk_queue_resize_tags);
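
For illustration only (not part of this diff): because the resize must run under the queue lock, a caller outside the request_fn would wrap it roughly like this (exdrv is hypothetical).

static int exdrv_set_depth(request_queue_t *q, int new_depth)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(q->queue_lock, flags);
	ret = blk_queue_resize_tags(q, new_depth);
	spin_unlock_irqrestore(q->queue_lock, flags);

	return ret;
}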
909 909
910 /** 910 /**
911 * blk_queue_end_tag - end tag operations for a request 911 * blk_queue_end_tag - end tag operations for a request
912 * @q: the request queue for the device 912 * @q: the request queue for the device
913 * @rq: the request that has completed 913 * @rq: the request that has completed
914 * 914 *
915 * Description: 915 * Description:
916 * Typically called when end_that_request_first() returns 0, meaning 916 * Typically called when end_that_request_first() returns 0, meaning
917 * all transfers have been done for a request. It's important to call 917 * all transfers have been done for a request. It's important to call
918 * this function before end_that_request_last(), as that will put the 918 * this function before end_that_request_last(), as that will put the
919 * request back on the free list thus corrupting the internal tag list. 919 * request back on the free list thus corrupting the internal tag list.
920 * 920 *
921 * Notes: 921 * Notes:
922 * queue lock must be held. 922 * queue lock must be held.
923 **/ 923 **/
924 void blk_queue_end_tag(request_queue_t *q, struct request *rq) 924 void blk_queue_end_tag(request_queue_t *q, struct request *rq)
925 { 925 {
926 struct blk_queue_tag *bqt = q->queue_tags; 926 struct blk_queue_tag *bqt = q->queue_tags;
927 int tag = rq->tag; 927 int tag = rq->tag;
928 928
929 BUG_ON(tag == -1); 929 BUG_ON(tag == -1);
930 930
931 if (unlikely(tag >= bqt->real_max_depth)) 931 if (unlikely(tag >= bqt->real_max_depth))
932 /* 932 /*
933 * This can happen after tag depth has been reduced. 933 * This can happen after tag depth has been reduced.
934 * FIXME: how about a warning or info message here? 934 * FIXME: how about a warning or info message here?
935 */ 935 */
936 return; 936 return;
937 937
938 if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) { 938 if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) {
939 printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", 939 printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
940 __FUNCTION__, tag); 940 __FUNCTION__, tag);
941 return; 941 return;
942 } 942 }
943 943
944 list_del_init(&rq->queuelist); 944 list_del_init(&rq->queuelist);
945 rq->flags &= ~REQ_QUEUED; 945 rq->flags &= ~REQ_QUEUED;
946 rq->tag = -1; 946 rq->tag = -1;
947 947
948 if (unlikely(bqt->tag_index[tag] == NULL)) 948 if (unlikely(bqt->tag_index[tag] == NULL))
949 printk(KERN_ERR "%s: tag %d is missing\n", 949 printk(KERN_ERR "%s: tag %d is missing\n",
950 __FUNCTION__, tag); 950 __FUNCTION__, tag);
951 951
952 bqt->tag_index[tag] = NULL; 952 bqt->tag_index[tag] = NULL;
953 bqt->busy--; 953 bqt->busy--;
954 } 954 }
955 955
956 EXPORT_SYMBOL(blk_queue_end_tag); 956 EXPORT_SYMBOL(blk_queue_end_tag);
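
For illustration only (not part of this diff): the ordering requirement in the description above, releasing the tag before the request is handed back, looks roughly like this in a hypothetical driver's completion path (queue lock held).

static void exdrv_complete_rq(request_queue_t *q, struct request *rq)
{
	/* caller holds q->queue_lock */
	blk_queue_end_tag(q, rq);	/* free the tag first ... */

	/* ... then hand the request back to the block layer, e.g. via
	 * end_that_request_first()/end_that_request_last() */
}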
957 957
958 /** 958 /**
959 * blk_queue_start_tag - find a free tag and assign it 959 * blk_queue_start_tag - find a free tag and assign it
960 * @q: the request queue for the device 960 * @q: the request queue for the device
961 * @rq: the block request that needs tagging 961 * @rq: the block request that needs tagging
962 * 962 *
963 * Description: 963 * Description:
964 * This can either be used as a stand-alone helper, or possibly be 964 * This can either be used as a stand-alone helper, or possibly be
965 * assigned as the queue &prep_rq_fn (in which case &struct request 965 * assigned as the queue &prep_rq_fn (in which case &struct request
966 * automagically gets a tag assigned). Note that this function 966 * automagically gets a tag assigned). Note that this function
967 * assumes that any type of request can be queued! If this is not 967 * assumes that any type of request can be queued! If this is not
968 * true for your device, you must check the request type before 968 * true for your device, you must check the request type before
969 * calling this function. The request will also be removed from 969 * calling this function. The request will also be removed from
970 * the request queue, so it is the driver's responsibility to re-add 970 * the request queue, so it is the driver's responsibility to re-add
971 * it if it should need to be restarted for some reason. 971 * it if it should need to be restarted for some reason.
972 * 972 *
973 * Notes: 973 * Notes:
974 * queue lock must be held. 974 * queue lock must be held.
975 **/ 975 **/
976 int blk_queue_start_tag(request_queue_t *q, struct request *rq) 976 int blk_queue_start_tag(request_queue_t *q, struct request *rq)
977 { 977 {
978 struct blk_queue_tag *bqt = q->queue_tags; 978 struct blk_queue_tag *bqt = q->queue_tags;
979 int tag; 979 int tag;
980 980
981 if (unlikely((rq->flags & REQ_QUEUED))) { 981 if (unlikely((rq->flags & REQ_QUEUED))) {
982 printk(KERN_ERR 982 printk(KERN_ERR
983 "%s: request %p for device [%s] already tagged %d", 983 "%s: request %p for device [%s] already tagged %d",
984 __FUNCTION__, rq, 984 __FUNCTION__, rq,
985 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); 985 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
986 BUG(); 986 BUG();
987 } 987 }
988 988
989 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); 989 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
990 if (tag >= bqt->max_depth) 990 if (tag >= bqt->max_depth)
991 return 1; 991 return 1;
992 992
993 __set_bit(tag, bqt->tag_map); 993 __set_bit(tag, bqt->tag_map);
994 994
995 rq->flags |= REQ_QUEUED; 995 rq->flags |= REQ_QUEUED;
996 rq->tag = tag; 996 rq->tag = tag;
997 bqt->tag_index[tag] = rq; 997 bqt->tag_index[tag] = rq;
998 blkdev_dequeue_request(rq); 998 blkdev_dequeue_request(rq);
999 list_add(&rq->queuelist, &bqt->busy_list); 999 list_add(&rq->queuelist, &bqt->busy_list);
1000 bqt->busy++; 1000 bqt->busy++;
1001 return 0; 1001 return 0;
1002 } 1002 }
1003 1003
1004 EXPORT_SYMBOL(blk_queue_start_tag); 1004 EXPORT_SYMBOL(blk_queue_start_tag);
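
For illustration only (not part of this diff): the common pattern is to tag requests from the driver's request_fn and stop when the tag map is exhausted. The exdrv name and the hardware-submit step are hypothetical.

static void exdrv_request_fn(request_queue_t *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		if (blk_queue_start_tag(q, rq))
			break;	/* no free tag, retry when one is released */

		/* rq is now dequeued and carries rq->tag */
		/* ... pass rq->tag and its data to the hardware ... */
	}
}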
1005 1005
1006 /** 1006 /**
1007 * blk_queue_invalidate_tags - invalidate all pending tags 1007 * blk_queue_invalidate_tags - invalidate all pending tags
1008 * @q: the request queue for the device 1008 * @q: the request queue for the device
1009 * 1009 *
1010 * Description: 1010 * Description:
1011 * Hardware conditions may dictate a need to stop all pending requests. 1011 * Hardware conditions may dictate a need to stop all pending requests.
1012 * In this case, we will safely clear the block side of the tag queue and 1012 * In this case, we will safely clear the block side of the tag queue and
1013 * re-add all requests to the request queue in the right order. 1013 * re-add all requests to the request queue in the right order.
1014 * 1014 *
1015 * Notes: 1015 * Notes:
1016 * queue lock must be held. 1016 * queue lock must be held.
1017 **/ 1017 **/
1018 void blk_queue_invalidate_tags(request_queue_t *q) 1018 void blk_queue_invalidate_tags(request_queue_t *q)
1019 { 1019 {
1020 struct blk_queue_tag *bqt = q->queue_tags; 1020 struct blk_queue_tag *bqt = q->queue_tags;
1021 struct list_head *tmp, *n; 1021 struct list_head *tmp, *n;
1022 struct request *rq; 1022 struct request *rq;
1023 1023
1024 list_for_each_safe(tmp, n, &bqt->busy_list) { 1024 list_for_each_safe(tmp, n, &bqt->busy_list) {
1025 rq = list_entry_rq(tmp); 1025 rq = list_entry_rq(tmp);
1026 1026
1027 if (rq->tag == -1) { 1027 if (rq->tag == -1) {
1028 printk(KERN_ERR 1028 printk(KERN_ERR
1029 "%s: bad tag found on list\n", __FUNCTION__); 1029 "%s: bad tag found on list\n", __FUNCTION__);
1030 list_del_init(&rq->queuelist); 1030 list_del_init(&rq->queuelist);
1031 rq->flags &= ~REQ_QUEUED; 1031 rq->flags &= ~REQ_QUEUED;
1032 } else 1032 } else
1033 blk_queue_end_tag(q, rq); 1033 blk_queue_end_tag(q, rq);
1034 1034
1035 rq->flags &= ~REQ_STARTED; 1035 rq->flags &= ~REQ_STARTED;
1036 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); 1036 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1037 } 1037 }
1038 } 1038 }
1039 1039
1040 EXPORT_SYMBOL(blk_queue_invalidate_tags); 1040 EXPORT_SYMBOL(blk_queue_invalidate_tags);
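
For illustration only (not part of this diff): this is typically called from a driver's reset or error handler, with the queue lock taken around it (exdrv is hypothetical).

static void exdrv_bus_reset(request_queue_t *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_queue_invalidate_tags(q);	/* requeue everything the hw dropped */
	spin_unlock_irqrestore(q->queue_lock, flags);
}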
1041 1041
1042 static char *rq_flags[] = { 1042 static const char * const rq_flags[] = {
1043 "REQ_RW", 1043 "REQ_RW",
1044 "REQ_FAILFAST", 1044 "REQ_FAILFAST",
1045 "REQ_SORTED", 1045 "REQ_SORTED",
1046 "REQ_SOFTBARRIER", 1046 "REQ_SOFTBARRIER",
1047 "REQ_HARDBARRIER", 1047 "REQ_HARDBARRIER",
1048 "REQ_CMD", 1048 "REQ_CMD",
1049 "REQ_NOMERGE", 1049 "REQ_NOMERGE",
1050 "REQ_STARTED", 1050 "REQ_STARTED",
1051 "REQ_DONTPREP", 1051 "REQ_DONTPREP",
1052 "REQ_QUEUED", 1052 "REQ_QUEUED",
1053 "REQ_ELVPRIV", 1053 "REQ_ELVPRIV",
1054 "REQ_PC", 1054 "REQ_PC",
1055 "REQ_BLOCK_PC", 1055 "REQ_BLOCK_PC",
1056 "REQ_SENSE", 1056 "REQ_SENSE",
1057 "REQ_FAILED", 1057 "REQ_FAILED",
1058 "REQ_QUIET", 1058 "REQ_QUIET",
1059 "REQ_SPECIAL", 1059 "REQ_SPECIAL",
1060 "REQ_DRIVE_CMD", 1060 "REQ_DRIVE_CMD",
1061 "REQ_DRIVE_TASK", 1061 "REQ_DRIVE_TASK",
1062 "REQ_DRIVE_TASKFILE", 1062 "REQ_DRIVE_TASKFILE",
1063 "REQ_PREEMPT", 1063 "REQ_PREEMPT",
1064 "REQ_PM_SUSPEND", 1064 "REQ_PM_SUSPEND",
1065 "REQ_PM_RESUME", 1065 "REQ_PM_RESUME",
1066 "REQ_PM_SHUTDOWN", 1066 "REQ_PM_SHUTDOWN",
1067 }; 1067 };
1068 1068
1069 void blk_dump_rq_flags(struct request *rq, char *msg) 1069 void blk_dump_rq_flags(struct request *rq, char *msg)
1070 { 1070 {
1071 int bit; 1071 int bit;
1072 1072
1073 printk("%s: dev %s: flags = ", msg, 1073 printk("%s: dev %s: flags = ", msg,
1074 rq->rq_disk ? rq->rq_disk->disk_name : "?"); 1074 rq->rq_disk ? rq->rq_disk->disk_name : "?");
1075 bit = 0; 1075 bit = 0;
1076 do { 1076 do {
1077 if (rq->flags & (1 << bit)) 1077 if (rq->flags & (1 << bit))
1078 printk("%s ", rq_flags[bit]); 1078 printk("%s ", rq_flags[bit]);
1079 bit++; 1079 bit++;
1080 } while (bit < __REQ_NR_BITS); 1080 } while (bit < __REQ_NR_BITS);
1081 1081
1082 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, 1082 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
1083 rq->nr_sectors, 1083 rq->nr_sectors,
1084 rq->current_nr_sectors); 1084 rq->current_nr_sectors);
1085 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); 1085 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
1086 1086
1087 if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) { 1087 if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) {
1088 printk("cdb: "); 1088 printk("cdb: ");
1089 for (bit = 0; bit < sizeof(rq->cmd); bit++) 1089 for (bit = 0; bit < sizeof(rq->cmd); bit++)
1090 printk("%02x ", rq->cmd[bit]); 1090 printk("%02x ", rq->cmd[bit]);
1091 printk("\n"); 1091 printk("\n");
1092 } 1092 }
1093 } 1093 }
1094 1094
1095 EXPORT_SYMBOL(blk_dump_rq_flags); 1095 EXPORT_SYMBOL(blk_dump_rq_flags);
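
For illustration only (not part of this diff): blk_dump_rq_flags() is handy in driver error paths, for instance when a prep_rq_fn rejects a request. The size limit and the exdrv name below are made up.

static int exdrv_prep_rq(request_queue_t *q, struct request *rq)
{
	if (rq->nr_sectors > 256) {	/* hypothetical hardware limit */
		blk_dump_rq_flags(rq, "exdrv: rejecting oversized request");
		return BLKPREP_KILL;
	}

	return BLKPREP_OK;
}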
1096 1096
1097 void blk_recount_segments(request_queue_t *q, struct bio *bio) 1097 void blk_recount_segments(request_queue_t *q, struct bio *bio)
1098 { 1098 {
1099 struct bio_vec *bv, *bvprv = NULL; 1099 struct bio_vec *bv, *bvprv = NULL;
1100 int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster; 1100 int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster;
1101 int high, highprv = 1; 1101 int high, highprv = 1;
1102 1102
1103 if (unlikely(!bio->bi_io_vec)) 1103 if (unlikely(!bio->bi_io_vec))
1104 return; 1104 return;
1105 1105
1106 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); 1106 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
1107 hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0; 1107 hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0;
1108 bio_for_each_segment(bv, bio, i) { 1108 bio_for_each_segment(bv, bio, i) {
1109 /* 1109 /*
1110 * the trick here is making sure that a high page is never 1110 * the trick here is making sure that a high page is never
1111 * considered part of another segment, since that might 1111 * considered part of another segment, since that might
1112 * change with the bounce page. 1112 * change with the bounce page.
1113 */ 1113 */
1114 high = page_to_pfn(bv->bv_page) >= q->bounce_pfn; 1114 high = page_to_pfn(bv->bv_page) >= q->bounce_pfn;
1115 if (high || highprv) 1115 if (high || highprv)
1116 goto new_hw_segment; 1116 goto new_hw_segment;
1117 if (cluster) { 1117 if (cluster) {
1118 if (seg_size + bv->bv_len > q->max_segment_size) 1118 if (seg_size + bv->bv_len > q->max_segment_size)
1119 goto new_segment; 1119 goto new_segment;
1120 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) 1120 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
1121 goto new_segment; 1121 goto new_segment;
1122 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) 1122 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
1123 goto new_segment; 1123 goto new_segment;
1124 if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) 1124 if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
1125 goto new_hw_segment; 1125 goto new_hw_segment;
1126 1126
1127 seg_size += bv->bv_len; 1127 seg_size += bv->bv_len;
1128 hw_seg_size += bv->bv_len; 1128 hw_seg_size += bv->bv_len;
1129 bvprv = bv; 1129 bvprv = bv;
1130 continue; 1130 continue;
1131 } 1131 }
1132 new_segment: 1132 new_segment:
1133 if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && 1133 if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
1134 !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) { 1134 !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
1135 hw_seg_size += bv->bv_len; 1135 hw_seg_size += bv->bv_len;
1136 } else { 1136 } else {
1137 new_hw_segment: 1137 new_hw_segment:
1138 if (hw_seg_size > bio->bi_hw_front_size) 1138 if (hw_seg_size > bio->bi_hw_front_size)
1139 bio->bi_hw_front_size = hw_seg_size; 1139 bio->bi_hw_front_size = hw_seg_size;
1140 hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; 1140 hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
1141 nr_hw_segs++; 1141 nr_hw_segs++;
1142 } 1142 }
1143 1143
1144 nr_phys_segs++; 1144 nr_phys_segs++;
1145 bvprv = bv; 1145 bvprv = bv;
1146 seg_size = bv->bv_len; 1146 seg_size = bv->bv_len;
1147 highprv = high; 1147 highprv = high;
1148 } 1148 }
1149 if (hw_seg_size > bio->bi_hw_back_size) 1149 if (hw_seg_size > bio->bi_hw_back_size)
1150 bio->bi_hw_back_size = hw_seg_size; 1150 bio->bi_hw_back_size = hw_seg_size;
1151 if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size) 1151 if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size)
1152 bio->bi_hw_front_size = hw_seg_size; 1152 bio->bi_hw_front_size = hw_seg_size;
1153 bio->bi_phys_segments = nr_phys_segs; 1153 bio->bi_phys_segments = nr_phys_segs;
1154 bio->bi_hw_segments = nr_hw_segs; 1154 bio->bi_hw_segments = nr_hw_segs;
1155 bio->bi_flags |= (1 << BIO_SEG_VALID); 1155 bio->bi_flags |= (1 << BIO_SEG_VALID);
1156 } 1156 }
1157 1157
1158 1158
1159 static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio, 1159 static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio,
1160 struct bio *nxt) 1160 struct bio *nxt)
1161 { 1161 {
1162 if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) 1162 if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
1163 return 0; 1163 return 0;
1164 1164
1165 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) 1165 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
1166 return 0; 1166 return 0;
1167 if (bio->bi_size + nxt->bi_size > q->max_segment_size) 1167 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
1168 return 0; 1168 return 0;
1169 1169
1170 /* 1170 /*
1171 * bio and nxt are contiguous in memory, check if the queue allows 1171 * bio and nxt are contiguous in memory, check if the queue allows
1172 * these two to be merged into one 1172 * these two to be merged into one
1173 */ 1173 */
1174 if (BIO_SEG_BOUNDARY(q, bio, nxt)) 1174 if (BIO_SEG_BOUNDARY(q, bio, nxt))
1175 return 1; 1175 return 1;
1176 1176
1177 return 0; 1177 return 0;
1178 } 1178 }
1179 1179
1180 static int blk_hw_contig_segment(request_queue_t *q, struct bio *bio, 1180 static int blk_hw_contig_segment(request_queue_t *q, struct bio *bio,
1181 struct bio *nxt) 1181 struct bio *nxt)
1182 { 1182 {
1183 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) 1183 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1184 blk_recount_segments(q, bio); 1184 blk_recount_segments(q, bio);
1185 if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID))) 1185 if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID)))
1186 blk_recount_segments(q, nxt); 1186 blk_recount_segments(q, nxt);
1187 if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || 1187 if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
1188 BIOVEC_VIRT_OVERSIZE(bio->bi_hw_front_size + bio->bi_hw_back_size)) 1188 BIOVEC_VIRT_OVERSIZE(bio->bi_hw_front_size + bio->bi_hw_back_size))
1189 return 0; 1189 return 0;
1190 if (bio->bi_size + nxt->bi_size > q->max_segment_size) 1190 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
1191 return 0; 1191 return 0;
1192 1192
1193 return 1; 1193 return 1;
1194 } 1194 }
1195 1195
1196 /* 1196 /*
1197 * map a request to scatterlist, return number of sg entries set up. Caller 1197 * map a request to scatterlist, return number of sg entries set up. Caller
1198 * must make sure sg can hold rq->nr_phys_segments entries 1198 * must make sure sg can hold rq->nr_phys_segments entries
1199 */ 1199 */
1200 int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg) 1200 int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg)
1201 { 1201 {
1202 struct bio_vec *bvec, *bvprv; 1202 struct bio_vec *bvec, *bvprv;
1203 struct bio *bio; 1203 struct bio *bio;
1204 int nsegs, i, cluster; 1204 int nsegs, i, cluster;
1205 1205
1206 nsegs = 0; 1206 nsegs = 0;
1207 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); 1207 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
1208 1208
1209 /* 1209 /*
1210 * for each bio in rq 1210 * for each bio in rq
1211 */ 1211 */
1212 bvprv = NULL; 1212 bvprv = NULL;
1213 rq_for_each_bio(bio, rq) { 1213 rq_for_each_bio(bio, rq) {
1214 /* 1214 /*
1215 * for each segment in bio 1215 * for each segment in bio
1216 */ 1216 */
1217 bio_for_each_segment(bvec, bio, i) { 1217 bio_for_each_segment(bvec, bio, i) {
1218 int nbytes = bvec->bv_len; 1218 int nbytes = bvec->bv_len;
1219 1219
1220 if (bvprv && cluster) { 1220 if (bvprv && cluster) {
1221 if (sg[nsegs - 1].length + nbytes > q->max_segment_size) 1221 if (sg[nsegs - 1].length + nbytes > q->max_segment_size)
1222 goto new_segment; 1222 goto new_segment;
1223 1223
1224 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) 1224 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
1225 goto new_segment; 1225 goto new_segment;
1226 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) 1226 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
1227 goto new_segment; 1227 goto new_segment;
1228 1228
1229 sg[nsegs - 1].length += nbytes; 1229 sg[nsegs - 1].length += nbytes;
1230 } else { 1230 } else {
1231 new_segment: 1231 new_segment:
1232 memset(&sg[nsegs],0,sizeof(struct scatterlist)); 1232 memset(&sg[nsegs],0,sizeof(struct scatterlist));
1233 sg[nsegs].page = bvec->bv_page; 1233 sg[nsegs].page = bvec->bv_page;
1234 sg[nsegs].length = nbytes; 1234 sg[nsegs].length = nbytes;
1235 sg[nsegs].offset = bvec->bv_offset; 1235 sg[nsegs].offset = bvec->bv_offset;
1236 1236
1237 nsegs++; 1237 nsegs++;
1238 } 1238 }
1239 bvprv = bvec; 1239 bvprv = bvec;
1240 } /* segments in bio */ 1240 } /* segments in bio */
1241 } /* bios in rq */ 1241 } /* bios in rq */
1242 1242
1243 return nsegs; 1243 return nsegs;
1244 } 1244 }
1245 1245
1246 EXPORT_SYMBOL(blk_rq_map_sg); 1246 EXPORT_SYMBOL(blk_rq_map_sg);
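
For illustration only (not part of this diff): a driver builds its DMA scatterlist from a request like this; the sg array belongs to the caller and must hold rq->nr_phys_segments entries (exdrv is hypothetical).

static int exdrv_map_request(request_queue_t *q, struct request *rq,
			     struct scatterlist *sg)
{
	int nsegs;

	/* sg[] must be able to hold rq->nr_phys_segments entries */
	nsegs = blk_rq_map_sg(q, rq, sg);

	/* ... program the DMA engine with the nsegs entries ... */
	return nsegs;
}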
1247 1247
1248 /* 1248 /*
1249 * the standard queue merge functions, can be overridden with device 1249 * the standard queue merge functions, can be overridden with device
1250 * specific ones if so desired 1250 * specific ones if so desired
1251 */ 1251 */
1252 1252
1253 static inline int ll_new_mergeable(request_queue_t *q, 1253 static inline int ll_new_mergeable(request_queue_t *q,
1254 struct request *req, 1254 struct request *req,
1255 struct bio *bio) 1255 struct bio *bio)
1256 { 1256 {
1257 int nr_phys_segs = bio_phys_segments(q, bio); 1257 int nr_phys_segs = bio_phys_segments(q, bio);
1258 1258
1259 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 1259 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1260 req->flags |= REQ_NOMERGE; 1260 req->flags |= REQ_NOMERGE;
1261 if (req == q->last_merge) 1261 if (req == q->last_merge)
1262 q->last_merge = NULL; 1262 q->last_merge = NULL;
1263 return 0; 1263 return 0;
1264 } 1264 }
1265 1265
1266 /* 1266 /*
1267 * A hw segment is just getting larger, bump just the phys 1267 * A hw segment is just getting larger, bump just the phys
1268 * counter. 1268 * counter.
1269 */ 1269 */
1270 req->nr_phys_segments += nr_phys_segs; 1270 req->nr_phys_segments += nr_phys_segs;
1271 return 1; 1271 return 1;
1272 } 1272 }
1273 1273
1274 static inline int ll_new_hw_segment(request_queue_t *q, 1274 static inline int ll_new_hw_segment(request_queue_t *q,
1275 struct request *req, 1275 struct request *req,
1276 struct bio *bio) 1276 struct bio *bio)
1277 { 1277 {
1278 int nr_hw_segs = bio_hw_segments(q, bio); 1278 int nr_hw_segs = bio_hw_segments(q, bio);
1279 int nr_phys_segs = bio_phys_segments(q, bio); 1279 int nr_phys_segs = bio_phys_segments(q, bio);
1280 1280
1281 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments 1281 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
1282 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 1282 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1283 req->flags |= REQ_NOMERGE; 1283 req->flags |= REQ_NOMERGE;
1284 if (req == q->last_merge) 1284 if (req == q->last_merge)
1285 q->last_merge = NULL; 1285 q->last_merge = NULL;
1286 return 0; 1286 return 0;
1287 } 1287 }
1288 1288
1289 /* 1289 /*
1290 * This will form the start of a new hw segment. Bump both 1290 * This will form the start of a new hw segment. Bump both
1291 * counters. 1291 * counters.
1292 */ 1292 */
1293 req->nr_hw_segments += nr_hw_segs; 1293 req->nr_hw_segments += nr_hw_segs;
1294 req->nr_phys_segments += nr_phys_segs; 1294 req->nr_phys_segments += nr_phys_segs;
1295 return 1; 1295 return 1;
1296 } 1296 }
1297 1297
1298 static int ll_back_merge_fn(request_queue_t *q, struct request *req, 1298 static int ll_back_merge_fn(request_queue_t *q, struct request *req,
1299 struct bio *bio) 1299 struct bio *bio)
1300 { 1300 {
1301 unsigned short max_sectors; 1301 unsigned short max_sectors;
1302 int len; 1302 int len;
1303 1303
1304 if (unlikely(blk_pc_request(req))) 1304 if (unlikely(blk_pc_request(req)))
1305 max_sectors = q->max_hw_sectors; 1305 max_sectors = q->max_hw_sectors;
1306 else 1306 else
1307 max_sectors = q->max_sectors; 1307 max_sectors = q->max_sectors;
1308 1308
1309 if (req->nr_sectors + bio_sectors(bio) > max_sectors) { 1309 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1310 req->flags |= REQ_NOMERGE; 1310 req->flags |= REQ_NOMERGE;
1311 if (req == q->last_merge) 1311 if (req == q->last_merge)
1312 q->last_merge = NULL; 1312 q->last_merge = NULL;
1313 return 0; 1313 return 0;
1314 } 1314 }
1315 if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID))) 1315 if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
1316 blk_recount_segments(q, req->biotail); 1316 blk_recount_segments(q, req->biotail);
1317 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) 1317 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1318 blk_recount_segments(q, bio); 1318 blk_recount_segments(q, bio);
1319 len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; 1319 len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
1320 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) && 1320 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) &&
1321 !BIOVEC_VIRT_OVERSIZE(len)) { 1321 !BIOVEC_VIRT_OVERSIZE(len)) {
1322 int mergeable = ll_new_mergeable(q, req, bio); 1322 int mergeable = ll_new_mergeable(q, req, bio);
1323 1323
1324 if (mergeable) { 1324 if (mergeable) {
1325 if (req->nr_hw_segments == 1) 1325 if (req->nr_hw_segments == 1)
1326 req->bio->bi_hw_front_size = len; 1326 req->bio->bi_hw_front_size = len;
1327 if (bio->bi_hw_segments == 1) 1327 if (bio->bi_hw_segments == 1)
1328 bio->bi_hw_back_size = len; 1328 bio->bi_hw_back_size = len;
1329 } 1329 }
1330 return mergeable; 1330 return mergeable;
1331 } 1331 }
1332 1332
1333 return ll_new_hw_segment(q, req, bio); 1333 return ll_new_hw_segment(q, req, bio);
1334 } 1334 }
1335 1335
1336 static int ll_front_merge_fn(request_queue_t *q, struct request *req, 1336 static int ll_front_merge_fn(request_queue_t *q, struct request *req,
1337 struct bio *bio) 1337 struct bio *bio)
1338 { 1338 {
1339 unsigned short max_sectors; 1339 unsigned short max_sectors;
1340 int len; 1340 int len;
1341 1341
1342 if (unlikely(blk_pc_request(req))) 1342 if (unlikely(blk_pc_request(req)))
1343 max_sectors = q->max_hw_sectors; 1343 max_sectors = q->max_hw_sectors;
1344 else 1344 else
1345 max_sectors = q->max_sectors; 1345 max_sectors = q->max_sectors;
1346 1346
1347 1347
1348 if (req->nr_sectors + bio_sectors(bio) > max_sectors) { 1348 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1349 req->flags |= REQ_NOMERGE; 1349 req->flags |= REQ_NOMERGE;
1350 if (req == q->last_merge) 1350 if (req == q->last_merge)
1351 q->last_merge = NULL; 1351 q->last_merge = NULL;
1352 return 0; 1352 return 0;
1353 } 1353 }
1354 len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; 1354 len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
1355 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) 1355 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1356 blk_recount_segments(q, bio); 1356 blk_recount_segments(q, bio);
1357 if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID))) 1357 if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID)))
1358 blk_recount_segments(q, req->bio); 1358 blk_recount_segments(q, req->bio);
1359 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && 1359 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
1360 !BIOVEC_VIRT_OVERSIZE(len)) { 1360 !BIOVEC_VIRT_OVERSIZE(len)) {
1361 int mergeable = ll_new_mergeable(q, req, bio); 1361 int mergeable = ll_new_mergeable(q, req, bio);
1362 1362
1363 if (mergeable) { 1363 if (mergeable) {
1364 if (bio->bi_hw_segments == 1) 1364 if (bio->bi_hw_segments == 1)
1365 bio->bi_hw_front_size = len; 1365 bio->bi_hw_front_size = len;
1366 if (req->nr_hw_segments == 1) 1366 if (req->nr_hw_segments == 1)
1367 req->biotail->bi_hw_back_size = len; 1367 req->biotail->bi_hw_back_size = len;
1368 } 1368 }
1369 return mergeable; 1369 return mergeable;
1370 } 1370 }
1371 1371
1372 return ll_new_hw_segment(q, req, bio); 1372 return ll_new_hw_segment(q, req, bio);
1373 } 1373 }
1374 1374
1375 static int ll_merge_requests_fn(request_queue_t *q, struct request *req, 1375 static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
1376 struct request *next) 1376 struct request *next)
1377 { 1377 {
1378 int total_phys_segments; 1378 int total_phys_segments;
1379 int total_hw_segments; 1379 int total_hw_segments;
1380 1380
1381 /* 1381 /*
1382 * First check if the either of the requests are re-queued 1382 * First check if the either of the requests are re-queued
1383 * requests. Can't merge them if they are. 1383 * requests. Can't merge them if they are.
1384 */ 1384 */
1385 if (req->special || next->special) 1385 if (req->special || next->special)
1386 return 0; 1386 return 0;
1387 1387
1388 /* 1388 /*
1389 * Will it become too large? 1389 * Will it become too large?
1390 */ 1390 */
1391 if ((req->nr_sectors + next->nr_sectors) > q->max_sectors) 1391 if ((req->nr_sectors + next->nr_sectors) > q->max_sectors)
1392 return 0; 1392 return 0;
1393 1393
1394 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; 1394 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
1395 if (blk_phys_contig_segment(q, req->biotail, next->bio)) 1395 if (blk_phys_contig_segment(q, req->biotail, next->bio))
1396 total_phys_segments--; 1396 total_phys_segments--;
1397 1397
1398 if (total_phys_segments > q->max_phys_segments) 1398 if (total_phys_segments > q->max_phys_segments)
1399 return 0; 1399 return 0;
1400 1400
1401 total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; 1401 total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
1402 if (blk_hw_contig_segment(q, req->biotail, next->bio)) { 1402 if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
1403 int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size; 1403 int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size;
1404 /* 1404 /*
1405 * propagate the combined length to the end of the requests 1405 * propagate the combined length to the end of the requests
1406 */ 1406 */
1407 if (req->nr_hw_segments == 1) 1407 if (req->nr_hw_segments == 1)
1408 req->bio->bi_hw_front_size = len; 1408 req->bio->bi_hw_front_size = len;
1409 if (next->nr_hw_segments == 1) 1409 if (next->nr_hw_segments == 1)
1410 next->biotail->bi_hw_back_size = len; 1410 next->biotail->bi_hw_back_size = len;
1411 total_hw_segments--; 1411 total_hw_segments--;
1412 } 1412 }
1413 1413
1414 if (total_hw_segments > q->max_hw_segments) 1414 if (total_hw_segments > q->max_hw_segments)
1415 return 0; 1415 return 0;
1416 1416
1417 /* Merge is OK... */ 1417 /* Merge is OK... */
1418 req->nr_phys_segments = total_phys_segments; 1418 req->nr_phys_segments = total_phys_segments;
1419 req->nr_hw_segments = total_hw_segments; 1419 req->nr_hw_segments = total_hw_segments;
1420 return 1; 1420 return 1;
1421 } 1421 }
1422 1422
1423 /* 1423 /*
1424 * "plug" the device if there are no outstanding requests: this will 1424 * "plug" the device if there are no outstanding requests: this will
1425 * force the transfer to start only after we have put all the requests 1425 * force the transfer to start only after we have put all the requests
1426 * on the list. 1426 * on the list.
1427 * 1427 *
1428 * This is called with interrupts off and no requests on the queue and 1428 * This is called with interrupts off and no requests on the queue and
1429 * with the queue lock held. 1429 * with the queue lock held.
1430 */ 1430 */
1431 void blk_plug_device(request_queue_t *q) 1431 void blk_plug_device(request_queue_t *q)
1432 { 1432 {
1433 WARN_ON(!irqs_disabled()); 1433 WARN_ON(!irqs_disabled());
1434 1434
1435 /* 1435 /*
1436 * don't plug a stopped queue, it must be paired with blk_start_queue() 1436 * don't plug a stopped queue, it must be paired with blk_start_queue()
1437 * which will restart the queueing 1437 * which will restart the queueing
1438 */ 1438 */
1439 if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) 1439 if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
1440 return; 1440 return;
1441 1441
1442 if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) 1442 if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1443 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); 1443 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
1444 } 1444 }
1445 1445
1446 EXPORT_SYMBOL(blk_plug_device); 1446 EXPORT_SYMBOL(blk_plug_device);
1447 1447
1448 /* 1448 /*
1449 * remove the queue from the plugged list, if present. called with 1449 * remove the queue from the plugged list, if present. called with
1450 * queue lock held and interrupts disabled. 1450 * queue lock held and interrupts disabled.
1451 */ 1451 */
1452 int blk_remove_plug(request_queue_t *q) 1452 int blk_remove_plug(request_queue_t *q)
1453 { 1453 {
1454 WARN_ON(!irqs_disabled()); 1454 WARN_ON(!irqs_disabled());
1455 1455
1456 if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) 1456 if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1457 return 0; 1457 return 0;
1458 1458
1459 del_timer(&q->unplug_timer); 1459 del_timer(&q->unplug_timer);
1460 return 1; 1460 return 1;
1461 } 1461 }
1462 1462
1463 EXPORT_SYMBOL(blk_remove_plug); 1463 EXPORT_SYMBOL(blk_remove_plug);
1464 1464
1465 /* 1465 /*
1466 * remove the plug and let it rip.. 1466 * remove the plug and let it rip..
1467 */ 1467 */
1468 void __generic_unplug_device(request_queue_t *q) 1468 void __generic_unplug_device(request_queue_t *q)
1469 { 1469 {
1470 if (unlikely(test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))) 1470 if (unlikely(test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)))
1471 return; 1471 return;
1472 1472
1473 if (!blk_remove_plug(q)) 1473 if (!blk_remove_plug(q))
1474 return; 1474 return;
1475 1475
1476 q->request_fn(q); 1476 q->request_fn(q);
1477 } 1477 }
1478 EXPORT_SYMBOL(__generic_unplug_device); 1478 EXPORT_SYMBOL(__generic_unplug_device);
1479 1479
1480 /** 1480 /**
1481 * generic_unplug_device - fire a request queue 1481 * generic_unplug_device - fire a request queue
1482 * @q: The &request_queue_t in question 1482 * @q: The &request_queue_t in question
1483 * 1483 *
1484 * Description: 1484 * Description:
1485 * Linux uses plugging to build bigger request queues before letting 1485 * Linux uses plugging to build bigger request queues before letting
1486 * the device have at them. If a queue is plugged, the I/O scheduler 1486 * the device have at them. If a queue is plugged, the I/O scheduler
1487 * is still adding and merging requests on the queue. Once the queue 1487 * is still adding and merging requests on the queue. Once the queue
1488 * gets unplugged, the request_fn defined for the queue is invoked and 1488 * gets unplugged, the request_fn defined for the queue is invoked and
1489 * transfers started. 1489 * transfers started.
1490 **/ 1490 **/
1491 void generic_unplug_device(request_queue_t *q) 1491 void generic_unplug_device(request_queue_t *q)
1492 { 1492 {
1493 spin_lock_irq(q->queue_lock); 1493 spin_lock_irq(q->queue_lock);
1494 __generic_unplug_device(q); 1494 __generic_unplug_device(q);
1495 spin_unlock_irq(q->queue_lock); 1495 spin_unlock_irq(q->queue_lock);
1496 } 1496 }
1497 EXPORT_SYMBOL(generic_unplug_device); 1497 EXPORT_SYMBOL(generic_unplug_device);
1498 1498
1499 static void blk_backing_dev_unplug(struct backing_dev_info *bdi, 1499 static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
1500 struct page *page) 1500 struct page *page)
1501 { 1501 {
1502 request_queue_t *q = bdi->unplug_io_data; 1502 request_queue_t *q = bdi->unplug_io_data;
1503 1503
1504 /* 1504 /*
1505 * devices don't necessarily have an ->unplug_fn defined 1505 * devices don't necessarily have an ->unplug_fn defined
1506 */ 1506 */
1507 if (q->unplug_fn) 1507 if (q->unplug_fn)
1508 q->unplug_fn(q); 1508 q->unplug_fn(q);
1509 } 1509 }
1510 1510
1511 static void blk_unplug_work(void *data) 1511 static void blk_unplug_work(void *data)
1512 { 1512 {
1513 request_queue_t *q = data; 1513 request_queue_t *q = data;
1514 1514
1515 q->unplug_fn(q); 1515 q->unplug_fn(q);
1516 } 1516 }
1517 1517
1518 static void blk_unplug_timeout(unsigned long data) 1518 static void blk_unplug_timeout(unsigned long data)
1519 { 1519 {
1520 request_queue_t *q = (request_queue_t *)data; 1520 request_queue_t *q = (request_queue_t *)data;
1521 1521
1522 kblockd_schedule_work(&q->unplug_work); 1522 kblockd_schedule_work(&q->unplug_work);
1523 } 1523 }
1524 1524
1525 /** 1525 /**
1526 * blk_start_queue - restart a previously stopped queue 1526 * blk_start_queue - restart a previously stopped queue
1527 * @q: The &request_queue_t in question 1527 * @q: The &request_queue_t in question
1528 * 1528 *
1529 * Description: 1529 * Description:
1530 * blk_start_queue() will clear the stop flag on the queue, and call 1530 * blk_start_queue() will clear the stop flag on the queue, and call
1531 * the request_fn for the queue if it was in a stopped state when 1531 * the request_fn for the queue if it was in a stopped state when
1532 * entered. Also see blk_stop_queue(). Queue lock must be held. 1532 * entered. Also see blk_stop_queue(). Queue lock must be held.
1533 **/ 1533 **/
1534 void blk_start_queue(request_queue_t *q) 1534 void blk_start_queue(request_queue_t *q)
1535 { 1535 {
1536 clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); 1536 clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1537 1537
1538 /* 1538 /*
1539 * one level of recursion is ok and is much faster than kicking 1539 * one level of recursion is ok and is much faster than kicking
1540 * the unplug handling 1540 * the unplug handling
1541 */ 1541 */
1542 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { 1542 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
1543 q->request_fn(q); 1543 q->request_fn(q);
1544 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); 1544 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
1545 } else { 1545 } else {
1546 blk_plug_device(q); 1546 blk_plug_device(q);
1547 kblockd_schedule_work(&q->unplug_work); 1547 kblockd_schedule_work(&q->unplug_work);
1548 } 1548 }
1549 } 1549 }
1550 1550
1551 EXPORT_SYMBOL(blk_start_queue); 1551 EXPORT_SYMBOL(blk_start_queue);
1552 1552
1553 /** 1553 /**
1554 * blk_stop_queue - stop a queue 1554 * blk_stop_queue - stop a queue
1555 * @q: The &request_queue_t in question 1555 * @q: The &request_queue_t in question
1556 * 1556 *
1557 * Description: 1557 * Description:
1558 * The Linux block layer assumes that a block driver will consume all 1558 * The Linux block layer assumes that a block driver will consume all
1559 * entries on the request queue when the request_fn strategy is called. 1559 * entries on the request queue when the request_fn strategy is called.
1560 * Often this will not happen, because of hardware limitations (queue 1560 * Often this will not happen, because of hardware limitations (queue
1561 * depth settings). If a device driver gets a 'queue full' response, 1561 * depth settings). If a device driver gets a 'queue full' response,
1562 * or if it simply chooses not to queue more I/O at one point, it can 1562 * or if it simply chooses not to queue more I/O at one point, it can
1563 * call this function to prevent the request_fn from being called until 1563 * call this function to prevent the request_fn from being called until
1564 * the driver has signalled it's ready to go again. This happens by calling 1564 * the driver has signalled it's ready to go again. This happens by calling
1565 * blk_start_queue() to restart queue operations. Queue lock must be held. 1565 * blk_start_queue() to restart queue operations. Queue lock must be held.
1566 **/ 1566 **/
1567 void blk_stop_queue(request_queue_t *q) 1567 void blk_stop_queue(request_queue_t *q)
1568 { 1568 {
1569 blk_remove_plug(q); 1569 blk_remove_plug(q);
1570 set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); 1570 set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1571 } 1571 }
1572 EXPORT_SYMBOL(blk_stop_queue); 1572 EXPORT_SYMBOL(blk_stop_queue);
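
For illustration only (not part of this diff): the stop/start pair described above is commonly used for 'queue full' handling. blk_stop_queue() runs from the request_fn with the lock already held, while blk_start_queue() needs the lock taken explicitly; exdrv is hypothetical.

/* from the request_fn, queue lock already held */
static void exdrv_hw_full(request_queue_t *q)
{
	blk_stop_queue(q);
}

/* from the completion interrupt, once the hardware has drained */
static void exdrv_hw_ready(request_queue_t *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_start_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}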
1573 1573
1574 /** 1574 /**
1575 * blk_sync_queue - cancel any pending callbacks on a queue 1575 * blk_sync_queue - cancel any pending callbacks on a queue
1576 * @q: the queue 1576 * @q: the queue
1577 * 1577 *
1578 * Description: 1578 * Description:
1579 * The block layer may perform asynchronous callback activity 1579 * The block layer may perform asynchronous callback activity
1580 * on a queue, such as calling the unplug function after a timeout. 1580 * on a queue, such as calling the unplug function after a timeout.
1581 * A block device may call blk_sync_queue to ensure that any 1581 * A block device may call blk_sync_queue to ensure that any
1582 * such activity is cancelled, thus allowing it to release resources 1582 * such activity is cancelled, thus allowing it to release resources
1583 * that the callbacks might use. The caller must already have made sure 1583 * that the callbacks might use. The caller must already have made sure
1584 * that its ->make_request_fn will not re-add plugging prior to calling 1584 * that its ->make_request_fn will not re-add plugging prior to calling
1585 * this function. 1585 * this function.
1586 * 1586 *
1587 */ 1587 */
1588 void blk_sync_queue(struct request_queue *q) 1588 void blk_sync_queue(struct request_queue *q)
1589 { 1589 {
1590 del_timer_sync(&q->unplug_timer); 1590 del_timer_sync(&q->unplug_timer);
1591 kblockd_flush(); 1591 kblockd_flush();
1592 } 1592 }
1593 EXPORT_SYMBOL(blk_sync_queue); 1593 EXPORT_SYMBOL(blk_sync_queue);
1594 1594
1595 /** 1595 /**
1596 * blk_run_queue - run a single device queue 1596 * blk_run_queue - run a single device queue
1597 * @q: The queue to run 1597 * @q: The queue to run
1598 */ 1598 */
1599 void blk_run_queue(struct request_queue *q) 1599 void blk_run_queue(struct request_queue *q)
1600 { 1600 {
1601 unsigned long flags; 1601 unsigned long flags;
1602 1602
1603 spin_lock_irqsave(q->queue_lock, flags); 1603 spin_lock_irqsave(q->queue_lock, flags);
1604 blk_remove_plug(q); 1604 blk_remove_plug(q);
1605 if (!elv_queue_empty(q)) 1605 if (!elv_queue_empty(q))
1606 q->request_fn(q); 1606 q->request_fn(q);
1607 spin_unlock_irqrestore(q->queue_lock, flags); 1607 spin_unlock_irqrestore(q->queue_lock, flags);
1608 } 1608 }
1609 EXPORT_SYMBOL(blk_run_queue); 1609 EXPORT_SYMBOL(blk_run_queue);
1610 1610
1611 /** 1611 /**
1612 * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed 1612 * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
1613 * @q: the request queue to be released 1613 * @q: the request queue to be released
1614 * 1614 *
1615 * Description: 1615 * Description:
1616 * blk_cleanup_queue is the pair to blk_init_queue() or 1616 * blk_cleanup_queue is the pair to blk_init_queue() or
1617 * blk_queue_make_request(). It should be called when a request queue is 1617 * blk_queue_make_request(). It should be called when a request queue is
1618 * being released; typically when a block device is being de-registered. 1618 * being released; typically when a block device is being de-registered.
1619 * Currently, its primary task is to free all the &struct request 1619 * Currently, its primary task is to free all the &struct request
1620 * structures that were allocated to the queue and the queue itself. 1620 * structures that were allocated to the queue and the queue itself.
1621 * 1621 *
1622 * Caveat: 1622 * Caveat:
1623 * Hopefully the low level driver will have finished any 1623 * Hopefully the low level driver will have finished any
1624 * outstanding requests first... 1624 * outstanding requests first...
1625 **/ 1625 **/
1626 void blk_cleanup_queue(request_queue_t * q) 1626 void blk_cleanup_queue(request_queue_t * q)
1627 { 1627 {
1628 struct request_list *rl = &q->rq; 1628 struct request_list *rl = &q->rq;
1629 1629
1630 if (!atomic_dec_and_test(&q->refcnt)) 1630 if (!atomic_dec_and_test(&q->refcnt))
1631 return; 1631 return;
1632 1632
1633 if (q->elevator) 1633 if (q->elevator)
1634 elevator_exit(q->elevator); 1634 elevator_exit(q->elevator);
1635 1635
1636 blk_sync_queue(q); 1636 blk_sync_queue(q);
1637 1637
1638 if (rl->rq_pool) 1638 if (rl->rq_pool)
1639 mempool_destroy(rl->rq_pool); 1639 mempool_destroy(rl->rq_pool);
1640 1640
1641 if (q->queue_tags) 1641 if (q->queue_tags)
1642 __blk_queue_free_tags(q); 1642 __blk_queue_free_tags(q);
1643 1643
1644 blk_queue_ordered(q, QUEUE_ORDERED_NONE); 1644 blk_queue_ordered(q, QUEUE_ORDERED_NONE);
1645 1645
1646 kmem_cache_free(requestq_cachep, q); 1646 kmem_cache_free(requestq_cachep, q);
1647 } 1647 }
1648 1648
1649 EXPORT_SYMBOL(blk_cleanup_queue); 1649 EXPORT_SYMBOL(blk_cleanup_queue);
1650 1650
1651 static int blk_init_free_list(request_queue_t *q) 1651 static int blk_init_free_list(request_queue_t *q)
1652 { 1652 {
1653 struct request_list *rl = &q->rq; 1653 struct request_list *rl = &q->rq;
1654 1654
1655 rl->count[READ] = rl->count[WRITE] = 0; 1655 rl->count[READ] = rl->count[WRITE] = 0;
1656 rl->starved[READ] = rl->starved[WRITE] = 0; 1656 rl->starved[READ] = rl->starved[WRITE] = 0;
1657 rl->elvpriv = 0; 1657 rl->elvpriv = 0;
1658 init_waitqueue_head(&rl->wait[READ]); 1658 init_waitqueue_head(&rl->wait[READ]);
1659 init_waitqueue_head(&rl->wait[WRITE]); 1659 init_waitqueue_head(&rl->wait[WRITE]);
1660 1660
1661 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 1661 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
1662 mempool_free_slab, request_cachep, q->node); 1662 mempool_free_slab, request_cachep, q->node);
1663 1663
1664 if (!rl->rq_pool) 1664 if (!rl->rq_pool)
1665 return -ENOMEM; 1665 return -ENOMEM;
1666 1666
1667 return 0; 1667 return 0;
1668 } 1668 }
1669 1669
1670 static int __make_request(request_queue_t *, struct bio *); 1670 static int __make_request(request_queue_t *, struct bio *);
1671 1671
1672 request_queue_t *blk_alloc_queue(gfp_t gfp_mask) 1672 request_queue_t *blk_alloc_queue(gfp_t gfp_mask)
1673 { 1673 {
1674 return blk_alloc_queue_node(gfp_mask, -1); 1674 return blk_alloc_queue_node(gfp_mask, -1);
1675 } 1675 }
1676 EXPORT_SYMBOL(blk_alloc_queue); 1676 EXPORT_SYMBOL(blk_alloc_queue);
1677 1677
1678 request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) 1678 request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
1679 { 1679 {
1680 request_queue_t *q; 1680 request_queue_t *q;
1681 1681
1682 q = kmem_cache_alloc_node(requestq_cachep, gfp_mask, node_id); 1682 q = kmem_cache_alloc_node(requestq_cachep, gfp_mask, node_id);
1683 if (!q) 1683 if (!q)
1684 return NULL; 1684 return NULL;
1685 1685
1686 memset(q, 0, sizeof(*q)); 1686 memset(q, 0, sizeof(*q));
1687 init_timer(&q->unplug_timer); 1687 init_timer(&q->unplug_timer);
1688 atomic_set(&q->refcnt, 1); 1688 atomic_set(&q->refcnt, 1);
1689 1689
1690 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; 1690 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
1691 q->backing_dev_info.unplug_io_data = q; 1691 q->backing_dev_info.unplug_io_data = q;
1692 1692
1693 return q; 1693 return q;
1694 } 1694 }
1695 EXPORT_SYMBOL(blk_alloc_queue_node); 1695 EXPORT_SYMBOL(blk_alloc_queue_node);
1696 1696
1697 /** 1697 /**
1698 * blk_init_queue - prepare a request queue for use with a block device 1698 * blk_init_queue - prepare a request queue for use with a block device
1699 * @rfn: The function to be called to process requests that have been 1699 * @rfn: The function to be called to process requests that have been
1700 * placed on the queue. 1700 * placed on the queue.
1701 * @lock: Request queue spin lock 1701 * @lock: Request queue spin lock
1702 * 1702 *
1703 * Description: 1703 * Description:
1704 * If a block device wishes to use the standard request handling procedures, 1704 * If a block device wishes to use the standard request handling procedures,
1705 * which sorts requests and coalesces adjacent requests, then it must 1705 * which sorts requests and coalesces adjacent requests, then it must
1706 * call blk_init_queue(). The function @rfn will be called when there 1706 * call blk_init_queue(). The function @rfn will be called when there
1707 * are requests on the queue that need to be processed. If the device 1707 * are requests on the queue that need to be processed. If the device
1708 * supports plugging, then @rfn may not be called immediately when requests 1708 * supports plugging, then @rfn may not be called immediately when requests
1709 * are available on the queue, but may be called at some time later instead. 1709 * are available on the queue, but may be called at some time later instead.
1710 * Plugged queues are generally unplugged when a buffer belonging to one 1710 * Plugged queues are generally unplugged when a buffer belonging to one
1711 * of the requests on the queue is needed, or due to memory pressure. 1711 * of the requests on the queue is needed, or due to memory pressure.
1712 * 1712 *
1713 * @rfn is not required, or even expected, to remove all requests off the 1713 * @rfn is not required, or even expected, to remove all requests off the
1714 * queue, but only as many as it can handle at a time. If it does leave 1714 * queue, but only as many as it can handle at a time. If it does leave
1715 * requests on the queue, it is responsible for arranging that the requests 1715 * requests on the queue, it is responsible for arranging that the requests
1716 * get dealt with eventually. 1716 * get dealt with eventually.
1717 * 1717 *
1718 * The queue spin lock must be held while manipulating the requests on the 1718 * The queue spin lock must be held while manipulating the requests on the
1719 * request queue. 1719 * request queue.
1720 * 1720 *
1721 * Function returns a pointer to the initialized request queue, or NULL if 1721 * Function returns a pointer to the initialized request queue, or NULL if
1722 * it didn't succeed. 1722 * it didn't succeed.
1723 * 1723 *
1724 * Note: 1724 * Note:
1725 * blk_init_queue() must be paired with a blk_cleanup_queue() call 1725 * blk_init_queue() must be paired with a blk_cleanup_queue() call
1726 * when the block device is deactivated (such as at module unload). 1726 * when the block device is deactivated (such as at module unload).
1727 **/ 1727 **/
1728 1728
1729 request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) 1729 request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
1730 { 1730 {
1731 return blk_init_queue_node(rfn, lock, -1); 1731 return blk_init_queue_node(rfn, lock, -1);
1732 } 1732 }
1733 EXPORT_SYMBOL(blk_init_queue); 1733 EXPORT_SYMBOL(blk_init_queue);
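
For illustration only (not part of this patch): a minimal sketch of a driver pairing blk_init_queue() with blk_cleanup_queue(), as the kerneldoc above requires. All mydev_* names are hypothetical placeholders; a real driver would also allocate and register a gendisk.

    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/blkdev.h>

    static spinlock_t mydev_lock;
    static request_queue_t *mydev_queue;

    /* @rfn: called with mydev_lock held when queued requests need service */
    static void mydev_request(request_queue_t *q)
    {
    	struct request *rq;

    	while ((rq = elv_next_request(q)) != NULL) {
    		if (!blk_fs_request(rq)) {
    			end_request(rq, 0);	/* not a regular read/write */
    			continue;
    		}
    		/* ... transfer rq->buffer / rq->current_nr_sectors ... */
    		end_request(rq, 1);		/* pretend it succeeded */
    	}
    }

    static int __init mydev_init(void)
    {
    	spin_lock_init(&mydev_lock);
    	mydev_queue = blk_init_queue(mydev_request, &mydev_lock);
    	if (!mydev_queue)
    		return -ENOMEM;
    	/* normally: alloc_disk(), set disk->queue, add_disk(), ... */
    	return 0;
    }

    static void __exit mydev_exit(void)
    {
    	blk_cleanup_queue(mydev_queue);	/* required pairing */
    }

    module_init(mydev_init);
    module_exit(mydev_exit);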
1734 1734
1735 request_queue_t * 1735 request_queue_t *
1736 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) 1736 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
1737 { 1737 {
1738 request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id); 1738 request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id);
1739 1739
1740 if (!q) 1740 if (!q)
1741 return NULL; 1741 return NULL;
1742 1742
1743 q->node = node_id; 1743 q->node = node_id;
1744 if (blk_init_free_list(q)) 1744 if (blk_init_free_list(q))
1745 goto out_init; 1745 goto out_init;
1746 1746
1747 /* 1747 /*
1748 * if caller didn't supply a lock, they get per-queue locking with 1748 * if caller didn't supply a lock, they get per-queue locking with
1749 * our embedded lock 1749 * our embedded lock
1750 */ 1750 */
1751 if (!lock) { 1751 if (!lock) {
1752 spin_lock_init(&q->__queue_lock); 1752 spin_lock_init(&q->__queue_lock);
1753 lock = &q->__queue_lock; 1753 lock = &q->__queue_lock;
1754 } 1754 }
1755 1755
1756 q->request_fn = rfn; 1756 q->request_fn = rfn;
1757 q->back_merge_fn = ll_back_merge_fn; 1757 q->back_merge_fn = ll_back_merge_fn;
1758 q->front_merge_fn = ll_front_merge_fn; 1758 q->front_merge_fn = ll_front_merge_fn;
1759 q->merge_requests_fn = ll_merge_requests_fn; 1759 q->merge_requests_fn = ll_merge_requests_fn;
1760 q->prep_rq_fn = NULL; 1760 q->prep_rq_fn = NULL;
1761 q->unplug_fn = generic_unplug_device; 1761 q->unplug_fn = generic_unplug_device;
1762 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); 1762 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
1763 q->queue_lock = lock; 1763 q->queue_lock = lock;
1764 1764
1765 blk_queue_segment_boundary(q, 0xffffffff); 1765 blk_queue_segment_boundary(q, 0xffffffff);
1766 1766
1767 blk_queue_make_request(q, __make_request); 1767 blk_queue_make_request(q, __make_request);
1768 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); 1768 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
1769 1769
1770 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); 1770 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
1771 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); 1771 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
1772 1772
1773 /* 1773 /*
1774 * all done 1774 * all done
1775 */ 1775 */
1776 if (!elevator_init(q, NULL)) { 1776 if (!elevator_init(q, NULL)) {
1777 blk_queue_congestion_threshold(q); 1777 blk_queue_congestion_threshold(q);
1778 return q; 1778 return q;
1779 } 1779 }
1780 1780
1781 blk_cleanup_queue(q); 1781 blk_cleanup_queue(q);
1782 out_init: 1782 out_init:
1783 kmem_cache_free(requestq_cachep, q); 1783 kmem_cache_free(requestq_cachep, q);
1784 return NULL; 1784 return NULL;
1785 } 1785 }
1786 EXPORT_SYMBOL(blk_init_queue_node); 1786 EXPORT_SYMBOL(blk_init_queue_node);
1787 1787
1788 int blk_get_queue(request_queue_t *q) 1788 int blk_get_queue(request_queue_t *q)
1789 { 1789 {
1790 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { 1790 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
1791 atomic_inc(&q->refcnt); 1791 atomic_inc(&q->refcnt);
1792 return 0; 1792 return 0;
1793 } 1793 }
1794 1794
1795 return 1; 1795 return 1;
1796 } 1796 }
1797 1797
1798 EXPORT_SYMBOL(blk_get_queue); 1798 EXPORT_SYMBOL(blk_get_queue);
1799 1799
1800 static inline void blk_free_request(request_queue_t *q, struct request *rq) 1800 static inline void blk_free_request(request_queue_t *q, struct request *rq)
1801 { 1801 {
1802 if (rq->flags & REQ_ELVPRIV) 1802 if (rq->flags & REQ_ELVPRIV)
1803 elv_put_request(q, rq); 1803 elv_put_request(q, rq);
1804 mempool_free(rq, q->rq.rq_pool); 1804 mempool_free(rq, q->rq.rq_pool);
1805 } 1805 }
1806 1806
1807 static inline struct request * 1807 static inline struct request *
1808 blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, 1808 blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
1809 int priv, gfp_t gfp_mask) 1809 int priv, gfp_t gfp_mask)
1810 { 1810 {
1811 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); 1811 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
1812 1812
1813 if (!rq) 1813 if (!rq)
1814 return NULL; 1814 return NULL;
1815 1815
1816 /* 1816 /*
1817 * first three bits are identical in rq->flags and bio->bi_rw, 1817 * first three bits are identical in rq->flags and bio->bi_rw,
1818 * see bio.h and blkdev.h 1818 * see bio.h and blkdev.h
1819 */ 1819 */
1820 rq->flags = rw; 1820 rq->flags = rw;
1821 1821
1822 if (priv) { 1822 if (priv) {
1823 if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { 1823 if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
1824 mempool_free(rq, q->rq.rq_pool); 1824 mempool_free(rq, q->rq.rq_pool);
1825 return NULL; 1825 return NULL;
1826 } 1826 }
1827 rq->flags |= REQ_ELVPRIV; 1827 rq->flags |= REQ_ELVPRIV;
1828 } 1828 }
1829 1829
1830 return rq; 1830 return rq;
1831 } 1831 }
1832 1832
1833 /* 1833 /*
1834 * ioc_batching returns true if the ioc is a valid batching request and 1834 * ioc_batching returns true if the ioc is a valid batching request and
1835 * should be given priority access to a request. 1835 * should be given priority access to a request.
1836 */ 1836 */
1837 static inline int ioc_batching(request_queue_t *q, struct io_context *ioc) 1837 static inline int ioc_batching(request_queue_t *q, struct io_context *ioc)
1838 { 1838 {
1839 if (!ioc) 1839 if (!ioc)
1840 return 0; 1840 return 0;
1841 1841
1842 /* 1842 /*
1843 * Make sure the process is able to allocate at least 1 request 1843 * Make sure the process is able to allocate at least 1 request
1844 * even if the batch times out, otherwise we could theoretically 1844 * even if the batch times out, otherwise we could theoretically
1845 * lose wakeups. 1845 * lose wakeups.
1846 */ 1846 */
1847 return ioc->nr_batch_requests == q->nr_batching || 1847 return ioc->nr_batch_requests == q->nr_batching ||
1848 (ioc->nr_batch_requests > 0 1848 (ioc->nr_batch_requests > 0
1849 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); 1849 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
1850 } 1850 }
1851 1851
1852 /* 1852 /*
1853 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This 1853 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
1854 * will cause the process to be a "batcher" on all queues in the system. This 1854 * will cause the process to be a "batcher" on all queues in the system. This
1855 * is the behaviour we want though - once it gets a wakeup it should be given 1855 * is the behaviour we want though - once it gets a wakeup it should be given
1856 * a nice run. 1856 * a nice run.
1857 */ 1857 */
1858 static void ioc_set_batching(request_queue_t *q, struct io_context *ioc) 1858 static void ioc_set_batching(request_queue_t *q, struct io_context *ioc)
1859 { 1859 {
1860 if (!ioc || ioc_batching(q, ioc)) 1860 if (!ioc || ioc_batching(q, ioc))
1861 return; 1861 return;
1862 1862
1863 ioc->nr_batch_requests = q->nr_batching; 1863 ioc->nr_batch_requests = q->nr_batching;
1864 ioc->last_waited = jiffies; 1864 ioc->last_waited = jiffies;
1865 } 1865 }
1866 1866
1867 static void __freed_request(request_queue_t *q, int rw) 1867 static void __freed_request(request_queue_t *q, int rw)
1868 { 1868 {
1869 struct request_list *rl = &q->rq; 1869 struct request_list *rl = &q->rq;
1870 1870
1871 if (rl->count[rw] < queue_congestion_off_threshold(q)) 1871 if (rl->count[rw] < queue_congestion_off_threshold(q))
1872 clear_queue_congested(q, rw); 1872 clear_queue_congested(q, rw);
1873 1873
1874 if (rl->count[rw] + 1 <= q->nr_requests) { 1874 if (rl->count[rw] + 1 <= q->nr_requests) {
1875 if (waitqueue_active(&rl->wait[rw])) 1875 if (waitqueue_active(&rl->wait[rw]))
1876 wake_up(&rl->wait[rw]); 1876 wake_up(&rl->wait[rw]);
1877 1877
1878 blk_clear_queue_full(q, rw); 1878 blk_clear_queue_full(q, rw);
1879 } 1879 }
1880 } 1880 }
1881 1881
1882 /* 1882 /*
1883 * A request has just been released. Account for it, update the full and 1883 * A request has just been released. Account for it, update the full and
1884 * congestion status, wake up any waiters. Called under q->queue_lock. 1884 * congestion status, wake up any waiters. Called under q->queue_lock.
1885 */ 1885 */
1886 static void freed_request(request_queue_t *q, int rw, int priv) 1886 static void freed_request(request_queue_t *q, int rw, int priv)
1887 { 1887 {
1888 struct request_list *rl = &q->rq; 1888 struct request_list *rl = &q->rq;
1889 1889
1890 rl->count[rw]--; 1890 rl->count[rw]--;
1891 if (priv) 1891 if (priv)
1892 rl->elvpriv--; 1892 rl->elvpriv--;
1893 1893
1894 __freed_request(q, rw); 1894 __freed_request(q, rw);
1895 1895
1896 if (unlikely(rl->starved[rw ^ 1])) 1896 if (unlikely(rl->starved[rw ^ 1]))
1897 __freed_request(q, rw ^ 1); 1897 __freed_request(q, rw ^ 1);
1898 } 1898 }
1899 1899
1900 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) 1900 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
1901 /* 1901 /*
1902 * Get a free request, queue_lock must be held. 1902 * Get a free request, queue_lock must be held.
1903 * Returns NULL on failure, with queue_lock held. 1903 * Returns NULL on failure, with queue_lock held.
1904 * Returns !NULL on success, with queue_lock *not held*. 1904 * Returns !NULL on success, with queue_lock *not held*.
1905 */ 1905 */
1906 static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, 1906 static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
1907 gfp_t gfp_mask) 1907 gfp_t gfp_mask)
1908 { 1908 {
1909 struct request *rq = NULL; 1909 struct request *rq = NULL;
1910 struct request_list *rl = &q->rq; 1910 struct request_list *rl = &q->rq;
1911 struct io_context *ioc = NULL; 1911 struct io_context *ioc = NULL;
1912 int may_queue, priv; 1912 int may_queue, priv;
1913 1913
1914 may_queue = elv_may_queue(q, rw, bio); 1914 may_queue = elv_may_queue(q, rw, bio);
1915 if (may_queue == ELV_MQUEUE_NO) 1915 if (may_queue == ELV_MQUEUE_NO)
1916 goto rq_starved; 1916 goto rq_starved;
1917 1917
1918 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { 1918 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
1919 if (rl->count[rw]+1 >= q->nr_requests) { 1919 if (rl->count[rw]+1 >= q->nr_requests) {
1920 ioc = current_io_context(GFP_ATOMIC); 1920 ioc = current_io_context(GFP_ATOMIC);
1921 /* 1921 /*
1922 * The queue will fill after this allocation, so set 1922 * The queue will fill after this allocation, so set
1923 * it as full, and mark this process as "batching". 1923 * it as full, and mark this process as "batching".
1924 * This process will be allowed to complete a batch of 1924 * This process will be allowed to complete a batch of
1925 * requests, others will be blocked. 1925 * requests, others will be blocked.
1926 */ 1926 */
1927 if (!blk_queue_full(q, rw)) { 1927 if (!blk_queue_full(q, rw)) {
1928 ioc_set_batching(q, ioc); 1928 ioc_set_batching(q, ioc);
1929 blk_set_queue_full(q, rw); 1929 blk_set_queue_full(q, rw);
1930 } else { 1930 } else {
1931 if (may_queue != ELV_MQUEUE_MUST 1931 if (may_queue != ELV_MQUEUE_MUST
1932 && !ioc_batching(q, ioc)) { 1932 && !ioc_batching(q, ioc)) {
1933 /* 1933 /*
1934 * The queue is full and the allocating 1934 * The queue is full and the allocating
1935 * process is not a "batcher", and not 1935 * process is not a "batcher", and not
1936 * exempted by the IO scheduler 1936 * exempted by the IO scheduler
1937 */ 1937 */
1938 goto out; 1938 goto out;
1939 } 1939 }
1940 } 1940 }
1941 } 1941 }
1942 set_queue_congested(q, rw); 1942 set_queue_congested(q, rw);
1943 } 1943 }
1944 1944
1945 /* 1945 /*
1946 * Only allow batching queuers to allocate up to 50% over the defined 1946 * Only allow batching queuers to allocate up to 50% over the defined
1947 * limit of requests, otherwise we could have thousands of requests 1947 * limit of requests, otherwise we could have thousands of requests
1948 * allocated with any setting of ->nr_requests 1948 * allocated with any setting of ->nr_requests
1949 */ 1949 */
1950 if (rl->count[rw] >= (3 * q->nr_requests / 2)) 1950 if (rl->count[rw] >= (3 * q->nr_requests / 2))
1951 goto out; 1951 goto out;
1952 1952
1953 rl->count[rw]++; 1953 rl->count[rw]++;
1954 rl->starved[rw] = 0; 1954 rl->starved[rw] = 0;
1955 1955
1956 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 1956 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
1957 if (priv) 1957 if (priv)
1958 rl->elvpriv++; 1958 rl->elvpriv++;
1959 1959
1960 spin_unlock_irq(q->queue_lock); 1960 spin_unlock_irq(q->queue_lock);
1961 1961
1962 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); 1962 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
1963 if (unlikely(!rq)) { 1963 if (unlikely(!rq)) {
1964 /* 1964 /*
1965 * Allocation failed presumably due to memory. Undo anything 1965 * Allocation failed presumably due to memory. Undo anything
1966 * we might have messed up. 1966 * we might have messed up.
1967 * 1967 *
1968 * Allocating task should really be put onto the front of the 1968 * Allocating task should really be put onto the front of the
1969 * wait queue, but this is pretty rare. 1969 * wait queue, but this is pretty rare.
1970 */ 1970 */
1971 spin_lock_irq(q->queue_lock); 1971 spin_lock_irq(q->queue_lock);
1972 freed_request(q, rw, priv); 1972 freed_request(q, rw, priv);
1973 1973
1974 /* 1974 /*
1975 * in the very unlikely event that allocation failed and no 1975 * in the very unlikely event that allocation failed and no
1976 * requests for this direction were pending, mark us starved 1976 * requests for this direction were pending, mark us starved
1977 * so that freeing of a request in the other direction will 1977 * so that freeing of a request in the other direction will
1978 * notice us. another possible fix would be to split the 1978 * notice us. another possible fix would be to split the
1979 * rq mempool into READ and WRITE 1979 * rq mempool into READ and WRITE
1980 */ 1980 */
1981 rq_starved: 1981 rq_starved:
1982 if (unlikely(rl->count[rw] == 0)) 1982 if (unlikely(rl->count[rw] == 0))
1983 rl->starved[rw] = 1; 1983 rl->starved[rw] = 1;
1984 1984
1985 goto out; 1985 goto out;
1986 } 1986 }
1987 1987
1988 /* 1988 /*
1989 * ioc may be NULL here, and ioc_batching will be false. That's 1989 * ioc may be NULL here, and ioc_batching will be false. That's
1990 * OK, if the queue is under the request limit then requests need 1990 * OK, if the queue is under the request limit then requests need
1991 * not count toward the nr_batch_requests limit. There will always 1991 * not count toward the nr_batch_requests limit. There will always
1992 * be some limit enforced by BLK_BATCH_TIME. 1992 * be some limit enforced by BLK_BATCH_TIME.
1993 */ 1993 */
1994 if (ioc_batching(q, ioc)) 1994 if (ioc_batching(q, ioc))
1995 ioc->nr_batch_requests--; 1995 ioc->nr_batch_requests--;
1996 1996
1997 rq_init(q, rq); 1997 rq_init(q, rq);
1998 rq->rl = rl; 1998 rq->rl = rl;
1999 out: 1999 out:
2000 return rq; 2000 return rq;
2001 } 2001 }
2002 2002
2003 /* 2003 /*
2004 * No available requests for this queue, unplug the device and wait for some 2004 * No available requests for this queue, unplug the device and wait for some
2005 * requests to become available. 2005 * requests to become available.
2006 * 2006 *
2007 * Called with q->queue_lock held, and returns with it unlocked. 2007 * Called with q->queue_lock held, and returns with it unlocked.
2008 */ 2008 */
2009 static struct request *get_request_wait(request_queue_t *q, int rw, 2009 static struct request *get_request_wait(request_queue_t *q, int rw,
2010 struct bio *bio) 2010 struct bio *bio)
2011 { 2011 {
2012 struct request *rq; 2012 struct request *rq;
2013 2013
2014 rq = get_request(q, rw, bio, GFP_NOIO); 2014 rq = get_request(q, rw, bio, GFP_NOIO);
2015 while (!rq) { 2015 while (!rq) {
2016 DEFINE_WAIT(wait); 2016 DEFINE_WAIT(wait);
2017 struct request_list *rl = &q->rq; 2017 struct request_list *rl = &q->rq;
2018 2018
2019 prepare_to_wait_exclusive(&rl->wait[rw], &wait, 2019 prepare_to_wait_exclusive(&rl->wait[rw], &wait,
2020 TASK_UNINTERRUPTIBLE); 2020 TASK_UNINTERRUPTIBLE);
2021 2021
2022 rq = get_request(q, rw, bio, GFP_NOIO); 2022 rq = get_request(q, rw, bio, GFP_NOIO);
2023 2023
2024 if (!rq) { 2024 if (!rq) {
2025 struct io_context *ioc; 2025 struct io_context *ioc;
2026 2026
2027 __generic_unplug_device(q); 2027 __generic_unplug_device(q);
2028 spin_unlock_irq(q->queue_lock); 2028 spin_unlock_irq(q->queue_lock);
2029 io_schedule(); 2029 io_schedule();
2030 2030
2031 /* 2031 /*
2032 * After sleeping, we become a "batching" process and 2032 * After sleeping, we become a "batching" process and
2033 * will be able to allocate at least one request, and 2033 * will be able to allocate at least one request, and
2034 * up to a big batch of them for a small period of time. 2034 * up to a big batch of them for a small period of time.
2035 * See ioc_batching, ioc_set_batching 2035 * See ioc_batching, ioc_set_batching
2036 */ 2036 */
2037 ioc = current_io_context(GFP_NOIO); 2037 ioc = current_io_context(GFP_NOIO);
2038 ioc_set_batching(q, ioc); 2038 ioc_set_batching(q, ioc);
2039 2039
2040 spin_lock_irq(q->queue_lock); 2040 spin_lock_irq(q->queue_lock);
2041 } 2041 }
2042 finish_wait(&rl->wait[rw], &wait); 2042 finish_wait(&rl->wait[rw], &wait);
2043 } 2043 }
2044 2044
2045 return rq; 2045 return rq;
2046 } 2046 }
2047 2047
2048 struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask) 2048 struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
2049 { 2049 {
2050 struct request *rq; 2050 struct request *rq;
2051 2051
2052 BUG_ON(rw != READ && rw != WRITE); 2052 BUG_ON(rw != READ && rw != WRITE);
2053 2053
2054 spin_lock_irq(q->queue_lock); 2054 spin_lock_irq(q->queue_lock);
2055 if (gfp_mask & __GFP_WAIT) { 2055 if (gfp_mask & __GFP_WAIT) {
2056 rq = get_request_wait(q, rw, NULL); 2056 rq = get_request_wait(q, rw, NULL);
2057 } else { 2057 } else {
2058 rq = get_request(q, rw, NULL, gfp_mask); 2058 rq = get_request(q, rw, NULL, gfp_mask);
2059 if (!rq) 2059 if (!rq)
2060 spin_unlock_irq(q->queue_lock); 2060 spin_unlock_irq(q->queue_lock);
2061 } 2061 }
2062 /* q->queue_lock is unlocked at this point */ 2062 /* q->queue_lock is unlocked at this point */
2063 2063
2064 return rq; 2064 return rq;
2065 } 2065 }
2066 EXPORT_SYMBOL(blk_get_request); 2066 EXPORT_SYMBOL(blk_get_request);
2067 2067
2068 /** 2068 /**
2069 * blk_requeue_request - put a request back on queue 2069 * blk_requeue_request - put a request back on queue
2070 * @q: request queue where request should be inserted 2070 * @q: request queue where request should be inserted
2071 * @rq: request to be inserted 2071 * @rq: request to be inserted
2072 * 2072 *
2073 * Description: 2073 * Description:
2074 * Drivers often keep queueing requests until the hardware cannot accept 2074 * Drivers often keep queueing requests until the hardware cannot accept
2075 * more; when that condition happens we need to put the request back 2075 * more; when that condition happens we need to put the request back
2076 * on the queue. Must be called with queue lock held. 2076 * on the queue. Must be called with queue lock held.
2077 */ 2077 */
2078 void blk_requeue_request(request_queue_t *q, struct request *rq) 2078 void blk_requeue_request(request_queue_t *q, struct request *rq)
2079 { 2079 {
2080 if (blk_rq_tagged(rq)) 2080 if (blk_rq_tagged(rq))
2081 blk_queue_end_tag(q, rq); 2081 blk_queue_end_tag(q, rq);
2082 2082
2083 elv_requeue_request(q, rq); 2083 elv_requeue_request(q, rq);
2084 } 2084 }
2085 2085
2086 EXPORT_SYMBOL(blk_requeue_request); 2086 EXPORT_SYMBOL(blk_requeue_request);
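
A hedged sketch of the requeue pattern described above: the driver has already dequeued a request, the hardware refuses it, so the request goes back on the queue. mydev_start_io() is a hypothetical placeholder; the function runs with the queue lock held, as blk_requeue_request() requires.

    static int mydev_start_io(struct request *rq);	/* hypothetical hw submit */

    static void mydev_request(request_queue_t *q)
    {
    	struct request *rq;

    	while ((rq = elv_next_request(q)) != NULL) {
    		blkdev_dequeue_request(rq);
    		if (mydev_start_io(rq) == -EBUSY) {
    			/* hardware is full: put it back, retry later */
    			blk_requeue_request(q, rq);
    			break;
    		}
    	}
    }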
2087 2087
2088 /** 2088 /**
2089 * blk_insert_request - insert a special request into a request queue 2089 * blk_insert_request - insert a special request into a request queue
2090 * @q: request queue where request should be inserted 2090 * @q: request queue where request should be inserted
2091 * @rq: request to be inserted 2091 * @rq: request to be inserted
2092 * @at_head: insert request at head or tail of queue 2092 * @at_head: insert request at head or tail of queue
2093 * @data: private data 2093 * @data: private data
2094 * 2094 *
2095 * Description: 2095 * Description:
2096 * Many block devices need to execute commands asynchronously, so they don't 2096 * Many block devices need to execute commands asynchronously, so they don't
2097 * block the whole kernel from preemption during request execution. This is 2097 * block the whole kernel from preemption during request execution. This is
2098 * accomplished normally by inserting artificial requests tagged as 2098 * accomplished normally by inserting artificial requests tagged as
2099 * REQ_SPECIAL into the corresponding request queue, and letting them be 2099 * REQ_SPECIAL into the corresponding request queue, and letting them be
2100 * scheduled for actual execution by the request queue. 2100 * scheduled for actual execution by the request queue.
2101 * 2101 *
2102 * We have the option of inserting at the head or the tail of the queue. 2102 * We have the option of inserting at the head or the tail of the queue.
2103 * Typically we use the tail for new ioctls and so forth. We use the head 2103 * Typically we use the tail for new ioctls and so forth. We use the head
2104 * of the queue for things like a QUEUE_FULL message from a device, or a 2104 * of the queue for things like a QUEUE_FULL message from a device, or a
2105 * host that is unable to accept a particular command. 2105 * host that is unable to accept a particular command.
2106 */ 2106 */
2107 void blk_insert_request(request_queue_t *q, struct request *rq, 2107 void blk_insert_request(request_queue_t *q, struct request *rq,
2108 int at_head, void *data) 2108 int at_head, void *data)
2109 { 2109 {
2110 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 2110 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2111 unsigned long flags; 2111 unsigned long flags;
2112 2112
2113 /* 2113 /*
2114 * tell I/O scheduler that this isn't a regular read/write (ie it 2114 * tell I/O scheduler that this isn't a regular read/write (ie it
2115 * must not attempt merges on this) and that it acts as a soft 2115 * must not attempt merges on this) and that it acts as a soft
2116 * barrier 2116 * barrier
2117 */ 2117 */
2118 rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER; 2118 rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER;
2119 2119
2120 rq->special = data; 2120 rq->special = data;
2121 2121
2122 spin_lock_irqsave(q->queue_lock, flags); 2122 spin_lock_irqsave(q->queue_lock, flags);
2123 2123
2124 /* 2124 /*
2125 * If command is tagged, release the tag 2125 * If command is tagged, release the tag
2126 */ 2126 */
2127 if (blk_rq_tagged(rq)) 2127 if (blk_rq_tagged(rq))
2128 blk_queue_end_tag(q, rq); 2128 blk_queue_end_tag(q, rq);
2129 2129
2130 drive_stat_acct(rq, rq->nr_sectors, 1); 2130 drive_stat_acct(rq, rq->nr_sectors, 1);
2131 __elv_add_request(q, rq, where, 0); 2131 __elv_add_request(q, rq, where, 0);
2132 2132
2133 if (blk_queue_plugged(q)) 2133 if (blk_queue_plugged(q))
2134 __generic_unplug_device(q); 2134 __generic_unplug_device(q);
2135 else 2135 else
2136 q->request_fn(q); 2136 q->request_fn(q);
2137 spin_unlock_irqrestore(q->queue_lock, flags); 2137 spin_unlock_irqrestore(q->queue_lock, flags);
2138 } 2138 }
2139 2139
2140 EXPORT_SYMBOL(blk_insert_request); 2140 EXPORT_SYMBOL(blk_insert_request);
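
A sketch of how a driver might use blk_insert_request() for a driver-private command, in the spirit of the description above. struct mydev_cmd and the at-head policy are assumptions, not anything this patch defines.

    struct mydev_cmd;	/* hypothetical driver-private command structure */

    static int mydev_queue_special(request_queue_t *q, struct mydev_cmd *cmd,
    			       int urgent)
    {
    	struct request *rq;

    	rq = blk_get_request(q, READ, __GFP_WAIT);	/* may sleep */
    	if (!rq)
    		return -ENOMEM;

    	/*
    	 * blk_insert_request() tags the request REQ_SPECIAL, stores cmd
    	 * in rq->special and kicks the queue; at_head=1 jumps the line,
    	 * e.g. for error-recovery style commands.
    	 */
    	blk_insert_request(q, rq, urgent, cmd);
    	return 0;
    }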
2141 2141
2142 /** 2142 /**
2143 * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage 2143 * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
2144 * @q: request queue where request should be inserted 2144 * @q: request queue where request should be inserted
2145 * @rq: request structure to fill 2145 * @rq: request structure to fill
2146 * @ubuf: the user buffer 2146 * @ubuf: the user buffer
2147 * @len: length of user data 2147 * @len: length of user data
2148 * 2148 *
2149 * Description: 2149 * Description:
2150 * Data will be mapped directly for zero copy io, if possible. Otherwise 2150 * Data will be mapped directly for zero copy io, if possible. Otherwise
2151 * a kernel bounce buffer is used. 2151 * a kernel bounce buffer is used.
2152 * 2152 *
2153 * A matching blk_rq_unmap_user() must be issued at the end of io, while 2153 * A matching blk_rq_unmap_user() must be issued at the end of io, while
2154 * still in process context. 2154 * still in process context.
2155 * 2155 *
2156 * Note: The mapped bio may need to be bounced through blk_queue_bounce() 2156 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
2157 * before being submitted to the device, as pages mapped may be out of 2157 * before being submitted to the device, as pages mapped may be out of
2158 * reach. It's the caller's responsibility to make sure this happens. The 2158 * reach. It's the caller's responsibility to make sure this happens. The
2159 * original bio must be passed back in to blk_rq_unmap_user() for proper 2159 * original bio must be passed back in to blk_rq_unmap_user() for proper
2160 * unmapping. 2160 * unmapping.
2161 */ 2161 */
2162 int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, 2162 int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,
2163 unsigned int len) 2163 unsigned int len)
2164 { 2164 {
2165 unsigned long uaddr; 2165 unsigned long uaddr;
2166 struct bio *bio; 2166 struct bio *bio;
2167 int reading; 2167 int reading;
2168 2168
2169 if (len > (q->max_hw_sectors << 9)) 2169 if (len > (q->max_hw_sectors << 9))
2170 return -EINVAL; 2170 return -EINVAL;
2171 if (!len || !ubuf) 2171 if (!len || !ubuf)
2172 return -EINVAL; 2172 return -EINVAL;
2173 2173
2174 reading = rq_data_dir(rq) == READ; 2174 reading = rq_data_dir(rq) == READ;
2175 2175
2176 /* 2176 /*
2177 * if alignment requirement is satisfied, map in user pages for 2177 * if alignment requirement is satisfied, map in user pages for
2178 * direct dma. else, set up kernel bounce buffers 2178 * direct dma. else, set up kernel bounce buffers
2179 */ 2179 */
2180 uaddr = (unsigned long) ubuf; 2180 uaddr = (unsigned long) ubuf;
2181 if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q))) 2181 if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
2182 bio = bio_map_user(q, NULL, uaddr, len, reading); 2182 bio = bio_map_user(q, NULL, uaddr, len, reading);
2183 else 2183 else
2184 bio = bio_copy_user(q, uaddr, len, reading); 2184 bio = bio_copy_user(q, uaddr, len, reading);
2185 2185
2186 if (!IS_ERR(bio)) { 2186 if (!IS_ERR(bio)) {
2187 rq->bio = rq->biotail = bio; 2187 rq->bio = rq->biotail = bio;
2188 blk_rq_bio_prep(q, rq, bio); 2188 blk_rq_bio_prep(q, rq, bio);
2189 2189
2190 rq->buffer = rq->data = NULL; 2190 rq->buffer = rq->data = NULL;
2191 rq->data_len = len; 2191 rq->data_len = len;
2192 return 0; 2192 return 0;
2193 } 2193 }
2194 2194
2195 /* 2195 /*
2196 * bio is the err-ptr 2196 * bio is the err-ptr
2197 */ 2197 */
2198 return PTR_ERR(bio); 2198 return PTR_ERR(bio);
2199 } 2199 }
2200 2200
2201 EXPORT_SYMBOL(blk_rq_map_user); 2201 EXPORT_SYMBOL(blk_rq_map_user);
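
A sketch of the map/execute/unmap sequence the comment describes, loosely modelled on a SCSI passthrough ioctl. Filling in the command bytes is elided and every mydev_* name is hypothetical; note the original bio is saved before execution so it can be handed back to blk_rq_unmap_user().

    static int mydev_do_user_cmd(request_queue_t *q, struct gendisk *disk,
    			     void __user *ubuf, unsigned int len)
    {
    	struct request *rq;
    	struct bio *bio;
    	int err;

    	rq = blk_get_request(q, READ, __GFP_WAIT);
    	rq->flags |= REQ_BLOCK_PC;
    	/* a real caller would also set up rq->cmd, rq->cmd_len, rq->timeout */

    	err = blk_rq_map_user(q, rq, ubuf, len);	/* zero-copy if aligned */
    	if (err)
    		goto out;

    	bio = rq->bio;			/* remember it for unmapping */
    	err = blk_execute_rq(q, disk, rq, 0);
    	blk_rq_unmap_user(bio, len);	/* must run in process context */
    out:
    	blk_put_request(rq);
    	return err;
    }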
2202 2202
2203 /** 2203 /**
2204 * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage 2204 * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage
2205 * @q: request queue where request should be inserted 2205 * @q: request queue where request should be inserted
2206 * @rq: request to map data to 2206 * @rq: request to map data to
2207 * @iov: pointer to the iovec 2207 * @iov: pointer to the iovec
2208 * @iov_count: number of elements in the iovec 2208 * @iov_count: number of elements in the iovec
2209 * 2209 *
2210 * Description: 2210 * Description:
2211 * Data will be mapped directly for zero copy io, if possible. Otherwise 2211 * Data will be mapped directly for zero copy io, if possible. Otherwise
2212 * a kernel bounce buffer is used. 2212 * a kernel bounce buffer is used.
2213 * 2213 *
2214 * A matching blk_rq_unmap_user() must be issued at the end of io, while 2214 * A matching blk_rq_unmap_user() must be issued at the end of io, while
2215 * still in process context. 2215 * still in process context.
2216 * 2216 *
2217 * Note: The mapped bio may need to be bounced through blk_queue_bounce() 2217 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
2218 * before being submitted to the device, as pages mapped may be out of 2218 * before being submitted to the device, as pages mapped may be out of
2219 * reach. It's the caller's responsibility to make sure this happens. The 2219 * reach. It's the caller's responsibility to make sure this happens. The
2220 * original bio must be passed back in to blk_rq_unmap_user() for proper 2220 * original bio must be passed back in to blk_rq_unmap_user() for proper
2221 * unmapping. 2221 * unmapping.
2222 */ 2222 */
2223 int blk_rq_map_user_iov(request_queue_t *q, struct request *rq, 2223 int blk_rq_map_user_iov(request_queue_t *q, struct request *rq,
2224 struct sg_iovec *iov, int iov_count) 2224 struct sg_iovec *iov, int iov_count)
2225 { 2225 {
2226 struct bio *bio; 2226 struct bio *bio;
2227 2227
2228 if (!iov || iov_count <= 0) 2228 if (!iov || iov_count <= 0)
2229 return -EINVAL; 2229 return -EINVAL;
2230 2230
2231 /* we don't allow misaligned data like bio_map_user() does. If the 2231 /* we don't allow misaligned data like bio_map_user() does. If the
2232 * user is using sg, they're expected to know the alignment constraints 2232 * user is using sg, they're expected to know the alignment constraints
2233 * and respect them accordingly */ 2233 * and respect them accordingly */
2234 bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); 2234 bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ);
2235 if (IS_ERR(bio)) 2235 if (IS_ERR(bio))
2236 return PTR_ERR(bio); 2236 return PTR_ERR(bio);
2237 2237
2238 rq->bio = rq->biotail = bio; 2238 rq->bio = rq->biotail = bio;
2239 blk_rq_bio_prep(q, rq, bio); 2239 blk_rq_bio_prep(q, rq, bio);
2240 rq->buffer = rq->data = NULL; 2240 rq->buffer = rq->data = NULL;
2241 rq->data_len = bio->bi_size; 2241 rq->data_len = bio->bi_size;
2242 return 0; 2242 return 0;
2243 } 2243 }
2244 2244
2245 EXPORT_SYMBOL(blk_rq_map_user_iov); 2245 EXPORT_SYMBOL(blk_rq_map_user_iov);
2246 2246
2247 /** 2247 /**
2248 * blk_rq_unmap_user - unmap a request with user data 2248 * blk_rq_unmap_user - unmap a request with user data
2249 * @bio: bio to be unmapped 2249 * @bio: bio to be unmapped
2250 * @ulen: length of user buffer 2250 * @ulen: length of user buffer
2251 * 2251 *
2252 * Description: 2252 * Description:
2253 * Unmap a bio previously mapped by blk_rq_map_user(). 2253 * Unmap a bio previously mapped by blk_rq_map_user().
2254 */ 2254 */
2255 int blk_rq_unmap_user(struct bio *bio, unsigned int ulen) 2255 int blk_rq_unmap_user(struct bio *bio, unsigned int ulen)
2256 { 2256 {
2257 int ret = 0; 2257 int ret = 0;
2258 2258
2259 if (bio) { 2259 if (bio) {
2260 if (bio_flagged(bio, BIO_USER_MAPPED)) 2260 if (bio_flagged(bio, BIO_USER_MAPPED))
2261 bio_unmap_user(bio); 2261 bio_unmap_user(bio);
2262 else 2262 else
2263 ret = bio_uncopy_user(bio); 2263 ret = bio_uncopy_user(bio);
2264 } 2264 }
2265 2265
2266 return ret; 2266 return ret;
2267 } 2267 }
2268 2268
2269 EXPORT_SYMBOL(blk_rq_unmap_user); 2269 EXPORT_SYMBOL(blk_rq_unmap_user);
2270 2270
2271 /** 2271 /**
2272 * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage 2272 * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage
2273 * @q: request queue where request should be inserted 2273 * @q: request queue where request should be inserted
2274 * @rq: request to fill 2274 * @rq: request to fill
2275 * @kbuf: the kernel buffer 2275 * @kbuf: the kernel buffer
2276 * @len: length of user data 2276 * @len: length of user data
2277 * @gfp_mask: memory allocation flags 2277 * @gfp_mask: memory allocation flags
2278 */ 2278 */
2279 int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, 2279 int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,
2280 unsigned int len, gfp_t gfp_mask) 2280 unsigned int len, gfp_t gfp_mask)
2281 { 2281 {
2282 struct bio *bio; 2282 struct bio *bio;
2283 2283
2284 if (len > (q->max_hw_sectors << 9)) 2284 if (len > (q->max_hw_sectors << 9))
2285 return -EINVAL; 2285 return -EINVAL;
2286 if (!len || !kbuf) 2286 if (!len || !kbuf)
2287 return -EINVAL; 2287 return -EINVAL;
2288 2288
2289 bio = bio_map_kern(q, kbuf, len, gfp_mask); 2289 bio = bio_map_kern(q, kbuf, len, gfp_mask);
2290 if (IS_ERR(bio)) 2290 if (IS_ERR(bio))
2291 return PTR_ERR(bio); 2291 return PTR_ERR(bio);
2292 2292
2293 if (rq_data_dir(rq) == WRITE) 2293 if (rq_data_dir(rq) == WRITE)
2294 bio->bi_rw |= (1 << BIO_RW); 2294 bio->bi_rw |= (1 << BIO_RW);
2295 2295
2296 rq->bio = rq->biotail = bio; 2296 rq->bio = rq->biotail = bio;
2297 blk_rq_bio_prep(q, rq, bio); 2297 blk_rq_bio_prep(q, rq, bio);
2298 2298
2299 rq->buffer = rq->data = NULL; 2299 rq->buffer = rq->data = NULL;
2300 rq->data_len = len; 2300 rq->data_len = len;
2301 return 0; 2301 return 0;
2302 } 2302 }
2303 2303
2304 EXPORT_SYMBOL(blk_rq_map_kern); 2304 EXPORT_SYMBOL(blk_rq_map_kern);
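
The kernel-buffer variant, sketched under the same assumptions as the user-mapping example above (mydev_* names and the elided command setup are hypothetical).

    static int mydev_do_kern_cmd(request_queue_t *q, struct gendisk *disk,
    			     void *kbuf, unsigned int len)
    {
    	struct request *rq = blk_get_request(q, READ, __GFP_WAIT);
    	int err;

    	rq->flags |= REQ_BLOCK_PC;
    	/* real code would also fill rq->cmd, rq->cmd_len, rq->timeout */
    	err = blk_rq_map_kern(q, rq, kbuf, len, GFP_KERNEL);
    	if (!err)
    		err = blk_execute_rq(q, disk, rq, 0);

    	blk_put_request(rq);
    	return err;
    }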
2305 2305
2306 /** 2306 /**
2307 * blk_execute_rq_nowait - insert a request into queue for execution 2307 * blk_execute_rq_nowait - insert a request into queue for execution
2308 * @q: queue to insert the request in 2308 * @q: queue to insert the request in
2309 * @bd_disk: matching gendisk 2309 * @bd_disk: matching gendisk
2310 * @rq: request to insert 2310 * @rq: request to insert
2311 * @at_head: insert request at head or tail of queue 2311 * @at_head: insert request at head or tail of queue
2312 * @done: I/O completion handler 2312 * @done: I/O completion handler
2313 * 2313 *
2314 * Description: 2314 * Description:
2315 * Insert a fully prepared request at the back of the io scheduler queue 2315 * Insert a fully prepared request at the back of the io scheduler queue
2316 * for execution. Don't wait for completion. 2316 * for execution. Don't wait for completion.
2317 */ 2317 */
2318 void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, 2318 void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
2319 struct request *rq, int at_head, 2319 struct request *rq, int at_head,
2320 void (*done)(struct request *)) 2320 void (*done)(struct request *))
2321 { 2321 {
2322 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 2322 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2323 2323
2324 rq->rq_disk = bd_disk; 2324 rq->rq_disk = bd_disk;
2325 rq->flags |= REQ_NOMERGE; 2325 rq->flags |= REQ_NOMERGE;
2326 rq->end_io = done; 2326 rq->end_io = done;
2327 elv_add_request(q, rq, where, 1); 2327 elv_add_request(q, rq, where, 1);
2328 generic_unplug_device(q); 2328 generic_unplug_device(q);
2329 } 2329 }
2330 2330
2331 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); 2331 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
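
A sketch of the asynchronous form: since nobody waits, the done() callback is expected to release the request itself, in the same way blk_end_sync_rq() further down does with __blk_put_request(). The mydev_* names are hypothetical and rq is assumed to be fully prepared (flags, data mapping) by the caller.

    static void mydev_async_done(struct request *rq)
    {
    	if (rq->errors)
    		printk(KERN_WARNING "mydev: async command failed\n");
    	__blk_put_request(rq->q, rq);	/* mirrors blk_end_sync_rq() */
    }

    static void mydev_submit_async(request_queue_t *q, struct gendisk *disk,
    			       struct request *rq)
    {
    	blk_execute_rq_nowait(q, disk, rq, 0, mydev_async_done);
    }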
2332 2332
2333 /** 2333 /**
2334 * blk_execute_rq - insert a request into queue for execution 2334 * blk_execute_rq - insert a request into queue for execution
2335 * @q: queue to insert the request in 2335 * @q: queue to insert the request in
2336 * @bd_disk: matching gendisk 2336 * @bd_disk: matching gendisk
2337 * @rq: request to insert 2337 * @rq: request to insert
2338 * @at_head: insert request at head or tail of queue 2338 * @at_head: insert request at head or tail of queue
2339 * 2339 *
2340 * Description: 2340 * Description:
2341 * Insert a fully prepared request at the back of the io scheduler queue 2341 * Insert a fully prepared request at the back of the io scheduler queue
2342 * for execution and wait for completion. 2342 * for execution and wait for completion.
2343 */ 2343 */
2344 int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, 2344 int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
2345 struct request *rq, int at_head) 2345 struct request *rq, int at_head)
2346 { 2346 {
2347 DECLARE_COMPLETION(wait); 2347 DECLARE_COMPLETION(wait);
2348 char sense[SCSI_SENSE_BUFFERSIZE]; 2348 char sense[SCSI_SENSE_BUFFERSIZE];
2349 int err = 0; 2349 int err = 0;
2350 2350
2351 /* 2351 /*
2352 * we need an extra reference to the request, so we can look at 2352 * we need an extra reference to the request, so we can look at
2353 * it after io completion 2353 * it after io completion
2354 */ 2354 */
2355 rq->ref_count++; 2355 rq->ref_count++;
2356 2356
2357 if (!rq->sense) { 2357 if (!rq->sense) {
2358 memset(sense, 0, sizeof(sense)); 2358 memset(sense, 0, sizeof(sense));
2359 rq->sense = sense; 2359 rq->sense = sense;
2360 rq->sense_len = 0; 2360 rq->sense_len = 0;
2361 } 2361 }
2362 2362
2363 rq->waiting = &wait; 2363 rq->waiting = &wait;
2364 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); 2364 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
2365 wait_for_completion(&wait); 2365 wait_for_completion(&wait);
2366 rq->waiting = NULL; 2366 rq->waiting = NULL;
2367 2367
2368 if (rq->errors) 2368 if (rq->errors)
2369 err = -EIO; 2369 err = -EIO;
2370 2370
2371 return err; 2371 return err;
2372 } 2372 }
2373 2373
2374 EXPORT_SYMBOL(blk_execute_rq); 2374 EXPORT_SYMBOL(blk_execute_rq);
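
A sketch of the synchronous form carrying a no-data, SCSI-style command. The rq->cmd, rq->cmd_len and rq->timeout fields and the TEST UNIT READY opcode are assumptions about the surrounding request layout of this era, not something this patch touches; mydev_test_unit_ready() is hypothetical.

    static int mydev_test_unit_ready(request_queue_t *q, struct gendisk *disk)
    {
    	struct request *rq = blk_get_request(q, READ, __GFP_WAIT);
    	int err;

    	rq->flags |= REQ_BLOCK_PC;
    	memset(rq->cmd, 0, sizeof(rq->cmd));
    	rq->cmd[0] = 0x00;		/* TEST UNIT READY, 6-byte CDB */
    	rq->cmd_len = 6;
    	rq->timeout = 30 * HZ;

    	err = blk_execute_rq(q, disk, rq, 0);	/* sleeps until completion */
    	blk_put_request(rq);
    	return err;
    }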
2375 2375
2376 /** 2376 /**
2377 * blkdev_issue_flush - queue a flush 2377 * blkdev_issue_flush - queue a flush
2378 * @bdev: blockdev to issue flush for 2378 * @bdev: blockdev to issue flush for
2379 * @error_sector: error sector 2379 * @error_sector: error sector
2380 * 2380 *
2381 * Description: 2381 * Description:
2382 * Issue a flush for the block device in question. Caller can supply 2382 * Issue a flush for the block device in question. Caller can supply
2383 * room for storing the error offset in case of a flush error, if they 2383 * room for storing the error offset in case of a flush error, if they
2384 * wish to. Caller must run wait_for_completion() on its own. 2384 * wish to. Caller must run wait_for_completion() on its own.
2385 */ 2385 */
2386 int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) 2386 int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
2387 { 2387 {
2388 request_queue_t *q; 2388 request_queue_t *q;
2389 2389
2390 if (bdev->bd_disk == NULL) 2390 if (bdev->bd_disk == NULL)
2391 return -ENXIO; 2391 return -ENXIO;
2392 2392
2393 q = bdev_get_queue(bdev); 2393 q = bdev_get_queue(bdev);
2394 if (!q) 2394 if (!q)
2395 return -ENXIO; 2395 return -ENXIO;
2396 if (!q->issue_flush_fn) 2396 if (!q->issue_flush_fn)
2397 return -EOPNOTSUPP; 2397 return -EOPNOTSUPP;
2398 2398
2399 return q->issue_flush_fn(q, bdev->bd_disk, error_sector); 2399 return q->issue_flush_fn(q, bdev->bd_disk, error_sector);
2400 } 2400 }
2401 2401
2402 EXPORT_SYMBOL(blkdev_issue_flush); 2402 EXPORT_SYMBOL(blkdev_issue_flush);
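
A sketch of a cache-flush call as described above. Treating -EOPNOTSUPP as "nothing to flush" is a common but assumed policy; mydev_flush() is hypothetical.

    static int mydev_flush(struct block_device *bdev)
    {
    	sector_t error_sector;
    	int err = blkdev_issue_flush(bdev, &error_sector);

    	if (err == -EOPNOTSUPP)
    		err = 0;	/* queue has no issue_flush_fn: nothing to do */
    	return err;
    }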
2403 2403
2404 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) 2404 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
2405 { 2405 {
2406 int rw = rq_data_dir(rq); 2406 int rw = rq_data_dir(rq);
2407 2407
2408 if (!blk_fs_request(rq) || !rq->rq_disk) 2408 if (!blk_fs_request(rq) || !rq->rq_disk)
2409 return; 2409 return;
2410 2410
2411 if (!new_io) { 2411 if (!new_io) {
2412 __disk_stat_inc(rq->rq_disk, merges[rw]); 2412 __disk_stat_inc(rq->rq_disk, merges[rw]);
2413 } else { 2413 } else {
2414 disk_round_stats(rq->rq_disk); 2414 disk_round_stats(rq->rq_disk);
2415 rq->rq_disk->in_flight++; 2415 rq->rq_disk->in_flight++;
2416 } 2416 }
2417 } 2417 }
2418 2418
2419 /* 2419 /*
2420 * add-request adds a request to the linked list. 2420 * add-request adds a request to the linked list.
2421 * queue lock is held and interrupts disabled, as we muck with the 2421 * queue lock is held and interrupts disabled, as we muck with the
2422 * request queue list. 2422 * request queue list.
2423 */ 2423 */
2424 static inline void add_request(request_queue_t * q, struct request * req) 2424 static inline void add_request(request_queue_t * q, struct request * req)
2425 { 2425 {
2426 drive_stat_acct(req, req->nr_sectors, 1); 2426 drive_stat_acct(req, req->nr_sectors, 1);
2427 2427
2428 if (q->activity_fn) 2428 if (q->activity_fn)
2429 q->activity_fn(q->activity_data, rq_data_dir(req)); 2429 q->activity_fn(q->activity_data, rq_data_dir(req));
2430 2430
2431 /* 2431 /*
2432 * elevator indicated where it wants this request to be 2432 * elevator indicated where it wants this request to be
2433 * inserted at elevator_merge time 2433 * inserted at elevator_merge time
2434 */ 2434 */
2435 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); 2435 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
2436 } 2436 }
2437 2437
2438 /* 2438 /*
2439 * disk_round_stats() - Round off the performance stats on a struct 2439 * disk_round_stats() - Round off the performance stats on a struct
2440 * disk_stats. 2440 * disk_stats.
2441 * 2441 *
2442 * The average IO queue length and utilisation statistics are maintained 2442 * The average IO queue length and utilisation statistics are maintained
2443 * by observing the current state of the queue length and the amount of 2443 * by observing the current state of the queue length and the amount of
2444 * time it has been in this state for. 2444 * time it has been in this state for.
2445 * 2445 *
2446 * Normally, that accounting is done on IO completion, but that can result 2446 * Normally, that accounting is done on IO completion, but that can result
2447 * in more than a second's worth of IO being accounted for within any one 2447 * in more than a second's worth of IO being accounted for within any one
2448 * second, leading to >100% utilisation. To deal with that, we call this 2448 * second, leading to >100% utilisation. To deal with that, we call this
2449 * function to do a round-off before returning the results when reading 2449 * function to do a round-off before returning the results when reading
2450 * /proc/diskstats. This accounts immediately for all queue usage up to 2450 * /proc/diskstats. This accounts immediately for all queue usage up to
2451 * the current jiffies and restarts the counters again. 2451 * the current jiffies and restarts the counters again.
2452 */ 2452 */
2453 void disk_round_stats(struct gendisk *disk) 2453 void disk_round_stats(struct gendisk *disk)
2454 { 2454 {
2455 unsigned long now = jiffies; 2455 unsigned long now = jiffies;
2456 2456
2457 if (now == disk->stamp) 2457 if (now == disk->stamp)
2458 return; 2458 return;
2459 2459
2460 if (disk->in_flight) { 2460 if (disk->in_flight) {
2461 __disk_stat_add(disk, time_in_queue, 2461 __disk_stat_add(disk, time_in_queue,
2462 disk->in_flight * (now - disk->stamp)); 2462 disk->in_flight * (now - disk->stamp));
2463 __disk_stat_add(disk, io_ticks, (now - disk->stamp)); 2463 __disk_stat_add(disk, io_ticks, (now - disk->stamp));
2464 } 2464 }
2465 disk->stamp = now; 2465 disk->stamp = now;
2466 } 2466 }
2467 2467
2468 /* 2468 /*
2469 * queue lock must be held 2469 * queue lock must be held
2470 */ 2470 */
2471 void __blk_put_request(request_queue_t *q, struct request *req) 2471 void __blk_put_request(request_queue_t *q, struct request *req)
2472 { 2472 {
2473 struct request_list *rl = req->rl; 2473 struct request_list *rl = req->rl;
2474 2474
2475 if (unlikely(!q)) 2475 if (unlikely(!q))
2476 return; 2476 return;
2477 if (unlikely(--req->ref_count)) 2477 if (unlikely(--req->ref_count))
2478 return; 2478 return;
2479 2479
2480 elv_completed_request(q, req); 2480 elv_completed_request(q, req);
2481 2481
2482 req->rq_status = RQ_INACTIVE; 2482 req->rq_status = RQ_INACTIVE;
2483 req->rl = NULL; 2483 req->rl = NULL;
2484 2484
2485 /* 2485 /*
2486 * Request may not have originated from ll_rw_blk. if not, 2486 * Request may not have originated from ll_rw_blk. if not,
2487 * it didn't come out of our reserved rq pools 2487 * it didn't come out of our reserved rq pools
2488 */ 2488 */
2489 if (rl) { 2489 if (rl) {
2490 int rw = rq_data_dir(req); 2490 int rw = rq_data_dir(req);
2491 int priv = req->flags & REQ_ELVPRIV; 2491 int priv = req->flags & REQ_ELVPRIV;
2492 2492
2493 BUG_ON(!list_empty(&req->queuelist)); 2493 BUG_ON(!list_empty(&req->queuelist));
2494 2494
2495 blk_free_request(q, req); 2495 blk_free_request(q, req);
2496 freed_request(q, rw, priv); 2496 freed_request(q, rw, priv);
2497 } 2497 }
2498 } 2498 }
2499 2499
2500 EXPORT_SYMBOL_GPL(__blk_put_request); 2500 EXPORT_SYMBOL_GPL(__blk_put_request);
2501 2501
2502 void blk_put_request(struct request *req) 2502 void blk_put_request(struct request *req)
2503 { 2503 {
2504 unsigned long flags; 2504 unsigned long flags;
2505 request_queue_t *q = req->q; 2505 request_queue_t *q = req->q;
2506 2506
2507 /* 2507 /*
2508 * Gee, IDE calls in w/ NULL q. Fix IDE and remove the 2508 * Gee, IDE calls in w/ NULL q. Fix IDE and remove the
2509 * following if (q) test. 2509 * following if (q) test.
2510 */ 2510 */
2511 if (q) { 2511 if (q) {
2512 spin_lock_irqsave(q->queue_lock, flags); 2512 spin_lock_irqsave(q->queue_lock, flags);
2513 __blk_put_request(q, req); 2513 __blk_put_request(q, req);
2514 spin_unlock_irqrestore(q->queue_lock, flags); 2514 spin_unlock_irqrestore(q->queue_lock, flags);
2515 } 2515 }
2516 } 2516 }
2517 2517
2518 EXPORT_SYMBOL(blk_put_request); 2518 EXPORT_SYMBOL(blk_put_request);
2519 2519
2520 /** 2520 /**
2521 * blk_end_sync_rq - executes a completion event on a request 2521 * blk_end_sync_rq - executes a completion event on a request
2522 * @rq: request to complete 2522 * @rq: request to complete
2523 */ 2523 */
2524 void blk_end_sync_rq(struct request *rq) 2524 void blk_end_sync_rq(struct request *rq)
2525 { 2525 {
2526 struct completion *waiting = rq->waiting; 2526 struct completion *waiting = rq->waiting;
2527 2527
2528 rq->waiting = NULL; 2528 rq->waiting = NULL;
2529 __blk_put_request(rq->q, rq); 2529 __blk_put_request(rq->q, rq);
2530 2530
2531 /* 2531 /*
2532 * complete last, if this is a stack request the process (and thus 2532 * complete last, if this is a stack request the process (and thus
2533 * the rq pointer) could be invalid right after this complete() 2533 * the rq pointer) could be invalid right after this complete()
2534 */ 2534 */
2535 complete(waiting); 2535 complete(waiting);
2536 } 2536 }
2537 EXPORT_SYMBOL(blk_end_sync_rq); 2537 EXPORT_SYMBOL(blk_end_sync_rq);
2538 2538
2539 /** 2539 /**
2540 * blk_congestion_wait - wait for a queue to become uncongested 2540 * blk_congestion_wait - wait for a queue to become uncongested
2541 * @rw: READ or WRITE 2541 * @rw: READ or WRITE
2542 * @timeout: timeout in jiffies 2542 * @timeout: timeout in jiffies
2543 * 2543 *
2544 * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion. 2544 * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion.
2545 * If no queues are congested then just wait for the next request to be 2545 * If no queues are congested then just wait for the next request to be
2546 * returned. 2546 * returned.
2547 */ 2547 */
2548 long blk_congestion_wait(int rw, long timeout) 2548 long blk_congestion_wait(int rw, long timeout)
2549 { 2549 {
2550 long ret; 2550 long ret;
2551 DEFINE_WAIT(wait); 2551 DEFINE_WAIT(wait);
2552 wait_queue_head_t *wqh = &congestion_wqh[rw]; 2552 wait_queue_head_t *wqh = &congestion_wqh[rw];
2553 2553
2554 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); 2554 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
2555 ret = io_schedule_timeout(timeout); 2555 ret = io_schedule_timeout(timeout);
2556 finish_wait(wqh, &wait); 2556 finish_wait(wqh, &wait);
2557 return ret; 2557 return ret;
2558 } 2558 }
2559 2559
2560 EXPORT_SYMBOL(blk_congestion_wait); 2560 EXPORT_SYMBOL(blk_congestion_wait);
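
A sketch of the throttling pattern this helper supports: back off briefly whenever no write progress can be made, in the spirit of the writeback callers. The mydev_* helpers are hypothetical stand-ins for real dirty-page bookkeeping.

    static int mydev_pages_dirty(void);		/* hypothetical */
    static int mydev_write_some_pages(void);	/* hypothetical */

    static void mydev_writeout_all(void)
    {
    	while (mydev_pages_dirty()) {
    		if (mydev_write_some_pages())
    			continue;
    		/* no progress possible: sleep up to 100ms, or until a
    		 * congested queue clears */
    		blk_congestion_wait(WRITE, HZ / 10);
    	}
    }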
2561 2561
2562 /* 2562 /*
2563 * Has to be called with the request spinlock acquired 2563 * Has to be called with the request spinlock acquired
2564 */ 2564 */
2565 static int attempt_merge(request_queue_t *q, struct request *req, 2565 static int attempt_merge(request_queue_t *q, struct request *req,
2566 struct request *next) 2566 struct request *next)
2567 { 2567 {
2568 if (!rq_mergeable(req) || !rq_mergeable(next)) 2568 if (!rq_mergeable(req) || !rq_mergeable(next))
2569 return 0; 2569 return 0;
2570 2570
2571 /* 2571 /*
2572 * not contiguous 2572 * not contiguous
2573 */ 2573 */
2574 if (req->sector + req->nr_sectors != next->sector) 2574 if (req->sector + req->nr_sectors != next->sector)
2575 return 0; 2575 return 0;
2576 2576
2577 if (rq_data_dir(req) != rq_data_dir(next) 2577 if (rq_data_dir(req) != rq_data_dir(next)
2578 || req->rq_disk != next->rq_disk 2578 || req->rq_disk != next->rq_disk
2579 || next->waiting || next->special) 2579 || next->waiting || next->special)
2580 return 0; 2580 return 0;
2581 2581
2582 /* 2582 /*
2583 * If we are allowed to merge, then append bio list 2583 * If we are allowed to merge, then append bio list
2584 * from next to rq and release next. merge_requests_fn 2584 * from next to rq and release next. merge_requests_fn
2585 * will have updated segment counts, update sector 2585 * will have updated segment counts, update sector
2586 * counts here. 2586 * counts here.
2587 */ 2587 */
2588 if (!q->merge_requests_fn(q, req, next)) 2588 if (!q->merge_requests_fn(q, req, next))
2589 return 0; 2589 return 0;
2590 2590
2591 /* 2591 /*
2592 * At this point we have either done a back merge 2592 * At this point we have either done a back merge
2593 * or front merge. We need the smaller start_time of 2593 * or front merge. We need the smaller start_time of
2594 * the merged requests to be the current request 2594 * the merged requests to be the current request
2595 * for accounting purposes. 2595 * for accounting purposes.
2596 */ 2596 */
2597 if (time_after(req->start_time, next->start_time)) 2597 if (time_after(req->start_time, next->start_time))
2598 req->start_time = next->start_time; 2598 req->start_time = next->start_time;
2599 2599
2600 req->biotail->bi_next = next->bio; 2600 req->biotail->bi_next = next->bio;
2601 req->biotail = next->biotail; 2601 req->biotail = next->biotail;
2602 2602
2603 req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; 2603 req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
2604 2604
2605 elv_merge_requests(q, req, next); 2605 elv_merge_requests(q, req, next);
2606 2606
2607 if (req->rq_disk) { 2607 if (req->rq_disk) {
2608 disk_round_stats(req->rq_disk); 2608 disk_round_stats(req->rq_disk);
2609 req->rq_disk->in_flight--; 2609 req->rq_disk->in_flight--;
2610 } 2610 }
2611 2611
2612 req->ioprio = ioprio_best(req->ioprio, next->ioprio); 2612 req->ioprio = ioprio_best(req->ioprio, next->ioprio);
2613 2613
2614 __blk_put_request(q, next); 2614 __blk_put_request(q, next);
2615 return 1; 2615 return 1;
2616 } 2616 }
2617 2617
2618 static inline int attempt_back_merge(request_queue_t *q, struct request *rq) 2618 static inline int attempt_back_merge(request_queue_t *q, struct request *rq)
2619 { 2619 {
2620 struct request *next = elv_latter_request(q, rq); 2620 struct request *next = elv_latter_request(q, rq);
2621 2621
2622 if (next) 2622 if (next)
2623 return attempt_merge(q, rq, next); 2623 return attempt_merge(q, rq, next);
2624 2624
2625 return 0; 2625 return 0;
2626 } 2626 }
2627 2627
2628 static inline int attempt_front_merge(request_queue_t *q, struct request *rq) 2628 static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
2629 { 2629 {
2630 struct request *prev = elv_former_request(q, rq); 2630 struct request *prev = elv_former_request(q, rq);
2631 2631
2632 if (prev) 2632 if (prev)
2633 return attempt_merge(q, prev, rq); 2633 return attempt_merge(q, prev, rq);
2634 2634
2635 return 0; 2635 return 0;
2636 } 2636 }
2637 2637
2638 /** 2638 /**
2639 * blk_attempt_remerge - attempt to remerge active head with next request 2639 * blk_attempt_remerge - attempt to remerge active head with next request
2640 * @q: The &request_queue_t belonging to the device 2640 * @q: The &request_queue_t belonging to the device
2641 * @rq: The head request (usually) 2641 * @rq: The head request (usually)
2642 * 2642 *
2643 * Description: 2643 * Description:
2644 * For head-active devices, the queue can easily be unplugged so quickly 2644 * For head-active devices, the queue can easily be unplugged so quickly
2645 * that proper merging is not done on the front request. This may hurt 2645 * that proper merging is not done on the front request. This may hurt
2646 * performance greatly for some devices. The block layer cannot safely 2646 * performance greatly for some devices. The block layer cannot safely
2647 * do merging on that first request for these queues, but the driver can 2647 * do merging on that first request for these queues, but the driver can
2648 * call this function and make it happen anyway. Only the driver knows 2648 * call this function and make it happen anyway. Only the driver knows
2649 * when it is safe to do so. 2649 * when it is safe to do so.
2650 **/ 2650 **/
2651 void blk_attempt_remerge(request_queue_t *q, struct request *rq) 2651 void blk_attempt_remerge(request_queue_t *q, struct request *rq)
2652 { 2652 {
2653 unsigned long flags; 2653 unsigned long flags;
2654 2654
2655 spin_lock_irqsave(q->queue_lock, flags); 2655 spin_lock_irqsave(q->queue_lock, flags);
2656 attempt_back_merge(q, rq); 2656 attempt_back_merge(q, rq);
2657 spin_unlock_irqrestore(q->queue_lock, flags); 2657 spin_unlock_irqrestore(q->queue_lock, flags);
2658 } 2658 }
2659 2659
2660 EXPORT_SYMBOL(blk_attempt_remerge); 2660 EXPORT_SYMBOL(blk_attempt_remerge);
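
A sketch of the head-active pattern described above. The call must be made without the queue lock held, since blk_attempt_remerge() takes it itself; mydev_program_hw() and the surrounding context are hypothetical.

    static void mydev_program_hw(struct request *rq);	/* hypothetical */

    /* called from the driver's own context, queue lock NOT held */
    static void mydev_commit_head_request(request_queue_t *q, struct request *rq)
    {
    	/* last chance for the elevator to grow the head request */
    	blk_attempt_remerge(q, rq);

    	mydev_program_hw(rq);		/* now hand it to the hardware */
    }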
2661 2661
2662 static int __make_request(request_queue_t *q, struct bio *bio) 2662 static int __make_request(request_queue_t *q, struct bio *bio)
2663 { 2663 {
2664 struct request *req; 2664 struct request *req;
2665 int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync; 2665 int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
2666 unsigned short prio; 2666 unsigned short prio;
2667 sector_t sector; 2667 sector_t sector;
2668 2668
2669 sector = bio->bi_sector; 2669 sector = bio->bi_sector;
2670 nr_sectors = bio_sectors(bio); 2670 nr_sectors = bio_sectors(bio);
2671 cur_nr_sectors = bio_cur_sectors(bio); 2671 cur_nr_sectors = bio_cur_sectors(bio);
2672 prio = bio_prio(bio); 2672 prio = bio_prio(bio);
2673 2673
2674 rw = bio_data_dir(bio); 2674 rw = bio_data_dir(bio);
2675 sync = bio_sync(bio); 2675 sync = bio_sync(bio);
2676 2676
2677 /* 2677 /*
2678 * low level driver can indicate that it wants pages above a 2678 * low level driver can indicate that it wants pages above a
2679 * certain limit bounced to low memory (ie for highmem, or even 2679 * certain limit bounced to low memory (ie for highmem, or even
2680 * ISA dma in theory) 2680 * ISA dma in theory)
2681 */ 2681 */
2682 blk_queue_bounce(q, &bio); 2682 blk_queue_bounce(q, &bio);
2683 2683
2684 spin_lock_prefetch(q->queue_lock); 2684 spin_lock_prefetch(q->queue_lock);
2685 2685
2686 barrier = bio_barrier(bio); 2686 barrier = bio_barrier(bio);
2687 if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) { 2687 if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) {
2688 err = -EOPNOTSUPP; 2688 err = -EOPNOTSUPP;
2689 goto end_io; 2689 goto end_io;
2690 } 2690 }
2691 2691
2692 spin_lock_irq(q->queue_lock); 2692 spin_lock_irq(q->queue_lock);
2693 2693
2694 if (unlikely(barrier) || elv_queue_empty(q)) 2694 if (unlikely(barrier) || elv_queue_empty(q))
2695 goto get_rq; 2695 goto get_rq;
2696 2696
2697 el_ret = elv_merge(q, &req, bio); 2697 el_ret = elv_merge(q, &req, bio);
2698 switch (el_ret) { 2698 switch (el_ret) {
2699 case ELEVATOR_BACK_MERGE: 2699 case ELEVATOR_BACK_MERGE:
2700 BUG_ON(!rq_mergeable(req)); 2700 BUG_ON(!rq_mergeable(req));
2701 2701
2702 if (!q->back_merge_fn(q, req, bio)) 2702 if (!q->back_merge_fn(q, req, bio))
2703 break; 2703 break;
2704 2704
2705 req->biotail->bi_next = bio; 2705 req->biotail->bi_next = bio;
2706 req->biotail = bio; 2706 req->biotail = bio;
2707 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 2707 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
2708 req->ioprio = ioprio_best(req->ioprio, prio); 2708 req->ioprio = ioprio_best(req->ioprio, prio);
2709 drive_stat_acct(req, nr_sectors, 0); 2709 drive_stat_acct(req, nr_sectors, 0);
2710 if (!attempt_back_merge(q, req)) 2710 if (!attempt_back_merge(q, req))
2711 elv_merged_request(q, req); 2711 elv_merged_request(q, req);
2712 goto out; 2712 goto out;
2713 2713
2714 case ELEVATOR_FRONT_MERGE: 2714 case ELEVATOR_FRONT_MERGE:
2715 BUG_ON(!rq_mergeable(req)); 2715 BUG_ON(!rq_mergeable(req));
2716 2716
2717 if (!q->front_merge_fn(q, req, bio)) 2717 if (!q->front_merge_fn(q, req, bio))
2718 break; 2718 break;
2719 2719
2720 bio->bi_next = req->bio; 2720 bio->bi_next = req->bio;
2721 req->bio = bio; 2721 req->bio = bio;
2722 2722
2723 /* 2723 /*
2724 * may not be valid. if the low level driver said 2724 * may not be valid. if the low level driver said
2725 * it didn't need a bounce buffer then it better 2725 * it didn't need a bounce buffer then it better
2726 * not touch req->buffer either... 2726 * not touch req->buffer either...
2727 */ 2727 */
2728 req->buffer = bio_data(bio); 2728 req->buffer = bio_data(bio);
2729 req->current_nr_sectors = cur_nr_sectors; 2729 req->current_nr_sectors = cur_nr_sectors;
2730 req->hard_cur_sectors = cur_nr_sectors; 2730 req->hard_cur_sectors = cur_nr_sectors;
2731 req->sector = req->hard_sector = sector; 2731 req->sector = req->hard_sector = sector;
2732 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 2732 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
2733 req->ioprio = ioprio_best(req->ioprio, prio); 2733 req->ioprio = ioprio_best(req->ioprio, prio);
2734 drive_stat_acct(req, nr_sectors, 0); 2734 drive_stat_acct(req, nr_sectors, 0);
2735 if (!attempt_front_merge(q, req)) 2735 if (!attempt_front_merge(q, req))
2736 elv_merged_request(q, req); 2736 elv_merged_request(q, req);
2737 goto out; 2737 goto out;
2738 2738
2739 /* ELV_NO_MERGE: elevator says don't/can't merge. */ 2739 /* ELV_NO_MERGE: elevator says don't/can't merge. */
2740 default: 2740 default:
2741 ; 2741 ;
2742 } 2742 }
2743 2743
2744 get_rq: 2744 get_rq:
2745 /* 2745 /*
2746 * Grab a free request. This might sleep but cannot fail. 2746 * Grab a free request. This might sleep but cannot fail.
2747 * Returns with the queue unlocked. 2747 * Returns with the queue unlocked.
2748 */ 2748 */
2749 req = get_request_wait(q, rw, bio); 2749 req = get_request_wait(q, rw, bio);
2750 2750
2751 /* 2751 /*
2752 * After dropping the lock and possibly sleeping here, our request 2752 * After dropping the lock and possibly sleeping here, our request
2753 * may now be mergeable after it had proven unmergeable (above). 2753 * may now be mergeable after it had proven unmergeable (above).
2754 * We don't worry about that case for efficiency. It won't happen 2754 * We don't worry about that case for efficiency. It won't happen
2755 * often, and the elevators are able to handle it. 2755 * often, and the elevators are able to handle it.
2756 */ 2756 */
2757 2757
2758 req->flags |= REQ_CMD; 2758 req->flags |= REQ_CMD;
2759 2759
2760 /* 2760 /*
2761 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) 2761 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
2762 */ 2762 */
2763 if (bio_rw_ahead(bio) || bio_failfast(bio)) 2763 if (bio_rw_ahead(bio) || bio_failfast(bio))
2764 req->flags |= REQ_FAILFAST; 2764 req->flags |= REQ_FAILFAST;
2765 2765
2766 /* 2766 /*
2767 * REQ_BARRIER implies no merging, but let's make it explicit 2767 * REQ_BARRIER implies no merging, but let's make it explicit
2768 */ 2768 */
2769 if (unlikely(barrier)) 2769 if (unlikely(barrier))
2770 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); 2770 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2771 2771
2772 req->errors = 0; 2772 req->errors = 0;
2773 req->hard_sector = req->sector = sector; 2773 req->hard_sector = req->sector = sector;
2774 req->hard_nr_sectors = req->nr_sectors = nr_sectors; 2774 req->hard_nr_sectors = req->nr_sectors = nr_sectors;
2775 req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors; 2775 req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors;
2776 req->nr_phys_segments = bio_phys_segments(q, bio); 2776 req->nr_phys_segments = bio_phys_segments(q, bio);
2777 req->nr_hw_segments = bio_hw_segments(q, bio); 2777 req->nr_hw_segments = bio_hw_segments(q, bio);
2778 req->buffer = bio_data(bio); /* see ->buffer comment above */ 2778 req->buffer = bio_data(bio); /* see ->buffer comment above */
2779 req->waiting = NULL; 2779 req->waiting = NULL;
2780 req->bio = req->biotail = bio; 2780 req->bio = req->biotail = bio;
2781 req->ioprio = prio; 2781 req->ioprio = prio;
2782 req->rq_disk = bio->bi_bdev->bd_disk; 2782 req->rq_disk = bio->bi_bdev->bd_disk;
2783 req->start_time = jiffies; 2783 req->start_time = jiffies;
2784 2784
2785 spin_lock_irq(q->queue_lock); 2785 spin_lock_irq(q->queue_lock);
2786 if (elv_queue_empty(q)) 2786 if (elv_queue_empty(q))
2787 blk_plug_device(q); 2787 blk_plug_device(q);
2788 add_request(q, req); 2788 add_request(q, req);
2789 out: 2789 out:
2790 if (sync) 2790 if (sync)
2791 __generic_unplug_device(q); 2791 __generic_unplug_device(q);
2792 2792
2793 spin_unlock_irq(q->queue_lock); 2793 spin_unlock_irq(q->queue_lock);
2794 return 0; 2794 return 0;
2795 2795
2796 end_io: 2796 end_io:
2797 bio_endio(bio, nr_sectors << 9, err); 2797 bio_endio(bio, nr_sectors << 9, err);
2798 return 0; 2798 return 0;
2799 } 2799 }
2800 2800
2801 /* 2801 /*
2802 * If bio->bi_bdev is a partition, remap the location 2802 * If bio->bi_bdev is a partition, remap the location
2803 */ 2803 */
2804 static inline void blk_partition_remap(struct bio *bio) 2804 static inline void blk_partition_remap(struct bio *bio)
2805 { 2805 {
2806 struct block_device *bdev = bio->bi_bdev; 2806 struct block_device *bdev = bio->bi_bdev;
2807 2807
2808 if (bdev != bdev->bd_contains) { 2808 if (bdev != bdev->bd_contains) {
2809 struct hd_struct *p = bdev->bd_part; 2809 struct hd_struct *p = bdev->bd_part;
2810 const int rw = bio_data_dir(bio); 2810 const int rw = bio_data_dir(bio);
2811 2811
2812 p->sectors[rw] += bio_sectors(bio); 2812 p->sectors[rw] += bio_sectors(bio);
2813 p->ios[rw]++; 2813 p->ios[rw]++;
2814 2814
2815 bio->bi_sector += p->start_sect; 2815 bio->bi_sector += p->start_sect;
2816 bio->bi_bdev = bdev->bd_contains; 2816 bio->bi_bdev = bdev->bd_contains;
2817 } 2817 }
2818 } 2818 }
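
A small worked example of the remap above may help; the device layout and sector numbers are purely illustrative.

/* Illustrative only: a bio aimed at sector 100 of a partition whose
 * start_sect is 2048 leaves blk_partition_remap() pointing at the
 * whole disk:
 *
 *	bio->bi_sector = 100 + 2048 = 2148;
 *	bio->bi_bdev   = bdev->bd_contains;	(the whole-disk bdev)
 *
 * and the partition's ios[]/sectors[] statistics are bumped for the
 * direction of the transfer.
 */
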
2819 2819
2820 static void handle_bad_sector(struct bio *bio) 2820 static void handle_bad_sector(struct bio *bio)
2821 { 2821 {
2822 char b[BDEVNAME_SIZE]; 2822 char b[BDEVNAME_SIZE];
2823 2823
2824 printk(KERN_INFO "attempt to access beyond end of device\n"); 2824 printk(KERN_INFO "attempt to access beyond end of device\n");
2825 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", 2825 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
2826 bdevname(bio->bi_bdev, b), 2826 bdevname(bio->bi_bdev, b),
2827 bio->bi_rw, 2827 bio->bi_rw,
2828 (unsigned long long)bio->bi_sector + bio_sectors(bio), 2828 (unsigned long long)bio->bi_sector + bio_sectors(bio),
2829 (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); 2829 (long long)(bio->bi_bdev->bd_inode->i_size >> 9));
2830 2830
2831 set_bit(BIO_EOF, &bio->bi_flags); 2831 set_bit(BIO_EOF, &bio->bi_flags);
2832 } 2832 }
2833 2833
2834 /** 2834 /**
2835 * generic_make_request: hand a buffer to its device driver for I/O 2835 * generic_make_request: hand a buffer to its device driver for I/O
2836 * @bio: The bio describing the location in memory and on the device. 2836 * @bio: The bio describing the location in memory and on the device.
2837 * 2837 *
2838 * generic_make_request() is used to make I/O requests of block 2838 * generic_make_request() is used to make I/O requests of block
2839 * devices. It is passed a &struct bio, which describes the I/O that needs 2839 * devices. It is passed a &struct bio, which describes the I/O that needs
2840 * to be done. 2840 * to be done.
2841 * 2841 *
2842 * generic_make_request() does not return any status. The 2842 * generic_make_request() does not return any status. The
2843 * success/failure status of the request, along with notification of 2843 * success/failure status of the request, along with notification of
2844 * completion, is delivered asynchronously through the bio->bi_end_io 2844 * completion, is delivered asynchronously through the bio->bi_end_io
2845 * function described (one day) elsewhere. 2845 * function described (one day) elsewhere.
2846 * 2846 *
2847 * The caller of generic_make_request must make sure that bi_io_vec 2847 * The caller of generic_make_request must make sure that bi_io_vec
2848 * are set to describe the memory buffer, and that bi_dev and bi_sector are 2848 * are set to describe the memory buffer, and that bi_dev and bi_sector are
2849 * set to describe the device address, and the 2849 * set to describe the device address, and the
2850 * bi_end_io and optionally bi_private are set to describe how 2850 * bi_end_io and optionally bi_private are set to describe how
2851 * completion notification should be signaled. 2851 * completion notification should be signaled.
2852 * 2852 *
2853 * generic_make_request and the drivers it calls may use bi_next if this 2853 * generic_make_request and the drivers it calls may use bi_next if this
2854 * bio happens to be merged with someone else, and may change bi_dev and 2854 * bio happens to be merged with someone else, and may change bi_dev and
2855 * bi_sector for remaps as it sees fit. So the values of these fields 2855 * bi_sector for remaps as it sees fit. So the values of these fields
2856 * should NOT be depended on after the call to generic_make_request. 2856 * should NOT be depended on after the call to generic_make_request.
2857 */ 2857 */
2858 void generic_make_request(struct bio *bio) 2858 void generic_make_request(struct bio *bio)
2859 { 2859 {
2860 request_queue_t *q; 2860 request_queue_t *q;
2861 sector_t maxsector; 2861 sector_t maxsector;
2862 int ret, nr_sectors = bio_sectors(bio); 2862 int ret, nr_sectors = bio_sectors(bio);
2863 2863
2864 might_sleep(); 2864 might_sleep();
2865 /* Test device or partition size, when known. */ 2865 /* Test device or partition size, when known. */
2866 maxsector = bio->bi_bdev->bd_inode->i_size >> 9; 2866 maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
2867 if (maxsector) { 2867 if (maxsector) {
2868 sector_t sector = bio->bi_sector; 2868 sector_t sector = bio->bi_sector;
2869 2869
2870 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { 2870 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
2871 /* 2871 /*
2872 * This may well happen - the kernel calls bread() 2872 * This may well happen - the kernel calls bread()
2873 * without checking the size of the device, e.g., when 2873 * without checking the size of the device, e.g., when
2874 * mounting a device. 2874 * mounting a device.
2875 */ 2875 */
2876 handle_bad_sector(bio); 2876 handle_bad_sector(bio);
2877 goto end_io; 2877 goto end_io;
2878 } 2878 }
2879 } 2879 }
2880 2880
2881 /* 2881 /*
2882 * Resolve the mapping until finished. (drivers are 2882 * Resolve the mapping until finished. (drivers are
2883 * still free to implement/resolve their own stacking 2883 * still free to implement/resolve their own stacking
2884 * by explicitly returning 0) 2884 * by explicitly returning 0)
2885 * 2885 *
2886 * NOTE: we don't repeat the blk_size check for each new device. 2886 * NOTE: we don't repeat the blk_size check for each new device.
2887 * Stacking drivers are expected to know what they are doing. 2887 * Stacking drivers are expected to know what they are doing.
2888 */ 2888 */
2889 do { 2889 do {
2890 char b[BDEVNAME_SIZE]; 2890 char b[BDEVNAME_SIZE];
2891 2891
2892 q = bdev_get_queue(bio->bi_bdev); 2892 q = bdev_get_queue(bio->bi_bdev);
2893 if (!q) { 2893 if (!q) {
2894 printk(KERN_ERR 2894 printk(KERN_ERR
2895 "generic_make_request: Trying to access " 2895 "generic_make_request: Trying to access "
2896 "nonexistent block-device %s (%Lu)\n", 2896 "nonexistent block-device %s (%Lu)\n",
2897 bdevname(bio->bi_bdev, b), 2897 bdevname(bio->bi_bdev, b),
2898 (long long) bio->bi_sector); 2898 (long long) bio->bi_sector);
2899 end_io: 2899 end_io:
2900 bio_endio(bio, bio->bi_size, -EIO); 2900 bio_endio(bio, bio->bi_size, -EIO);
2901 break; 2901 break;
2902 } 2902 }
2903 2903
2904 if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) { 2904 if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) {
2905 printk("bio too big device %s (%u > %u)\n", 2905 printk("bio too big device %s (%u > %u)\n",
2906 bdevname(bio->bi_bdev, b), 2906 bdevname(bio->bi_bdev, b),
2907 bio_sectors(bio), 2907 bio_sectors(bio),
2908 q->max_hw_sectors); 2908 q->max_hw_sectors);
2909 goto end_io; 2909 goto end_io;
2910 } 2910 }
2911 2911
2912 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) 2912 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
2913 goto end_io; 2913 goto end_io;
2914 2914
2915 /* 2915 /*
2916 * If this device has partitions, remap block n 2916 * If this device has partitions, remap block n
2917 * of partition p to block n+start(p) of the disk. 2917 * of partition p to block n+start(p) of the disk.
2918 */ 2918 */
2919 blk_partition_remap(bio); 2919 blk_partition_remap(bio);
2920 2920
2921 ret = q->make_request_fn(q, bio); 2921 ret = q->make_request_fn(q, bio);
2922 } while (ret); 2922 } while (ret);
2923 } 2923 }
2924 2924
2925 EXPORT_SYMBOL(generic_make_request); 2925 EXPORT_SYMBOL(generic_make_request);
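
The stacking convention noted in the loop comment above (a driver may remap the bio and let the loop resubmit it, or return 0 once it has handled the bio itself) can be sketched as follows. This is an illustrative remapping driver, not code from this patch; "struct my_stack_dev", its fields, and the use of q->queuedata are assumptions about how such a driver might be wired up.

struct my_stack_dev {
	struct block_device	*backing_bdev;	/* real device underneath */
	sector_t		 start;		/* offset into it */
};

static int my_stack_make_request(request_queue_t *q, struct bio *bio)
{
	struct my_stack_dev *dev = q->queuedata;

	/* redirect the bio, then return non-zero so the loop in
	 * generic_make_request() hands it to the backing queue */
	bio->bi_bdev = dev->backing_bdev;
	bio->bi_sector += dev->start;
	return 1;
}
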
2926 2926
2927 /** 2927 /**
2928 * submit_bio: submit a bio to the block device layer for I/O 2928 * submit_bio: submit a bio to the block device layer for I/O
2929 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) 2929 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
2930 * @bio: The &struct bio which describes the I/O 2930 * @bio: The &struct bio which describes the I/O
2931 * 2931 *
2932 * submit_bio() is very similar in purpose to generic_make_request(), and 2932 * submit_bio() is very similar in purpose to generic_make_request(), and
2933 * uses that function to do most of the work. Both are fairly rough 2933 * uses that function to do most of the work. Both are fairly rough
2934 * interfaces, @bio must be set up and ready for I/O. 2934 * interfaces, @bio must be set up and ready for I/O.
2935 * 2935 *
2936 */ 2936 */
2937 void submit_bio(int rw, struct bio *bio) 2937 void submit_bio(int rw, struct bio *bio)
2938 { 2938 {
2939 int count = bio_sectors(bio); 2939 int count = bio_sectors(bio);
2940 2940
2941 BIO_BUG_ON(!bio->bi_size); 2941 BIO_BUG_ON(!bio->bi_size);
2942 BIO_BUG_ON(!bio->bi_io_vec); 2942 BIO_BUG_ON(!bio->bi_io_vec);
2943 bio->bi_rw |= rw; 2943 bio->bi_rw |= rw;
2944 if (rw & WRITE) 2944 if (rw & WRITE)
2945 mod_page_state(pgpgout, count); 2945 mod_page_state(pgpgout, count);
2946 else 2946 else
2947 mod_page_state(pgpgin, count); 2947 mod_page_state(pgpgin, count);
2948 2948
2949 if (unlikely(block_dump)) { 2949 if (unlikely(block_dump)) {
2950 char b[BDEVNAME_SIZE]; 2950 char b[BDEVNAME_SIZE];
2951 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", 2951 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
2952 current->comm, current->pid, 2952 current->comm, current->pid,
2953 (rw & WRITE) ? "WRITE" : "READ", 2953 (rw & WRITE) ? "WRITE" : "READ",
2954 (unsigned long long)bio->bi_sector, 2954 (unsigned long long)bio->bi_sector,
2955 bdevname(bio->bi_bdev,b)); 2955 bdevname(bio->bi_bdev,b));
2956 } 2956 }
2957 2957
2958 generic_make_request(bio); 2958 generic_make_request(bio);
2959 } 2959 }
2960 2960
2961 EXPORT_SYMBOL(submit_bio); 2961 EXPORT_SYMBOL(submit_bio);
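
A hedged caller-side sketch of the rules spelled out for generic_make_request(): the bio carries its own device, sector, payload and completion callback. Everything named my_* below is illustrative, and error handling (a short bio_add_page(), I/O errors) is omitted for brevity.

static int my_end_io(struct bio *bio, unsigned int bytes_done, int error)
{
	if (bio->bi_size)
		return 1;		/* partial completion, not done yet */

	complete(bio->bi_private);	/* whole bio finished */
	return 0;
}

static void my_read_page(struct block_device *bdev, struct page *page)
{
	DECLARE_COMPLETION(done);
	struct bio *bio = bio_alloc(GFP_KERNEL, 1);

	bio->bi_bdev = bdev;		/* device address ... */
	bio->bi_sector = 0;		/* ... and starting sector */
	bio->bi_end_io = my_end_io;	/* completion notification */
	bio->bi_private = &done;
	bio_add_page(bio, page, PAGE_SIZE, 0);	/* payload */

	submit_bio(READ, bio);
	wait_for_completion(&done);
	bio_put(bio);
}
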
2962 2962
2963 static void blk_recalc_rq_segments(struct request *rq) 2963 static void blk_recalc_rq_segments(struct request *rq)
2964 { 2964 {
2965 struct bio *bio, *prevbio = NULL; 2965 struct bio *bio, *prevbio = NULL;
2966 int nr_phys_segs, nr_hw_segs; 2966 int nr_phys_segs, nr_hw_segs;
2967 unsigned int phys_size, hw_size; 2967 unsigned int phys_size, hw_size;
2968 request_queue_t *q = rq->q; 2968 request_queue_t *q = rq->q;
2969 2969
2970 if (!rq->bio) 2970 if (!rq->bio)
2971 return; 2971 return;
2972 2972
2973 phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; 2973 phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
2974 rq_for_each_bio(bio, rq) { 2974 rq_for_each_bio(bio, rq) {
2975 /* Force bio hw/phys segs to be recalculated. */ 2975 /* Force bio hw/phys segs to be recalculated. */
2976 bio->bi_flags &= ~(1 << BIO_SEG_VALID); 2976 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
2977 2977
2978 nr_phys_segs += bio_phys_segments(q, bio); 2978 nr_phys_segs += bio_phys_segments(q, bio);
2979 nr_hw_segs += bio_hw_segments(q, bio); 2979 nr_hw_segs += bio_hw_segments(q, bio);
2980 if (prevbio) { 2980 if (prevbio) {
2981 int pseg = phys_size + prevbio->bi_size + bio->bi_size; 2981 int pseg = phys_size + prevbio->bi_size + bio->bi_size;
2982 int hseg = hw_size + prevbio->bi_size + bio->bi_size; 2982 int hseg = hw_size + prevbio->bi_size + bio->bi_size;
2983 2983
2984 if (blk_phys_contig_segment(q, prevbio, bio) && 2984 if (blk_phys_contig_segment(q, prevbio, bio) &&
2985 pseg <= q->max_segment_size) { 2985 pseg <= q->max_segment_size) {
2986 nr_phys_segs--; 2986 nr_phys_segs--;
2987 phys_size += prevbio->bi_size + bio->bi_size; 2987 phys_size += prevbio->bi_size + bio->bi_size;
2988 } else 2988 } else
2989 phys_size = 0; 2989 phys_size = 0;
2990 2990
2991 if (blk_hw_contig_segment(q, prevbio, bio) && 2991 if (blk_hw_contig_segment(q, prevbio, bio) &&
2992 hseg <= q->max_segment_size) { 2992 hseg <= q->max_segment_size) {
2993 nr_hw_segs--; 2993 nr_hw_segs--;
2994 hw_size += prevbio->bi_size + bio->bi_size; 2994 hw_size += prevbio->bi_size + bio->bi_size;
2995 } else 2995 } else
2996 hw_size = 0; 2996 hw_size = 0;
2997 } 2997 }
2998 prevbio = bio; 2998 prevbio = bio;
2999 } 2999 }
3000 3000
3001 rq->nr_phys_segments = nr_phys_segs; 3001 rq->nr_phys_segments = nr_phys_segs;
3002 rq->nr_hw_segments = nr_hw_segs; 3002 rq->nr_hw_segments = nr_hw_segs;
3003 } 3003 }
3004 3004
3005 static void blk_recalc_rq_sectors(struct request *rq, int nsect) 3005 static void blk_recalc_rq_sectors(struct request *rq, int nsect)
3006 { 3006 {
3007 if (blk_fs_request(rq)) { 3007 if (blk_fs_request(rq)) {
3008 rq->hard_sector += nsect; 3008 rq->hard_sector += nsect;
3009 rq->hard_nr_sectors -= nsect; 3009 rq->hard_nr_sectors -= nsect;
3010 3010
3011 /* 3011 /*
3012 * Move the I/O submission pointers ahead if required. 3012 * Move the I/O submission pointers ahead if required.
3013 */ 3013 */
3014 if ((rq->nr_sectors >= rq->hard_nr_sectors) && 3014 if ((rq->nr_sectors >= rq->hard_nr_sectors) &&
3015 (rq->sector <= rq->hard_sector)) { 3015 (rq->sector <= rq->hard_sector)) {
3016 rq->sector = rq->hard_sector; 3016 rq->sector = rq->hard_sector;
3017 rq->nr_sectors = rq->hard_nr_sectors; 3017 rq->nr_sectors = rq->hard_nr_sectors;
3018 rq->hard_cur_sectors = bio_cur_sectors(rq->bio); 3018 rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
3019 rq->current_nr_sectors = rq->hard_cur_sectors; 3019 rq->current_nr_sectors = rq->hard_cur_sectors;
3020 rq->buffer = bio_data(rq->bio); 3020 rq->buffer = bio_data(rq->bio);
3021 } 3021 }
3022 3022
3023 /* 3023 /*
3024 * if total number of sectors is less than the first segment 3024 * if total number of sectors is less than the first segment
3025 * size, something has gone terribly wrong 3025 * size, something has gone terribly wrong
3026 */ 3026 */
3027 if (rq->nr_sectors < rq->current_nr_sectors) { 3027 if (rq->nr_sectors < rq->current_nr_sectors) {
3028 printk("blk: request botched\n"); 3028 printk("blk: request botched\n");
3029 rq->nr_sectors = rq->current_nr_sectors; 3029 rq->nr_sectors = rq->current_nr_sectors;
3030 } 3030 }
3031 } 3031 }
3032 } 3032 }
3033 3033
3034 static int __end_that_request_first(struct request *req, int uptodate, 3034 static int __end_that_request_first(struct request *req, int uptodate,
3035 int nr_bytes) 3035 int nr_bytes)
3036 { 3036 {
3037 int total_bytes, bio_nbytes, error, next_idx = 0; 3037 int total_bytes, bio_nbytes, error, next_idx = 0;
3038 struct bio *bio; 3038 struct bio *bio;
3039 3039
3040 /* 3040 /*
3041 * extend uptodate bool to allow < 0 value to be direct io error 3041 * extend uptodate bool to allow < 0 value to be direct io error
3042 */ 3042 */
3043 error = 0; 3043 error = 0;
3044 if (end_io_error(uptodate)) 3044 if (end_io_error(uptodate))
3045 error = !uptodate ? -EIO : uptodate; 3045 error = !uptodate ? -EIO : uptodate;
3046 3046
3047 /* 3047 /*
3048 * for a REQ_BLOCK_PC request, we want to carry any eventual 3048 * for a REQ_BLOCK_PC request, we want to carry any eventual
3049 * sense key with us all the way through 3049 * sense key with us all the way through
3050 */ 3050 */
3051 if (!blk_pc_request(req)) 3051 if (!blk_pc_request(req))
3052 req->errors = 0; 3052 req->errors = 0;
3053 3053
3054 if (!uptodate) { 3054 if (!uptodate) {
3055 if (blk_fs_request(req) && !(req->flags & REQ_QUIET)) 3055 if (blk_fs_request(req) && !(req->flags & REQ_QUIET))
3056 printk("end_request: I/O error, dev %s, sector %llu\n", 3056 printk("end_request: I/O error, dev %s, sector %llu\n",
3057 req->rq_disk ? req->rq_disk->disk_name : "?", 3057 req->rq_disk ? req->rq_disk->disk_name : "?",
3058 (unsigned long long)req->sector); 3058 (unsigned long long)req->sector);
3059 } 3059 }
3060 3060
3061 if (blk_fs_request(req) && req->rq_disk) { 3061 if (blk_fs_request(req) && req->rq_disk) {
3062 const int rw = rq_data_dir(req); 3062 const int rw = rq_data_dir(req);
3063 3063
3064 __disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9); 3064 __disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
3065 } 3065 }
3066 3066
3067 total_bytes = bio_nbytes = 0; 3067 total_bytes = bio_nbytes = 0;
3068 while ((bio = req->bio) != NULL) { 3068 while ((bio = req->bio) != NULL) {
3069 int nbytes; 3069 int nbytes;
3070 3070
3071 if (nr_bytes >= bio->bi_size) { 3071 if (nr_bytes >= bio->bi_size) {
3072 req->bio = bio->bi_next; 3072 req->bio = bio->bi_next;
3073 nbytes = bio->bi_size; 3073 nbytes = bio->bi_size;
3074 bio_endio(bio, nbytes, error); 3074 bio_endio(bio, nbytes, error);
3075 next_idx = 0; 3075 next_idx = 0;
3076 bio_nbytes = 0; 3076 bio_nbytes = 0;
3077 } else { 3077 } else {
3078 int idx = bio->bi_idx + next_idx; 3078 int idx = bio->bi_idx + next_idx;
3079 3079
3080 if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { 3080 if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
3081 blk_dump_rq_flags(req, "__end_that"); 3081 blk_dump_rq_flags(req, "__end_that");
3082 printk("%s: bio idx %d >= vcnt %d\n", 3082 printk("%s: bio idx %d >= vcnt %d\n",
3083 __FUNCTION__, 3083 __FUNCTION__,
3084 bio->bi_idx, bio->bi_vcnt); 3084 bio->bi_idx, bio->bi_vcnt);
3085 break; 3085 break;
3086 } 3086 }
3087 3087
3088 nbytes = bio_iovec_idx(bio, idx)->bv_len; 3088 nbytes = bio_iovec_idx(bio, idx)->bv_len;
3089 BIO_BUG_ON(nbytes > bio->bi_size); 3089 BIO_BUG_ON(nbytes > bio->bi_size);
3090 3090
3091 /* 3091 /*
3092 * not a complete bvec done 3092 * not a complete bvec done
3093 */ 3093 */
3094 if (unlikely(nbytes > nr_bytes)) { 3094 if (unlikely(nbytes > nr_bytes)) {
3095 bio_nbytes += nr_bytes; 3095 bio_nbytes += nr_bytes;
3096 total_bytes += nr_bytes; 3096 total_bytes += nr_bytes;
3097 break; 3097 break;
3098 } 3098 }
3099 3099
3100 /* 3100 /*
3101 * advance to the next vector 3101 * advance to the next vector
3102 */ 3102 */
3103 next_idx++; 3103 next_idx++;
3104 bio_nbytes += nbytes; 3104 bio_nbytes += nbytes;
3105 } 3105 }
3106 3106
3107 total_bytes += nbytes; 3107 total_bytes += nbytes;
3108 nr_bytes -= nbytes; 3108 nr_bytes -= nbytes;
3109 3109
3110 if ((bio = req->bio)) { 3110 if ((bio = req->bio)) {
3111 /* 3111 /*
3112 * end more in this run, or just return 'not-done' 3112 * end more in this run, or just return 'not-done'
3113 */ 3113 */
3114 if (unlikely(nr_bytes <= 0)) 3114 if (unlikely(nr_bytes <= 0))
3115 break; 3115 break;
3116 } 3116 }
3117 } 3117 }
3118 3118
3119 /* 3119 /*
3120 * completely done 3120 * completely done
3121 */ 3121 */
3122 if (!req->bio) 3122 if (!req->bio)
3123 return 0; 3123 return 0;
3124 3124
3125 /* 3125 /*
3126 * if the request wasn't completed, update state 3126 * if the request wasn't completed, update state
3127 */ 3127 */
3128 if (bio_nbytes) { 3128 if (bio_nbytes) {
3129 bio_endio(bio, bio_nbytes, error); 3129 bio_endio(bio, bio_nbytes, error);
3130 bio->bi_idx += next_idx; 3130 bio->bi_idx += next_idx;
3131 bio_iovec(bio)->bv_offset += nr_bytes; 3131 bio_iovec(bio)->bv_offset += nr_bytes;
3132 bio_iovec(bio)->bv_len -= nr_bytes; 3132 bio_iovec(bio)->bv_len -= nr_bytes;
3133 } 3133 }
3134 3134
3135 blk_recalc_rq_sectors(req, total_bytes >> 9); 3135 blk_recalc_rq_sectors(req, total_bytes >> 9);
3136 blk_recalc_rq_segments(req); 3136 blk_recalc_rq_segments(req);
3137 return 1; 3137 return 1;
3138 } 3138 }
3139 3139
3140 /** 3140 /**
3141 * end_that_request_first - end I/O on a request 3141 * end_that_request_first - end I/O on a request
3142 * @req: the request being processed 3142 * @req: the request being processed
3143 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error 3143 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
3144 * @nr_sectors: number of sectors to end I/O on 3144 * @nr_sectors: number of sectors to end I/O on
3145 * 3145 *
3146 * Description: 3146 * Description:
3147 * Ends I/O on a number of sectors attached to @req, and sets it up 3147 * Ends I/O on a number of sectors attached to @req, and sets it up
3148 * for the next range of segments (if any) in the cluster. 3148 * for the next range of segments (if any) in the cluster.
3149 * 3149 *
3150 * Return: 3150 * Return:
3151 * 0 - we are done with this request, call end_that_request_last() 3151 * 0 - we are done with this request, call end_that_request_last()
3152 * 1 - still buffers pending for this request 3152 * 1 - still buffers pending for this request
3153 **/ 3153 **/
3154 int end_that_request_first(struct request *req, int uptodate, int nr_sectors) 3154 int end_that_request_first(struct request *req, int uptodate, int nr_sectors)
3155 { 3155 {
3156 return __end_that_request_first(req, uptodate, nr_sectors << 9); 3156 return __end_that_request_first(req, uptodate, nr_sectors << 9);
3157 } 3157 }
3158 3158
3159 EXPORT_SYMBOL(end_that_request_first); 3159 EXPORT_SYMBOL(end_that_request_first);
3160 3160
3161 /** 3161 /**
3162 * end_that_request_chunk - end I/O on a request 3162 * end_that_request_chunk - end I/O on a request
3163 * @req: the request being processed 3163 * @req: the request being processed
3164 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error 3164 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
3165 * @nr_bytes: number of bytes to complete 3165 * @nr_bytes: number of bytes to complete
3166 * 3166 *
3167 * Description: 3167 * Description:
3168 * Ends I/O on a number of bytes attached to @req, and sets it up 3168 * Ends I/O on a number of bytes attached to @req, and sets it up
3169 * for the next range of segments (if any). Like end_that_request_first(), 3169 * for the next range of segments (if any). Like end_that_request_first(),
3170 * but deals with bytes instead of sectors. 3170 * but deals with bytes instead of sectors.
3171 * 3171 *
3172 * Return: 3172 * Return:
3173 * 0 - we are done with this request, call end_that_request_last() 3173 * 0 - we are done with this request, call end_that_request_last()
3174 * 1 - still buffers pending for this request 3174 * 1 - still buffers pending for this request
3175 **/ 3175 **/
3176 int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) 3176 int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes)
3177 { 3177 {
3178 return __end_that_request_first(req, uptodate, nr_bytes); 3178 return __end_that_request_first(req, uptodate, nr_bytes);
3179 } 3179 }
3180 3180
3181 EXPORT_SYMBOL(end_that_request_chunk); 3181 EXPORT_SYMBOL(end_that_request_chunk);
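
The two-stage contract documented above (keep calling the first/chunk helper until it returns 0, then dequeue and finish the request with the queue lock held) looks roughly like this from a driver's completion path. The function itself and the assumption that the request is still on the queue are illustrative; this mirrors the pattern rather than reproducing code from this patch.

static void mydev_complete(request_queue_t *q, struct request *rq,
			   int uptodate, unsigned int bytes)
{
	unsigned long flags;

	/* stage one may be called without the queue lock */
	if (end_that_request_chunk(rq, uptodate, bytes))
		return;			/* more bytes still pending */

	/* all done: the final stage needs the queue lock */
	spin_lock_irqsave(q->queue_lock, flags);
	blkdev_dequeue_request(rq);	/* assumes rq was still queued */
	end_that_request_last(rq);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
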
3182 3182
3183 /* 3183 /*
3184 * queue lock must be held 3184 * queue lock must be held
3185 */ 3185 */
3186 void end_that_request_last(struct request *req) 3186 void end_that_request_last(struct request *req)
3187 { 3187 {
3188 struct gendisk *disk = req->rq_disk; 3188 struct gendisk *disk = req->rq_disk;
3189 3189
3190 if (unlikely(laptop_mode) && blk_fs_request(req)) 3190 if (unlikely(laptop_mode) && blk_fs_request(req))
3191 laptop_io_completion(); 3191 laptop_io_completion();
3192 3192
3193 if (disk && blk_fs_request(req)) { 3193 if (disk && blk_fs_request(req)) {
3194 unsigned long duration = jiffies - req->start_time; 3194 unsigned long duration = jiffies - req->start_time;
3195 const int rw = rq_data_dir(req); 3195 const int rw = rq_data_dir(req);
3196 3196
3197 __disk_stat_inc(disk, ios[rw]); 3197 __disk_stat_inc(disk, ios[rw]);
3198 __disk_stat_add(disk, ticks[rw], duration); 3198 __disk_stat_add(disk, ticks[rw], duration);
3199 disk_round_stats(disk); 3199 disk_round_stats(disk);
3200 disk->in_flight--; 3200 disk->in_flight--;
3201 } 3201 }
3202 if (req->end_io) 3202 if (req->end_io)
3203 req->end_io(req); 3203 req->end_io(req);
3204 else 3204 else
3205 __blk_put_request(req->q, req); 3205 __blk_put_request(req->q, req);
3206 } 3206 }
3207 3207
3208 EXPORT_SYMBOL(end_that_request_last); 3208 EXPORT_SYMBOL(end_that_request_last);
3209 3209
3210 void end_request(struct request *req, int uptodate) 3210 void end_request(struct request *req, int uptodate)
3211 { 3211 {
3212 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { 3212 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {
3213 add_disk_randomness(req->rq_disk); 3213 add_disk_randomness(req->rq_disk);
3214 blkdev_dequeue_request(req); 3214 blkdev_dequeue_request(req);
3215 end_that_request_last(req); 3215 end_that_request_last(req);
3216 } 3216 }
3217 } 3217 }
3218 3218
3219 EXPORT_SYMBOL(end_request); 3219 EXPORT_SYMBOL(end_request);
3220 3220
3221 void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio) 3221 void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
3222 { 3222 {
3223 /* first three bits are identical in rq->flags and bio->bi_rw */ 3223 /* first three bits are identical in rq->flags and bio->bi_rw */
3224 rq->flags |= (bio->bi_rw & 7); 3224 rq->flags |= (bio->bi_rw & 7);
3225 3225
3226 rq->nr_phys_segments = bio_phys_segments(q, bio); 3226 rq->nr_phys_segments = bio_phys_segments(q, bio);
3227 rq->nr_hw_segments = bio_hw_segments(q, bio); 3227 rq->nr_hw_segments = bio_hw_segments(q, bio);
3228 rq->current_nr_sectors = bio_cur_sectors(bio); 3228 rq->current_nr_sectors = bio_cur_sectors(bio);
3229 rq->hard_cur_sectors = rq->current_nr_sectors; 3229 rq->hard_cur_sectors = rq->current_nr_sectors;
3230 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); 3230 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
3231 rq->buffer = bio_data(bio); 3231 rq->buffer = bio_data(bio);
3232 3232
3233 rq->bio = rq->biotail = bio; 3233 rq->bio = rq->biotail = bio;
3234 } 3234 }
3235 3235
3236 EXPORT_SYMBOL(blk_rq_bio_prep); 3236 EXPORT_SYMBOL(blk_rq_bio_prep);
3237 3237
3238 int kblockd_schedule_work(struct work_struct *work) 3238 int kblockd_schedule_work(struct work_struct *work)
3239 { 3239 {
3240 return queue_work(kblockd_workqueue, work); 3240 return queue_work(kblockd_workqueue, work);
3241 } 3241 }
3242 3242
3243 EXPORT_SYMBOL(kblockd_schedule_work); 3243 EXPORT_SYMBOL(kblockd_schedule_work);
3244 3244
3245 void kblockd_flush(void) 3245 void kblockd_flush(void)
3246 { 3246 {
3247 flush_workqueue(kblockd_workqueue); 3247 flush_workqueue(kblockd_workqueue);
3248 } 3248 }
3249 EXPORT_SYMBOL(kblockd_flush); 3249 EXPORT_SYMBOL(kblockd_flush);
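
kblockd is the workqueue the block layer uses for its own deferred work, such as queue unplugging. A hedged sketch of a driver borrowing it, with "struct my_disk" and the unplug-style callback as placeholders:

struct my_disk {
	request_queue_t		*queue;
	struct work_struct	work;
};

static void my_disk_do_unplug(void *data)
{
	struct my_disk *disk = data;

	blk_run_queue(disk->queue);	/* restart request processing */
}

static void my_disk_init_work(struct my_disk *disk)
{
	INIT_WORK(&disk->work, my_disk_do_unplug, disk);
}

static void my_disk_kick(struct my_disk *disk)
{
	/* safe from atomic context; my_disk_do_unplug() runs off kblockd */
	kblockd_schedule_work(&disk->work);
}
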
3250 3250
3251 int __init blk_dev_init(void) 3251 int __init blk_dev_init(void)
3252 { 3252 {
3253 kblockd_workqueue = create_workqueue("kblockd"); 3253 kblockd_workqueue = create_workqueue("kblockd");
3254 if (!kblockd_workqueue) 3254 if (!kblockd_workqueue)
3255 panic("Failed to create kblockd\n"); 3255 panic("Failed to create kblockd\n");
3256 3256
3257 request_cachep = kmem_cache_create("blkdev_requests", 3257 request_cachep = kmem_cache_create("blkdev_requests",
3258 sizeof(struct request), 0, SLAB_PANIC, NULL, NULL); 3258 sizeof(struct request), 0, SLAB_PANIC, NULL, NULL);
3259 3259
3260 requestq_cachep = kmem_cache_create("blkdev_queue", 3260 requestq_cachep = kmem_cache_create("blkdev_queue",
3261 sizeof(request_queue_t), 0, SLAB_PANIC, NULL, NULL); 3261 sizeof(request_queue_t), 0, SLAB_PANIC, NULL, NULL);
3262 3262
3263 iocontext_cachep = kmem_cache_create("blkdev_ioc", 3263 iocontext_cachep = kmem_cache_create("blkdev_ioc",
3264 sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); 3264 sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
3265 3265
3266 blk_max_low_pfn = max_low_pfn; 3266 blk_max_low_pfn = max_low_pfn;
3267 blk_max_pfn = max_pfn; 3267 blk_max_pfn = max_pfn;
3268 3268
3269 return 0; 3269 return 0;
3270 } 3270 }
3271 3271
3272 /* 3272 /*
3273 * IO Context helper functions 3273 * IO Context helper functions
3274 */ 3274 */
3275 void put_io_context(struct io_context *ioc) 3275 void put_io_context(struct io_context *ioc)
3276 { 3276 {
3277 if (ioc == NULL) 3277 if (ioc == NULL)
3278 return; 3278 return;
3279 3279
3280 BUG_ON(atomic_read(&ioc->refcount) == 0); 3280 BUG_ON(atomic_read(&ioc->refcount) == 0);
3281 3281
3282 if (atomic_dec_and_test(&ioc->refcount)) { 3282 if (atomic_dec_and_test(&ioc->refcount)) {
3283 if (ioc->aic && ioc->aic->dtor) 3283 if (ioc->aic && ioc->aic->dtor)
3284 ioc->aic->dtor(ioc->aic); 3284 ioc->aic->dtor(ioc->aic);
3285 if (ioc->cic && ioc->cic->dtor) 3285 if (ioc->cic && ioc->cic->dtor)
3286 ioc->cic->dtor(ioc->cic); 3286 ioc->cic->dtor(ioc->cic);
3287 3287
3288 kmem_cache_free(iocontext_cachep, ioc); 3288 kmem_cache_free(iocontext_cachep, ioc);
3289 } 3289 }
3290 } 3290 }
3291 EXPORT_SYMBOL(put_io_context); 3291 EXPORT_SYMBOL(put_io_context);
3292 3292
3293 /* Called by the exiting task */ 3293 /* Called by the exiting task */
3294 void exit_io_context(void) 3294 void exit_io_context(void)
3295 { 3295 {
3296 unsigned long flags; 3296 unsigned long flags;
3297 struct io_context *ioc; 3297 struct io_context *ioc;
3298 3298
3299 local_irq_save(flags); 3299 local_irq_save(flags);
3300 task_lock(current); 3300 task_lock(current);
3301 ioc = current->io_context; 3301 ioc = current->io_context;
3302 current->io_context = NULL; 3302 current->io_context = NULL;
3303 ioc->task = NULL; 3303 ioc->task = NULL;
3304 task_unlock(current); 3304 task_unlock(current);
3305 local_irq_restore(flags); 3305 local_irq_restore(flags);
3306 3306
3307 if (ioc->aic && ioc->aic->exit) 3307 if (ioc->aic && ioc->aic->exit)
3308 ioc->aic->exit(ioc->aic); 3308 ioc->aic->exit(ioc->aic);
3309 if (ioc->cic && ioc->cic->exit) 3309 if (ioc->cic && ioc->cic->exit)
3310 ioc->cic->exit(ioc->cic); 3310 ioc->cic->exit(ioc->cic);
3311 3311
3312 put_io_context(ioc); 3312 put_io_context(ioc);
3313 } 3313 }
3314 3314
3315 /* 3315 /*
3316 * If the current task has no IO context then create one and initialise it. 3316 * If the current task has no IO context then create one and initialise it.
3317 * Otherwise, return its existing IO context. 3317 * Otherwise, return its existing IO context.
3318 * 3318 *
3319 * This returned IO context doesn't have a specifically elevated refcount, 3319 * This returned IO context doesn't have a specifically elevated refcount,
3320 * but since the current task itself holds a reference, the context can be 3320 * but since the current task itself holds a reference, the context can be
3321 * used in general code, so long as it stays within `current` context. 3321 * used in general code, so long as it stays within `current` context.
3322 */ 3322 */
3323 struct io_context *current_io_context(gfp_t gfp_flags) 3323 struct io_context *current_io_context(gfp_t gfp_flags)
3324 { 3324 {
3325 struct task_struct *tsk = current; 3325 struct task_struct *tsk = current;
3326 struct io_context *ret; 3326 struct io_context *ret;
3327 3327
3328 ret = tsk->io_context; 3328 ret = tsk->io_context;
3329 if (likely(ret)) 3329 if (likely(ret))
3330 return ret; 3330 return ret;
3331 3331
3332 ret = kmem_cache_alloc(iocontext_cachep, gfp_flags); 3332 ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
3333 if (ret) { 3333 if (ret) {
3334 atomic_set(&ret->refcount, 1); 3334 atomic_set(&ret->refcount, 1);
3335 ret->task = current; 3335 ret->task = current;
3336 ret->set_ioprio = NULL; 3336 ret->set_ioprio = NULL;
3337 ret->last_waited = jiffies; /* doesn't matter... */ 3337 ret->last_waited = jiffies; /* doesn't matter... */
3338 ret->nr_batch_requests = 0; /* because this is 0 */ 3338 ret->nr_batch_requests = 0; /* because this is 0 */
3339 ret->aic = NULL; 3339 ret->aic = NULL;
3340 ret->cic = NULL; 3340 ret->cic = NULL;
3341 tsk->io_context = ret; 3341 tsk->io_context = ret;
3342 } 3342 }
3343 3343
3344 return ret; 3344 return ret;
3345 } 3345 }
3346 EXPORT_SYMBOL(current_io_context); 3346 EXPORT_SYMBOL(current_io_context);
3347 3347
3348 /* 3348 /*
3349 * If the current task has no IO context then create one and initialise it. 3349 * If the current task has no IO context then create one and initialise it.
3350 * If it does have a context, take a ref on it. 3350 * If it does have a context, take a ref on it.
3351 * 3351 *
3352 * This is always called in the context of the task which submitted the I/O. 3352 * This is always called in the context of the task which submitted the I/O.
3353 */ 3353 */
3354 struct io_context *get_io_context(gfp_t gfp_flags) 3354 struct io_context *get_io_context(gfp_t gfp_flags)
3355 { 3355 {
3356 struct io_context *ret; 3356 struct io_context *ret;
3357 ret = current_io_context(gfp_flags); 3357 ret = current_io_context(gfp_flags);
3358 if (likely(ret)) 3358 if (likely(ret))
3359 atomic_inc(&ret->refcount); 3359 atomic_inc(&ret->refcount);
3360 return ret; 3360 return ret;
3361 } 3361 }
3362 EXPORT_SYMBOL(get_io_context); 3362 EXPORT_SYMBOL(get_io_context);
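
The refcounting contract of the helpers above, sketched from the point of view of a hypothetical user: an I/O scheduler keeping a reference in its own per-queue data, here the made-up "struct my_sched_data". Every successful get_io_context() must be balanced by a put_io_context().

struct my_sched_data {
	struct io_context *ioc;
};

static int my_sched_attach(struct my_sched_data *sd)
{
	sd->ioc = get_io_context(GFP_KERNEL);	/* takes a reference */
	if (!sd->ioc)
		return -ENOMEM;
	return 0;
}

static void my_sched_detach(struct my_sched_data *sd)
{
	put_io_context(sd->ioc);		/* drops it again */
	sd->ioc = NULL;
}
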
3363 3363
3364 void copy_io_context(struct io_context **pdst, struct io_context **psrc) 3364 void copy_io_context(struct io_context **pdst, struct io_context **psrc)
3365 { 3365 {
3366 struct io_context *src = *psrc; 3366 struct io_context *src = *psrc;
3367 struct io_context *dst = *pdst; 3367 struct io_context *dst = *pdst;
3368 3368
3369 if (src) { 3369 if (src) {
3370 BUG_ON(atomic_read(&src->refcount) == 0); 3370 BUG_ON(atomic_read(&src->refcount) == 0);
3371 atomic_inc(&src->refcount); 3371 atomic_inc(&src->refcount);
3372 put_io_context(dst); 3372 put_io_context(dst);
3373 *pdst = src; 3373 *pdst = src;
3374 } 3374 }
3375 } 3375 }
3376 EXPORT_SYMBOL(copy_io_context); 3376 EXPORT_SYMBOL(copy_io_context);
3377 3377
3378 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) 3378 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
3379 { 3379 {
3380 struct io_context *temp; 3380 struct io_context *temp;
3381 temp = *ioc1; 3381 temp = *ioc1;
3382 *ioc1 = *ioc2; 3382 *ioc1 = *ioc2;
3383 *ioc2 = temp; 3383 *ioc2 = temp;
3384 } 3384 }
3385 EXPORT_SYMBOL(swap_io_context); 3385 EXPORT_SYMBOL(swap_io_context);
3386 3386
3387 /* 3387 /*
3388 * sysfs parts below 3388 * sysfs parts below
3389 */ 3389 */
3390 struct queue_sysfs_entry { 3390 struct queue_sysfs_entry {
3391 struct attribute attr; 3391 struct attribute attr;
3392 ssize_t (*show)(struct request_queue *, char *); 3392 ssize_t (*show)(struct request_queue *, char *);
3393 ssize_t (*store)(struct request_queue *, const char *, size_t); 3393 ssize_t (*store)(struct request_queue *, const char *, size_t);
3394 }; 3394 };
3395 3395
3396 static ssize_t 3396 static ssize_t
3397 queue_var_show(unsigned int var, char *page) 3397 queue_var_show(unsigned int var, char *page)
3398 { 3398 {
3399 return sprintf(page, "%d\n", var); 3399 return sprintf(page, "%d\n", var);
3400 } 3400 }
3401 3401
3402 static ssize_t 3402 static ssize_t
3403 queue_var_store(unsigned long *var, const char *page, size_t count) 3403 queue_var_store(unsigned long *var, const char *page, size_t count)
3404 { 3404 {
3405 char *p = (char *) page; 3405 char *p = (char *) page;
3406 3406
3407 *var = simple_strtoul(p, &p, 10); 3407 *var = simple_strtoul(p, &p, 10);
3408 return count; 3408 return count;
3409 } 3409 }
3410 3410
3411 static ssize_t queue_requests_show(struct request_queue *q, char *page) 3411 static ssize_t queue_requests_show(struct request_queue *q, char *page)
3412 { 3412 {
3413 return queue_var_show(q->nr_requests, (page)); 3413 return queue_var_show(q->nr_requests, (page));
3414 } 3414 }
3415 3415
3416 static ssize_t 3416 static ssize_t
3417 queue_requests_store(struct request_queue *q, const char *page, size_t count) 3417 queue_requests_store(struct request_queue *q, const char *page, size_t count)
3418 { 3418 {
3419 struct request_list *rl = &q->rq; 3419 struct request_list *rl = &q->rq;
3420 3420
3421 int ret = queue_var_store(&q->nr_requests, page, count); 3421 int ret = queue_var_store(&q->nr_requests, page, count);
3422 if (q->nr_requests < BLKDEV_MIN_RQ) 3422 if (q->nr_requests < BLKDEV_MIN_RQ)
3423 q->nr_requests = BLKDEV_MIN_RQ; 3423 q->nr_requests = BLKDEV_MIN_RQ;
3424 blk_queue_congestion_threshold(q); 3424 blk_queue_congestion_threshold(q);
3425 3425
3426 if (rl->count[READ] >= queue_congestion_on_threshold(q)) 3426 if (rl->count[READ] >= queue_congestion_on_threshold(q))
3427 set_queue_congested(q, READ); 3427 set_queue_congested(q, READ);
3428 else if (rl->count[READ] < queue_congestion_off_threshold(q)) 3428 else if (rl->count[READ] < queue_congestion_off_threshold(q))
3429 clear_queue_congested(q, READ); 3429 clear_queue_congested(q, READ);
3430 3430
3431 if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) 3431 if (rl->count[WRITE] >= queue_congestion_on_threshold(q))
3432 set_queue_congested(q, WRITE); 3432 set_queue_congested(q, WRITE);
3433 else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) 3433 else if (rl->count[WRITE] < queue_congestion_off_threshold(q))
3434 clear_queue_congested(q, WRITE); 3434 clear_queue_congested(q, WRITE);
3435 3435
3436 if (rl->count[READ] >= q->nr_requests) { 3436 if (rl->count[READ] >= q->nr_requests) {
3437 blk_set_queue_full(q, READ); 3437 blk_set_queue_full(q, READ);
3438 } else if (rl->count[READ]+1 <= q->nr_requests) { 3438 } else if (rl->count[READ]+1 <= q->nr_requests) {
3439 blk_clear_queue_full(q, READ); 3439 blk_clear_queue_full(q, READ);
3440 wake_up(&rl->wait[READ]); 3440 wake_up(&rl->wait[READ]);
3441 } 3441 }
3442 3442
3443 if (rl->count[WRITE] >= q->nr_requests) { 3443 if (rl->count[WRITE] >= q->nr_requests) {
3444 blk_set_queue_full(q, WRITE); 3444 blk_set_queue_full(q, WRITE);
3445 } else if (rl->count[WRITE]+1 <= q->nr_requests) { 3445 } else if (rl->count[WRITE]+1 <= q->nr_requests) {
3446 blk_clear_queue_full(q, WRITE); 3446 blk_clear_queue_full(q, WRITE);
3447 wake_up(&rl->wait[WRITE]); 3447 wake_up(&rl->wait[WRITE]);
3448 } 3448 }
3449 return ret; 3449 return ret;
3450 } 3450 }
3451 3451
3452 static ssize_t queue_ra_show(struct request_queue *q, char *page) 3452 static ssize_t queue_ra_show(struct request_queue *q, char *page)
3453 { 3453 {
3454 int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); 3454 int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
3455 3455
3456 return queue_var_show(ra_kb, (page)); 3456 return queue_var_show(ra_kb, (page));
3457 } 3457 }
3458 3458
3459 static ssize_t 3459 static ssize_t
3460 queue_ra_store(struct request_queue *q, const char *page, size_t count) 3460 queue_ra_store(struct request_queue *q, const char *page, size_t count)
3461 { 3461 {
3462 unsigned long ra_kb; 3462 unsigned long ra_kb;
3463 ssize_t ret = queue_var_store(&ra_kb, page, count); 3463 ssize_t ret = queue_var_store(&ra_kb, page, count);
3464 3464
3465 spin_lock_irq(q->queue_lock); 3465 spin_lock_irq(q->queue_lock);
3466 if (ra_kb > (q->max_sectors >> 1)) 3466 if (ra_kb > (q->max_sectors >> 1))
3467 ra_kb = (q->max_sectors >> 1); 3467 ra_kb = (q->max_sectors >> 1);
3468 3468
3469 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); 3469 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
3470 spin_unlock_irq(q->queue_lock); 3470 spin_unlock_irq(q->queue_lock);
3471 3471
3472 return ret; 3472 return ret;
3473 } 3473 }
3474 3474
3475 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) 3475 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
3476 { 3476 {
3477 int max_sectors_kb = q->max_sectors >> 1; 3477 int max_sectors_kb = q->max_sectors >> 1;
3478 3478
3479 return queue_var_show(max_sectors_kb, (page)); 3479 return queue_var_show(max_sectors_kb, (page));
3480 } 3480 }
3481 3481
3482 static ssize_t 3482 static ssize_t
3483 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) 3483 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
3484 { 3484 {
3485 unsigned long max_sectors_kb, 3485 unsigned long max_sectors_kb,
3486 max_hw_sectors_kb = q->max_hw_sectors >> 1, 3486 max_hw_sectors_kb = q->max_hw_sectors >> 1,
3487 page_kb = 1 << (PAGE_CACHE_SHIFT - 10); 3487 page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
3488 ssize_t ret = queue_var_store(&max_sectors_kb, page, count); 3488 ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
3489 int ra_kb; 3489 int ra_kb;
3490 3490
3491 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) 3491 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
3492 return -EINVAL; 3492 return -EINVAL;
3493 /* 3493 /*
3494 * Take the queue lock to update the readahead and max_sectors 3494 * Take the queue lock to update the readahead and max_sectors
3495 * values synchronously: 3495 * values synchronously:
3496 */ 3496 */
3497 spin_lock_irq(q->queue_lock); 3497 spin_lock_irq(q->queue_lock);
3498 /* 3498 /*
3499 * Trim readahead window as well, if necessary: 3499 * Trim readahead window as well, if necessary:
3500 */ 3500 */
3501 ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); 3501 ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
3502 if (ra_kb > max_sectors_kb) 3502 if (ra_kb > max_sectors_kb)
3503 q->backing_dev_info.ra_pages = 3503 q->backing_dev_info.ra_pages =
3504 max_sectors_kb >> (PAGE_CACHE_SHIFT - 10); 3504 max_sectors_kb >> (PAGE_CACHE_SHIFT - 10);
3505 3505
3506 q->max_sectors = max_sectors_kb << 1; 3506 q->max_sectors = max_sectors_kb << 1;
3507 spin_unlock_irq(q->queue_lock); 3507 spin_unlock_irq(q->queue_lock);
3508 3508
3509 return ret; 3509 return ret;
3510 } 3510 }
3511 3511
3512 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) 3512 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
3513 { 3513 {
3514 int max_hw_sectors_kb = q->max_hw_sectors >> 1; 3514 int max_hw_sectors_kb = q->max_hw_sectors >> 1;
3515 3515
3516 return queue_var_show(max_hw_sectors_kb, (page)); 3516 return queue_var_show(max_hw_sectors_kb, (page));
3517 } 3517 }
3518 3518
3519 3519
3520 static struct queue_sysfs_entry queue_requests_entry = { 3520 static struct queue_sysfs_entry queue_requests_entry = {
3521 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, 3521 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
3522 .show = queue_requests_show, 3522 .show = queue_requests_show,
3523 .store = queue_requests_store, 3523 .store = queue_requests_store,
3524 }; 3524 };
3525 3525
3526 static struct queue_sysfs_entry queue_ra_entry = { 3526 static struct queue_sysfs_entry queue_ra_entry = {
3527 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, 3527 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
3528 .show = queue_ra_show, 3528 .show = queue_ra_show,
3529 .store = queue_ra_store, 3529 .store = queue_ra_store,
3530 }; 3530 };
3531 3531
3532 static struct queue_sysfs_entry queue_max_sectors_entry = { 3532 static struct queue_sysfs_entry queue_max_sectors_entry = {
3533 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, 3533 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
3534 .show = queue_max_sectors_show, 3534 .show = queue_max_sectors_show,
3535 .store = queue_max_sectors_store, 3535 .store = queue_max_sectors_store,
3536 }; 3536 };
3537 3537
3538 static struct queue_sysfs_entry queue_max_hw_sectors_entry = { 3538 static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
3539 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, 3539 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
3540 .show = queue_max_hw_sectors_show, 3540 .show = queue_max_hw_sectors_show,
3541 }; 3541 };
3542 3542
3543 static struct queue_sysfs_entry queue_iosched_entry = { 3543 static struct queue_sysfs_entry queue_iosched_entry = {
3544 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, 3544 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
3545 .show = elv_iosched_show, 3545 .show = elv_iosched_show,
3546 .store = elv_iosched_store, 3546 .store = elv_iosched_store,
3547 }; 3547 };
3548 3548
3549 static struct attribute *default_attrs[] = { 3549 static struct attribute *default_attrs[] = {
3550 &queue_requests_entry.attr, 3550 &queue_requests_entry.attr,
3551 &queue_ra_entry.attr, 3551 &queue_ra_entry.attr,
3552 &queue_max_hw_sectors_entry.attr, 3552 &queue_max_hw_sectors_entry.attr,
3553 &queue_max_sectors_entry.attr, 3553 &queue_max_sectors_entry.attr,
3554 &queue_iosched_entry.attr, 3554 &queue_iosched_entry.attr,
3555 NULL, 3555 NULL,
3556 }; 3556 };
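
For illustration, one more attribute could be wired into this scheme as follows; "nr_congestion_on" is only an example name, the entry is not in the tree, and it would also have to be listed in default_attrs[] above before sysfs could see it.

static ssize_t queue_congestion_on_show(struct request_queue *q, char *page)
{
	return queue_var_show(queue_congestion_on_threshold(q), page);
}

static struct queue_sysfs_entry queue_congestion_on_entry = {
	.attr = {.name = "nr_congestion_on", .mode = S_IRUGO },
	.show = queue_congestion_on_show,
};
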
3557 3557
3558 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) 3558 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
3559 3559
3560 static ssize_t 3560 static ssize_t
3561 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 3561 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3562 { 3562 {
3563 struct queue_sysfs_entry *entry = to_queue(attr); 3563 struct queue_sysfs_entry *entry = to_queue(attr);
3564 struct request_queue *q; 3564 struct request_queue *q;
3565 3565
3566 q = container_of(kobj, struct request_queue, kobj); 3566 q = container_of(kobj, struct request_queue, kobj);
3567 if (!entry->show) 3567 if (!entry->show)
3568 return -EIO; 3568 return -EIO;
3569 3569
3570 return entry->show(q, page); 3570 return entry->show(q, page);
3571 } 3571 }
3572 3572
3573 static ssize_t 3573 static ssize_t
3574 queue_attr_store(struct kobject *kobj, struct attribute *attr, 3574 queue_attr_store(struct kobject *kobj, struct attribute *attr,
3575 const char *page, size_t length) 3575 const char *page, size_t length)
3576 { 3576 {
3577 struct queue_sysfs_entry *entry = to_queue(attr); 3577 struct queue_sysfs_entry *entry = to_queue(attr);
3578 struct request_queue *q; 3578 struct request_queue *q;
3579 3579
3580 q = container_of(kobj, struct request_queue, kobj); 3580 q = container_of(kobj, struct request_queue, kobj);
3581 if (!entry->store) 3581 if (!entry->store)
3582 return -EIO; 3582 return -EIO;
3583 3583
3584 return entry->store(q, page, length); 3584 return entry->store(q, page, length);
3585 } 3585 }
3586 3586
3587 static struct sysfs_ops queue_sysfs_ops = { 3587 static struct sysfs_ops queue_sysfs_ops = {
3588 .show = queue_attr_show, 3588 .show = queue_attr_show,
3589 .store = queue_attr_store, 3589 .store = queue_attr_store,
3590 }; 3590 };
3591 3591
3592 static struct kobj_type queue_ktype = { 3592 static struct kobj_type queue_ktype = {
3593 .sysfs_ops = &queue_sysfs_ops, 3593 .sysfs_ops = &queue_sysfs_ops,
3594 .default_attrs = default_attrs, 3594 .default_attrs = default_attrs,
3595 }; 3595 };
3596 3596
3597 int blk_register_queue(struct gendisk *disk) 3597 int blk_register_queue(struct gendisk *disk)
3598 { 3598 {
3599 int ret; 3599 int ret;
3600 3600
3601 request_queue_t *q = disk->queue; 3601 request_queue_t *q = disk->queue;
3602 3602
3603 if (!q || !q->request_fn) 3603 if (!q || !q->request_fn)
3604 return -ENXIO; 3604 return -ENXIO;
3605 3605
3606 q->kobj.parent = kobject_get(&disk->kobj); 3606 q->kobj.parent = kobject_get(&disk->kobj);
3607 if (!q->kobj.parent) 3607 if (!q->kobj.parent)
3608 return -EBUSY; 3608 return -EBUSY;
3609 3609
3610 snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); 3610 snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue");
3611 q->kobj.ktype = &queue_ktype; 3611 q->kobj.ktype = &queue_ktype;
3612 3612
3613 ret = kobject_register(&q->kobj); 3613 ret = kobject_register(&q->kobj);
3614 if (ret < 0) 3614 if (ret < 0)
3615 return ret; 3615 return ret;
3616 3616
3617 ret = elv_register_queue(q); 3617 ret = elv_register_queue(q);
3618 if (ret) { 3618 if (ret) {
3619 kobject_unregister(&q->kobj); 3619 kobject_unregister(&q->kobj);
3620 return ret; 3620 return ret;
3621 } 3621 }
3622 3622
3623 return 0; 3623 return 0;
3624 } 3624 }
3625 3625
3626 void blk_unregister_queue(struct gendisk *disk) 3626 void blk_unregister_queue(struct gendisk *disk)
3627 { 3627 {
3628 request_queue_t *q = disk->queue; 3628 request_queue_t *q = disk->queue;
3629 3629
3630 if (q && q->request_fn) { 3630 if (q && q->request_fn) {
3631 elv_unregister_queue(q); 3631 elv_unregister_queue(q);
3632 3632
3633 kobject_unregister(&q->kobj); 3633 kobject_unregister(&q->kobj);
3634 kobject_put(&disk->kobj); 3634 kobject_put(&disk->kobj);
3635 } 3635 }
3636 } 3636 }
3637 3637
1 /* 1 /*
2 * Copyright (C) 2001 Jens Axboe <axboe@suse.de> 2 * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 * 7 *
8 * This program is distributed in the hope that it will be useful, 8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * 10 *
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
17 * 17 *
18 */ 18 */
19 #include <linux/kernel.h> 19 #include <linux/kernel.h>
20 #include <linux/errno.h> 20 #include <linux/errno.h>
21 #include <linux/string.h> 21 #include <linux/string.h>
22 #include <linux/module.h> 22 #include <linux/module.h>
23 #include <linux/blkdev.h> 23 #include <linux/blkdev.h>
24 #include <linux/completion.h> 24 #include <linux/completion.h>
25 #include <linux/cdrom.h> 25 #include <linux/cdrom.h>
26 #include <linux/slab.h> 26 #include <linux/slab.h>
27 #include <linux/times.h> 27 #include <linux/times.h>
28 #include <asm/uaccess.h> 28 #include <asm/uaccess.h>
29 29
30 #include <scsi/scsi.h> 30 #include <scsi/scsi.h>
31 #include <scsi/scsi_ioctl.h> 31 #include <scsi/scsi_ioctl.h>
32 #include <scsi/scsi_cmnd.h> 32 #include <scsi/scsi_cmnd.h>
33 33
34 /* Command group 3 is reserved and should never be used. */ 34 /* Command group 3 is reserved and should never be used. */
35 const unsigned char scsi_command_size[8] = 35 const unsigned char scsi_command_size[8] =
36 { 36 {
37 6, 10, 10, 12, 37 6, 10, 10, 12,
38 16, 12, 10, 10 38 16, 12, 10, 10
39 }; 39 };
40 40
41 EXPORT_SYMBOL(scsi_command_size); 41 EXPORT_SYMBOL(scsi_command_size);
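The scsi_command_size[] table above maps a SCSI command group (the top three bits of the opcode) to the CDB length for that group; the COMMAND_SIZE() macro used further down performs that lookup. A minimal standalone sketch of the idea, not the kernel macro itself:

        /* Sketch only: the opcode's high three bits select the command group. */
        static inline unsigned int cdb_length(unsigned char opcode)
        {
                return scsi_command_size[(opcode >> 5) & 7];
        }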
42 42
43 #define BLK_DEFAULT_TIMEOUT (60 * HZ) 43 #define BLK_DEFAULT_TIMEOUT (60 * HZ)
44 44
45 #include <scsi/sg.h> 45 #include <scsi/sg.h>
46 46
47 static int sg_get_version(int __user *p) 47 static int sg_get_version(int __user *p)
48 { 48 {
49 static int sg_version_num = 30527; 49 static const int sg_version_num = 30527;
50 return put_user(sg_version_num, p); 50 return put_user(sg_version_num, p);
51 } 51 }
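For reference, a hypothetical userspace program could query this emulated version number with the same ioctl; the device path is only an assumption for the example:

        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>
        #include <sys/ioctl.h>
        #include <scsi/sg.h>

        int main(void)
        {
                int version;
                int fd = open("/dev/sr0", O_RDONLY | O_NONBLOCK);

                if (fd < 0) {
                        perror("open");
                        return 1;
                }
                if (ioctl(fd, SG_GET_VERSION_NUM, &version) < 0) {
                        perror("SG_GET_VERSION_NUM");
                        close(fd);
                        return 1;
                }
                printf("sg version %d\n", version);  /* 30527 here, conventionally read as 3.5.27 */
                close(fd);
                return 0;
        }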
52 52
53 static int scsi_get_idlun(request_queue_t *q, int __user *p) 53 static int scsi_get_idlun(request_queue_t *q, int __user *p)
54 { 54 {
55 return put_user(0, p); 55 return put_user(0, p);
56 } 56 }
57 57
58 static int scsi_get_bus(request_queue_t *q, int __user *p) 58 static int scsi_get_bus(request_queue_t *q, int __user *p)
59 { 59 {
60 return put_user(0, p); 60 return put_user(0, p);
61 } 61 }
62 62
63 static int sg_get_timeout(request_queue_t *q) 63 static int sg_get_timeout(request_queue_t *q)
64 { 64 {
65 return q->sg_timeout / (HZ / USER_HZ); 65 return q->sg_timeout / (HZ / USER_HZ);
66 } 66 }
67 67
68 static int sg_set_timeout(request_queue_t *q, int __user *p) 68 static int sg_set_timeout(request_queue_t *q, int __user *p)
69 { 69 {
70 int timeout, err = get_user(timeout, p); 70 int timeout, err = get_user(timeout, p);
71 71
72 if (!err) 72 if (!err)
73 q->sg_timeout = timeout * (HZ / USER_HZ); 73 q->sg_timeout = timeout * (HZ / USER_HZ);
74 74
75 return err; 75 return err;
76 } 76 }
77 77
78 static int sg_get_reserved_size(request_queue_t *q, int __user *p) 78 static int sg_get_reserved_size(request_queue_t *q, int __user *p)
79 { 79 {
80 return put_user(q->sg_reserved_size, p); 80 return put_user(q->sg_reserved_size, p);
81 } 81 }
82 82
83 static int sg_set_reserved_size(request_queue_t *q, int __user *p) 83 static int sg_set_reserved_size(request_queue_t *q, int __user *p)
84 { 84 {
85 int size, err = get_user(size, p); 85 int size, err = get_user(size, p);
86 86
87 if (err) 87 if (err)
88 return err; 88 return err;
89 89
90 if (size < 0) 90 if (size < 0)
91 return -EINVAL; 91 return -EINVAL;
92 if (size > (q->max_sectors << 9)) 92 if (size > (q->max_sectors << 9))
93 size = q->max_sectors << 9; 93 size = q->max_sectors << 9;
94 94
95 q->sg_reserved_size = size; 95 q->sg_reserved_size = size;
96 return 0; 96 return 0;
97 } 97 }
98 98
99 /* 99 /*
100 * will always return that we are ATAPI even for a real SCSI drive, I'm not 100 * will always return that we are ATAPI even for a real SCSI drive, I'm not
101 * so sure this is worth doing anything about (why would you care??) 101 * so sure this is worth doing anything about (why would you care??)
102 */ 102 */
103 static int sg_emulated_host(request_queue_t *q, int __user *p) 103 static int sg_emulated_host(request_queue_t *q, int __user *p)
104 { 104 {
105 return put_user(1, p); 105 return put_user(1, p);
106 } 106 }
107 107
108 #define CMD_READ_SAFE 0x01 108 #define CMD_READ_SAFE 0x01
109 #define CMD_WRITE_SAFE 0x02 109 #define CMD_WRITE_SAFE 0x02
110 #define CMD_WARNED 0x04 110 #define CMD_WARNED 0x04
111 #define safe_for_read(cmd) [cmd] = CMD_READ_SAFE 111 #define safe_for_read(cmd) [cmd] = CMD_READ_SAFE
112 #define safe_for_write(cmd) [cmd] = CMD_WRITE_SAFE 112 #define safe_for_write(cmd) [cmd] = CMD_WRITE_SAFE
113 113
114 static int verify_command(struct file *file, unsigned char *cmd) 114 static int verify_command(struct file *file, unsigned char *cmd)
115 { 115 {
116 static unsigned char cmd_type[256] = { 116 static unsigned char cmd_type[256] = {
117 117
118 /* Basic read-only commands */ 118 /* Basic read-only commands */
119 safe_for_read(TEST_UNIT_READY), 119 safe_for_read(TEST_UNIT_READY),
120 safe_for_read(REQUEST_SENSE), 120 safe_for_read(REQUEST_SENSE),
121 safe_for_read(READ_6), 121 safe_for_read(READ_6),
122 safe_for_read(READ_10), 122 safe_for_read(READ_10),
123 safe_for_read(READ_12), 123 safe_for_read(READ_12),
124 safe_for_read(READ_16), 124 safe_for_read(READ_16),
125 safe_for_read(READ_BUFFER), 125 safe_for_read(READ_BUFFER),
126 safe_for_read(READ_DEFECT_DATA), 126 safe_for_read(READ_DEFECT_DATA),
127 safe_for_read(READ_LONG), 127 safe_for_read(READ_LONG),
128 safe_for_read(INQUIRY), 128 safe_for_read(INQUIRY),
129 safe_for_read(MODE_SENSE), 129 safe_for_read(MODE_SENSE),
130 safe_for_read(MODE_SENSE_10), 130 safe_for_read(MODE_SENSE_10),
131 safe_for_read(LOG_SENSE), 131 safe_for_read(LOG_SENSE),
132 safe_for_read(START_STOP), 132 safe_for_read(START_STOP),
133 safe_for_read(GPCMD_VERIFY_10), 133 safe_for_read(GPCMD_VERIFY_10),
134 safe_for_read(VERIFY_16), 134 safe_for_read(VERIFY_16),
135 135
136 /* Audio CD commands */ 136 /* Audio CD commands */
137 safe_for_read(GPCMD_PLAY_CD), 137 safe_for_read(GPCMD_PLAY_CD),
138 safe_for_read(GPCMD_PLAY_AUDIO_10), 138 safe_for_read(GPCMD_PLAY_AUDIO_10),
139 safe_for_read(GPCMD_PLAY_AUDIO_MSF), 139 safe_for_read(GPCMD_PLAY_AUDIO_MSF),
140 safe_for_read(GPCMD_PLAY_AUDIO_TI), 140 safe_for_read(GPCMD_PLAY_AUDIO_TI),
141 safe_for_read(GPCMD_PAUSE_RESUME), 141 safe_for_read(GPCMD_PAUSE_RESUME),
142 142
143 /* CD/DVD data reading */ 143 /* CD/DVD data reading */
144 safe_for_read(GPCMD_READ_BUFFER_CAPACITY), 144 safe_for_read(GPCMD_READ_BUFFER_CAPACITY),
145 safe_for_read(GPCMD_READ_CD), 145 safe_for_read(GPCMD_READ_CD),
146 safe_for_read(GPCMD_READ_CD_MSF), 146 safe_for_read(GPCMD_READ_CD_MSF),
147 safe_for_read(GPCMD_READ_DISC_INFO), 147 safe_for_read(GPCMD_READ_DISC_INFO),
148 safe_for_read(GPCMD_READ_CDVD_CAPACITY), 148 safe_for_read(GPCMD_READ_CDVD_CAPACITY),
149 safe_for_read(GPCMD_READ_DVD_STRUCTURE), 149 safe_for_read(GPCMD_READ_DVD_STRUCTURE),
150 safe_for_read(GPCMD_READ_HEADER), 150 safe_for_read(GPCMD_READ_HEADER),
151 safe_for_read(GPCMD_READ_TRACK_RZONE_INFO), 151 safe_for_read(GPCMD_READ_TRACK_RZONE_INFO),
152 safe_for_read(GPCMD_READ_SUBCHANNEL), 152 safe_for_read(GPCMD_READ_SUBCHANNEL),
153 safe_for_read(GPCMD_READ_TOC_PMA_ATIP), 153 safe_for_read(GPCMD_READ_TOC_PMA_ATIP),
154 safe_for_read(GPCMD_REPORT_KEY), 154 safe_for_read(GPCMD_REPORT_KEY),
155 safe_for_read(GPCMD_SCAN), 155 safe_for_read(GPCMD_SCAN),
156 safe_for_read(GPCMD_GET_CONFIGURATION), 156 safe_for_read(GPCMD_GET_CONFIGURATION),
157 safe_for_read(GPCMD_READ_FORMAT_CAPACITIES), 157 safe_for_read(GPCMD_READ_FORMAT_CAPACITIES),
158 safe_for_read(GPCMD_GET_EVENT_STATUS_NOTIFICATION), 158 safe_for_read(GPCMD_GET_EVENT_STATUS_NOTIFICATION),
159 safe_for_read(GPCMD_GET_PERFORMANCE), 159 safe_for_read(GPCMD_GET_PERFORMANCE),
160 safe_for_read(GPCMD_SEEK), 160 safe_for_read(GPCMD_SEEK),
161 safe_for_read(GPCMD_STOP_PLAY_SCAN), 161 safe_for_read(GPCMD_STOP_PLAY_SCAN),
162 162
163 /* Basic writing commands */ 163 /* Basic writing commands */
164 safe_for_write(WRITE_6), 164 safe_for_write(WRITE_6),
165 safe_for_write(WRITE_10), 165 safe_for_write(WRITE_10),
166 safe_for_write(WRITE_VERIFY), 166 safe_for_write(WRITE_VERIFY),
167 safe_for_write(WRITE_12), 167 safe_for_write(WRITE_12),
168 safe_for_write(WRITE_VERIFY_12), 168 safe_for_write(WRITE_VERIFY_12),
169 safe_for_write(WRITE_16), 169 safe_for_write(WRITE_16),
170 safe_for_write(WRITE_LONG), 170 safe_for_write(WRITE_LONG),
171 safe_for_write(WRITE_LONG_2), 171 safe_for_write(WRITE_LONG_2),
172 safe_for_write(ERASE), 172 safe_for_write(ERASE),
173 safe_for_write(GPCMD_MODE_SELECT_10), 173 safe_for_write(GPCMD_MODE_SELECT_10),
174 safe_for_write(MODE_SELECT), 174 safe_for_write(MODE_SELECT),
175 safe_for_write(LOG_SELECT), 175 safe_for_write(LOG_SELECT),
176 safe_for_write(GPCMD_BLANK), 176 safe_for_write(GPCMD_BLANK),
177 safe_for_write(GPCMD_CLOSE_TRACK), 177 safe_for_write(GPCMD_CLOSE_TRACK),
178 safe_for_write(GPCMD_FLUSH_CACHE), 178 safe_for_write(GPCMD_FLUSH_CACHE),
179 safe_for_write(GPCMD_FORMAT_UNIT), 179 safe_for_write(GPCMD_FORMAT_UNIT),
180 safe_for_write(GPCMD_REPAIR_RZONE_TRACK), 180 safe_for_write(GPCMD_REPAIR_RZONE_TRACK),
181 safe_for_write(GPCMD_RESERVE_RZONE_TRACK), 181 safe_for_write(GPCMD_RESERVE_RZONE_TRACK),
182 safe_for_write(GPCMD_SEND_DVD_STRUCTURE), 182 safe_for_write(GPCMD_SEND_DVD_STRUCTURE),
183 safe_for_write(GPCMD_SEND_EVENT), 183 safe_for_write(GPCMD_SEND_EVENT),
184 safe_for_write(GPCMD_SEND_KEY), 184 safe_for_write(GPCMD_SEND_KEY),
185 safe_for_write(GPCMD_SEND_OPC), 185 safe_for_write(GPCMD_SEND_OPC),
186 safe_for_write(GPCMD_SEND_CUE_SHEET), 186 safe_for_write(GPCMD_SEND_CUE_SHEET),
187 safe_for_write(GPCMD_SET_SPEED), 187 safe_for_write(GPCMD_SET_SPEED),
188 safe_for_write(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL), 188 safe_for_write(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL),
189 safe_for_write(GPCMD_LOAD_UNLOAD), 189 safe_for_write(GPCMD_LOAD_UNLOAD),
190 safe_for_write(GPCMD_SET_STREAMING), 190 safe_for_write(GPCMD_SET_STREAMING),
191 }; 191 };
192 unsigned char type = cmd_type[cmd[0]]; 192 unsigned char type = cmd_type[cmd[0]];
193 193
194 /* Anybody who can open the device can do a read-safe command */ 194 /* Anybody who can open the device can do a read-safe command */
195 if (type & CMD_READ_SAFE) 195 if (type & CMD_READ_SAFE)
196 return 0; 196 return 0;
197 197
198 /* Write-safe commands just require a writable open.. */ 198 /* Write-safe commands just require a writable open.. */
199 if (type & CMD_WRITE_SAFE) { 199 if (type & CMD_WRITE_SAFE) {
200 if (file->f_mode & FMODE_WRITE) 200 if (file->f_mode & FMODE_WRITE)
201 return 0; 201 return 0;
202 } 202 }
203 203
204 /* And root can do any command.. */ 204 /* And root can do any command.. */
205 if (capable(CAP_SYS_RAWIO)) 205 if (capable(CAP_SYS_RAWIO))
206 return 0; 206 return 0;
207 207
208 if (!type) { 208 if (!type) {
209 cmd_type[cmd[0]] = CMD_WARNED; 209 cmd_type[cmd[0]] = CMD_WARNED;
210 printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]); 210 printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
211 } 211 }
212 212
213 /* Otherwise fail it with an "Operation not permitted" */ 213 /* Otherwise fail it with an "Operation not permitted" */
214 return -EPERM; 214 return -EPERM;
215 } 215 }
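The cmd_type[] table above leans on C99 designated initializers: safe_for_read() and safe_for_write() expand to "[opcode] = flag", and every opcode that is not named stays zero, i.e. not permitted. A standalone sketch of the same whitelist pattern, using a few well-known opcodes and nothing beyond standard C:

        #define ALLOW_READ(op)   [op] = 0x01
        #define ALLOW_WRITE(op)  [op] = 0x02

        /* Opcodes not listed default to 0 and would be rejected. */
        static const unsigned char allowed[256] = {
                ALLOW_READ(0x12),       /* INQUIRY */
                ALLOW_READ(0x28),       /* READ(10) */
                ALLOW_WRITE(0x2a),      /* WRITE(10) */
        };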
216 216
217 static int sg_io(struct file *file, request_queue_t *q, 217 static int sg_io(struct file *file, request_queue_t *q,
218 struct gendisk *bd_disk, struct sg_io_hdr *hdr) 218 struct gendisk *bd_disk, struct sg_io_hdr *hdr)
219 { 219 {
220 unsigned long start_time; 220 unsigned long start_time;
221 int writing = 0, ret = 0; 221 int writing = 0, ret = 0;
222 struct request *rq; 222 struct request *rq;
223 struct bio *bio; 223 struct bio *bio;
224 char sense[SCSI_SENSE_BUFFERSIZE]; 224 char sense[SCSI_SENSE_BUFFERSIZE];
225 unsigned char cmd[BLK_MAX_CDB]; 225 unsigned char cmd[BLK_MAX_CDB];
226 226
227 if (hdr->interface_id != 'S') 227 if (hdr->interface_id != 'S')
228 return -EINVAL; 228 return -EINVAL;
229 if (hdr->cmd_len > BLK_MAX_CDB) 229 if (hdr->cmd_len > BLK_MAX_CDB)
230 return -EINVAL; 230 return -EINVAL;
231 if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len)) 231 if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len))
232 return -EFAULT; 232 return -EFAULT;
233 if (verify_command(file, cmd)) 233 if (verify_command(file, cmd))
234 return -EPERM; 234 return -EPERM;
235 235
236 if (hdr->dxfer_len > (q->max_hw_sectors << 9)) 236 if (hdr->dxfer_len > (q->max_hw_sectors << 9))
237 return -EIO; 237 return -EIO;
238 238
239 if (hdr->dxfer_len) 239 if (hdr->dxfer_len)
240 switch (hdr->dxfer_direction) { 240 switch (hdr->dxfer_direction) {
241 default: 241 default:
242 return -EINVAL; 242 return -EINVAL;
243 case SG_DXFER_TO_FROM_DEV: 243 case SG_DXFER_TO_FROM_DEV:
244 case SG_DXFER_TO_DEV: 244 case SG_DXFER_TO_DEV:
245 writing = 1; 245 writing = 1;
246 break; 246 break;
247 case SG_DXFER_FROM_DEV: 247 case SG_DXFER_FROM_DEV:
248 break; 248 break;
249 } 249 }
250 250
251 rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL); 251 rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
252 if (!rq) 252 if (!rq)
253 return -ENOMEM; 253 return -ENOMEM;
254 254
255 if (hdr->iovec_count) { 255 if (hdr->iovec_count) {
256 const int size = sizeof(struct sg_iovec) * hdr->iovec_count; 256 const int size = sizeof(struct sg_iovec) * hdr->iovec_count;
257 struct sg_iovec *iov; 257 struct sg_iovec *iov;
258 258
259 iov = kmalloc(size, GFP_KERNEL); 259 iov = kmalloc(size, GFP_KERNEL);
260 if (!iov) { 260 if (!iov) {
261 ret = -ENOMEM; 261 ret = -ENOMEM;
262 goto out; 262 goto out;
263 } 263 }
264 264
265 if (copy_from_user(iov, hdr->dxferp, size)) { 265 if (copy_from_user(iov, hdr->dxferp, size)) {
266 kfree(iov); 266 kfree(iov);
267 ret = -EFAULT; 267 ret = -EFAULT;
268 goto out; 268 goto out;
269 } 269 }
270 270
271 ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count); 271 ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count);
272 kfree(iov); 272 kfree(iov);
273 } else if (hdr->dxfer_len) 273 } else if (hdr->dxfer_len)
274 ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); 274 ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len);
275 275
276 if (ret) 276 if (ret)
277 goto out; 277 goto out;
278 278
279 /* 279 /*
280 * fill in request structure 280 * fill in request structure
281 */ 281 */
282 rq->cmd_len = hdr->cmd_len; 282 rq->cmd_len = hdr->cmd_len;
283 memcpy(rq->cmd, cmd, hdr->cmd_len); 283 memcpy(rq->cmd, cmd, hdr->cmd_len);
284 if (sizeof(rq->cmd) != hdr->cmd_len) 284 if (sizeof(rq->cmd) != hdr->cmd_len)
285 memset(rq->cmd + hdr->cmd_len, 0, sizeof(rq->cmd) - hdr->cmd_len); 285 memset(rq->cmd + hdr->cmd_len, 0, sizeof(rq->cmd) - hdr->cmd_len);
286 286
287 memset(sense, 0, sizeof(sense)); 287 memset(sense, 0, sizeof(sense));
288 rq->sense = sense; 288 rq->sense = sense;
289 rq->sense_len = 0; 289 rq->sense_len = 0;
290 290
291 rq->flags |= REQ_BLOCK_PC; 291 rq->flags |= REQ_BLOCK_PC;
292 bio = rq->bio; 292 bio = rq->bio;
293 293
294 /* 294 /*
295 * bounce this after holding a reference to the original bio, it's 295 * bounce this after holding a reference to the original bio, it's
296 * needed for proper unmapping 296 * needed for proper unmapping
297 */ 297 */
298 if (rq->bio) 298 if (rq->bio)
299 blk_queue_bounce(q, &rq->bio); 299 blk_queue_bounce(q, &rq->bio);
300 300
301 rq->timeout = (hdr->timeout * HZ) / 1000; 301 rq->timeout = (hdr->timeout * HZ) / 1000;
302 if (!rq->timeout) 302 if (!rq->timeout)
303 rq->timeout = q->sg_timeout; 303 rq->timeout = q->sg_timeout;
304 if (!rq->timeout) 304 if (!rq->timeout)
305 rq->timeout = BLK_DEFAULT_TIMEOUT; 305 rq->timeout = BLK_DEFAULT_TIMEOUT;
306 306
307 start_time = jiffies; 307 start_time = jiffies;
308 308
309 /* ignore return value. All information is passed back to caller 309 /* ignore return value. All information is passed back to caller
310 * (if he doesn't check that is his problem). 310 * (if he doesn't check that is his problem).
311 * N.B. a non-zero SCSI status is _not_ necessarily an error. 311 * N.B. a non-zero SCSI status is _not_ necessarily an error.
312 */ 312 */
313 blk_execute_rq(q, bd_disk, rq, 0); 313 blk_execute_rq(q, bd_disk, rq, 0);
314 314
315 /* write to all output members */ 315 /* write to all output members */
316 hdr->status = 0xff & rq->errors; 316 hdr->status = 0xff & rq->errors;
317 hdr->masked_status = status_byte(rq->errors); 317 hdr->masked_status = status_byte(rq->errors);
318 hdr->msg_status = msg_byte(rq->errors); 318 hdr->msg_status = msg_byte(rq->errors);
319 hdr->host_status = host_byte(rq->errors); 319 hdr->host_status = host_byte(rq->errors);
320 hdr->driver_status = driver_byte(rq->errors); 320 hdr->driver_status = driver_byte(rq->errors);
321 hdr->info = 0; 321 hdr->info = 0;
322 if (hdr->masked_status || hdr->host_status || hdr->driver_status) 322 if (hdr->masked_status || hdr->host_status || hdr->driver_status)
323 hdr->info |= SG_INFO_CHECK; 323 hdr->info |= SG_INFO_CHECK;
324 hdr->resid = rq->data_len; 324 hdr->resid = rq->data_len;
325 hdr->duration = ((jiffies - start_time) * 1000) / HZ; 325 hdr->duration = ((jiffies - start_time) * 1000) / HZ;
326 hdr->sb_len_wr = 0; 326 hdr->sb_len_wr = 0;
327 327
328 if (rq->sense_len && hdr->sbp) { 328 if (rq->sense_len && hdr->sbp) {
329 int len = min((unsigned int) hdr->mx_sb_len, rq->sense_len); 329 int len = min((unsigned int) hdr->mx_sb_len, rq->sense_len);
330 330
331 if (!copy_to_user(hdr->sbp, rq->sense, len)) 331 if (!copy_to_user(hdr->sbp, rq->sense, len))
332 hdr->sb_len_wr = len; 332 hdr->sb_len_wr = len;
333 } 333 }
334 334
335 if (blk_rq_unmap_user(bio, hdr->dxfer_len)) 335 if (blk_rq_unmap_user(bio, hdr->dxfer_len))
336 ret = -EFAULT; 336 ret = -EFAULT;
337 337
338 /* may not have succeeded, but output values written to control 338 /* may not have succeeded, but output values written to control
339 * structure (struct sg_io_hdr). */ 339 * structure (struct sg_io_hdr). */
340 out: 340 out:
341 blk_put_request(rq); 341 blk_put_request(rq);
342 return ret; 342 return ret;
343 } 343 }
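To illustrate the path through sg_io() above, a hypothetical userspace INQUIRY could fill struct sg_io_hdr like this; the device path, buffer sizes and timeout are assumptions for the example:

        #include <fcntl.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>
        #include <sys/ioctl.h>
        #include <scsi/sg.h>

        int main(void)
        {
                unsigned char cdb[6] = { 0x12, 0, 0, 0, 96, 0 };    /* INQUIRY, 96 bytes */
                unsigned char buf[96], sense[32];
                struct sg_io_hdr hdr;
                int fd = open("/dev/sr0", O_RDONLY | O_NONBLOCK);

                if (fd < 0) {
                        perror("open");
                        return 1;
                }

                memset(&hdr, 0, sizeof(hdr));
                hdr.interface_id    = 'S';
                hdr.cmd_len         = sizeof(cdb);
                hdr.cmdp            = cdb;
                hdr.dxfer_direction = SG_DXFER_FROM_DEV;
                hdr.dxferp          = buf;
                hdr.dxfer_len       = sizeof(buf);
                hdr.sbp             = sense;
                hdr.mx_sb_len       = sizeof(sense);
                hdr.timeout         = 5000;                         /* milliseconds */

                if (ioctl(fd, SG_IO, &hdr) < 0 || (hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK)
                        fprintf(stderr, "INQUIRY failed\n");
                else
                        printf("vendor: %.8s\n", (char *)buf + 8);  /* vendor id at bytes 8..15 */
                close(fd);
                return 0;
        }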
344 344
345 #define OMAX_SB_LEN 16 /* For backward compatibility */ 345 #define OMAX_SB_LEN 16 /* For backward compatibility */
346 346
347 static int sg_scsi_ioctl(struct file *file, request_queue_t *q, 347 static int sg_scsi_ioctl(struct file *file, request_queue_t *q,
348 struct gendisk *bd_disk, Scsi_Ioctl_Command __user *sic) 348 struct gendisk *bd_disk, Scsi_Ioctl_Command __user *sic)
349 { 349 {
350 struct request *rq; 350 struct request *rq;
351 int err; 351 int err;
352 unsigned int in_len, out_len, bytes, opcode, cmdlen; 352 unsigned int in_len, out_len, bytes, opcode, cmdlen;
353 char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE]; 353 char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE];
354 354
355 /* 355 /*
356 * get in and out lengths, verify they don't exceed a page worth of data 356 * get in and out lengths, verify they don't exceed a page worth of data
357 */ 357 */
358 if (get_user(in_len, &sic->inlen)) 358 if (get_user(in_len, &sic->inlen))
359 return -EFAULT; 359 return -EFAULT;
360 if (get_user(out_len, &sic->outlen)) 360 if (get_user(out_len, &sic->outlen))
361 return -EFAULT; 361 return -EFAULT;
362 if (in_len > PAGE_SIZE || out_len > PAGE_SIZE) 362 if (in_len > PAGE_SIZE || out_len > PAGE_SIZE)
363 return -EINVAL; 363 return -EINVAL;
364 if (get_user(opcode, sic->data)) 364 if (get_user(opcode, sic->data))
365 return -EFAULT; 365 return -EFAULT;
366 366
367 bytes = max(in_len, out_len); 367 bytes = max(in_len, out_len);
368 if (bytes) { 368 if (bytes) {
369 buffer = kmalloc(bytes, q->bounce_gfp | GFP_USER| __GFP_NOWARN); 369 buffer = kmalloc(bytes, q->bounce_gfp | GFP_USER| __GFP_NOWARN);
370 if (!buffer) 370 if (!buffer)
371 return -ENOMEM; 371 return -ENOMEM;
372 372
373 memset(buffer, 0, bytes); 373 memset(buffer, 0, bytes);
374 } 374 }
375 375
376 rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT); 376 rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT);
377 377
378 cmdlen = COMMAND_SIZE(opcode); 378 cmdlen = COMMAND_SIZE(opcode);
379 379
380 /* 380 /*
381 * get command and data to send to device, if any 381 * get command and data to send to device, if any
382 */ 382 */
383 err = -EFAULT; 383 err = -EFAULT;
384 rq->cmd_len = cmdlen; 384 rq->cmd_len = cmdlen;
385 if (copy_from_user(rq->cmd, sic->data, cmdlen)) 385 if (copy_from_user(rq->cmd, sic->data, cmdlen))
386 goto error; 386 goto error;
387 387
388 if (copy_from_user(buffer, sic->data + cmdlen, in_len)) 388 if (copy_from_user(buffer, sic->data + cmdlen, in_len))
389 goto error; 389 goto error;
390 390
391 err = verify_command(file, rq->cmd); 391 err = verify_command(file, rq->cmd);
392 if (err) 392 if (err)
393 goto error; 393 goto error;
394 394
395 switch (opcode) { 395 switch (opcode) {
396 case SEND_DIAGNOSTIC: 396 case SEND_DIAGNOSTIC:
397 case FORMAT_UNIT: 397 case FORMAT_UNIT:
398 rq->timeout = FORMAT_UNIT_TIMEOUT; 398 rq->timeout = FORMAT_UNIT_TIMEOUT;
399 break; 399 break;
400 case START_STOP: 400 case START_STOP:
401 rq->timeout = START_STOP_TIMEOUT; 401 rq->timeout = START_STOP_TIMEOUT;
402 break; 402 break;
403 case MOVE_MEDIUM: 403 case MOVE_MEDIUM:
404 rq->timeout = MOVE_MEDIUM_TIMEOUT; 404 rq->timeout = MOVE_MEDIUM_TIMEOUT;
405 break; 405 break;
406 case READ_ELEMENT_STATUS: 406 case READ_ELEMENT_STATUS:
407 rq->timeout = READ_ELEMENT_STATUS_TIMEOUT; 407 rq->timeout = READ_ELEMENT_STATUS_TIMEOUT;
408 break; 408 break;
409 case READ_DEFECT_DATA: 409 case READ_DEFECT_DATA:
410 rq->timeout = READ_DEFECT_DATA_TIMEOUT; 410 rq->timeout = READ_DEFECT_DATA_TIMEOUT;
411 break; 411 break;
412 default: 412 default:
413 rq->timeout = BLK_DEFAULT_TIMEOUT; 413 rq->timeout = BLK_DEFAULT_TIMEOUT;
414 break; 414 break;
415 } 415 }
416 416
417 memset(sense, 0, sizeof(sense)); 417 memset(sense, 0, sizeof(sense));
418 rq->sense = sense; 418 rq->sense = sense;
419 rq->sense_len = 0; 419 rq->sense_len = 0;
420 420
421 rq->data = buffer; 421 rq->data = buffer;
422 rq->data_len = bytes; 422 rq->data_len = bytes;
423 rq->flags |= REQ_BLOCK_PC; 423 rq->flags |= REQ_BLOCK_PC;
424 424
425 blk_execute_rq(q, bd_disk, rq, 0); 425 blk_execute_rq(q, bd_disk, rq, 0);
426 err = rq->errors & 0xff; /* only 8 bit SCSI status */ 426 err = rq->errors & 0xff; /* only 8 bit SCSI status */
427 if (err) { 427 if (err) {
428 if (rq->sense_len && rq->sense) { 428 if (rq->sense_len && rq->sense) {
429 bytes = (OMAX_SB_LEN > rq->sense_len) ? 429 bytes = (OMAX_SB_LEN > rq->sense_len) ?
430 rq->sense_len : OMAX_SB_LEN; 430 rq->sense_len : OMAX_SB_LEN;
431 if (copy_to_user(sic->data, rq->sense, bytes)) 431 if (copy_to_user(sic->data, rq->sense, bytes))
432 err = -EFAULT; 432 err = -EFAULT;
433 } 433 }
434 } else { 434 } else {
435 if (copy_to_user(sic->data, buffer, out_len)) 435 if (copy_to_user(sic->data, buffer, out_len))
436 err = -EFAULT; 436 err = -EFAULT;
437 } 437 }
438 438
439 error: 439 error:
440 kfree(buffer); 440 kfree(buffer);
441 blk_put_request(rq); 441 blk_put_request(rq);
442 return err; 442 return err;
443 } 443 }
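For completeness, the single user buffer consumed here has the layout implied by the accesses above (sic->inlen, sic->outlen, sic->data): two length fields, then data[] starting with the CDB, followed by inlen bytes of outgoing payload, and overwritten on return with outlen bytes of reply (or sense data on error). A hedged reconstruction of that layout; the authoritative definition is Scsi_Ioctl_Command in scsi/scsi_ioctl.h:

        struct legacy_scsi_cmd {
                unsigned int  inlen;    /* bytes of payload following the CDB */
                unsigned int  outlen;   /* bytes of reply expected back in data[] */
                unsigned char data[];   /* CDB first, then payload, reused for the reply */
        };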
444 444
445 445
446 /* Send basic block requests */ 446 /* Send basic block requests */
447 static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int cmd, int data) 447 static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int cmd, int data)
448 { 448 {
449 struct request *rq; 449 struct request *rq;
450 int err; 450 int err;
451 451
452 rq = blk_get_request(q, WRITE, __GFP_WAIT); 452 rq = blk_get_request(q, WRITE, __GFP_WAIT);
453 rq->flags |= REQ_BLOCK_PC; 453 rq->flags |= REQ_BLOCK_PC;
454 rq->data = NULL; 454 rq->data = NULL;
455 rq->data_len = 0; 455 rq->data_len = 0;
456 rq->timeout = BLK_DEFAULT_TIMEOUT; 456 rq->timeout = BLK_DEFAULT_TIMEOUT;
457 memset(rq->cmd, 0, sizeof(rq->cmd)); 457 memset(rq->cmd, 0, sizeof(rq->cmd));
458 rq->cmd[0] = cmd; 458 rq->cmd[0] = cmd;
459 rq->cmd[4] = data; 459 rq->cmd[4] = data;
460 rq->cmd_len = 6; 460 rq->cmd_len = 6;
461 err = blk_execute_rq(q, bd_disk, rq, 0); 461 err = blk_execute_rq(q, bd_disk, rq, 0);
462 blk_put_request(rq); 462 blk_put_request(rq);
463 463
464 return err; 464 return err;
465 } 465 }
466 466
467 static inline int blk_send_start_stop(request_queue_t *q, struct gendisk *bd_disk, int data) 467 static inline int blk_send_start_stop(request_queue_t *q, struct gendisk *bd_disk, int data)
468 { 468 {
469 return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data); 469 return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data);
470 } 470 }
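blk_send_start_stop() backs the CDROMCLOSETRAY and CDROMEJECT cases handled in scsi_cmd_ioctl() below. A hypothetical userspace eject request that a driver may route through this helper (device path is an assumption):

        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>
        #include <sys/ioctl.h>
        #include <linux/cdrom.h>

        int main(void)
        {
                int fd = open("/dev/sr0", O_RDONLY | O_NONBLOCK);

                if (fd < 0 || ioctl(fd, CDROMEJECT) < 0) {
                        perror("CDROMEJECT");
                        return 1;
                }
                close(fd);
                return 0;
        }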
471 471
472 int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, void __user *arg) 472 int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, void __user *arg)
473 { 473 {
474 request_queue_t *q; 474 request_queue_t *q;
475 int err; 475 int err;
476 476
477 q = bd_disk->queue; 477 q = bd_disk->queue;
478 if (!q) 478 if (!q)
479 return -ENXIO; 479 return -ENXIO;
480 480
481 if (blk_get_queue(q)) 481 if (blk_get_queue(q))
482 return -ENXIO; 482 return -ENXIO;
483 483
484 switch (cmd) { 484 switch (cmd) {
485 /* 485 /*
486 * new sgv3 interface 486 * new sgv3 interface
487 */ 487 */
488 case SG_GET_VERSION_NUM: 488 case SG_GET_VERSION_NUM:
489 err = sg_get_version(arg); 489 err = sg_get_version(arg);
490 break; 490 break;
491 case SCSI_IOCTL_GET_IDLUN: 491 case SCSI_IOCTL_GET_IDLUN:
492 err = scsi_get_idlun(q, arg); 492 err = scsi_get_idlun(q, arg);
493 break; 493 break;
494 case SCSI_IOCTL_GET_BUS_NUMBER: 494 case SCSI_IOCTL_GET_BUS_NUMBER:
495 err = scsi_get_bus(q, arg); 495 err = scsi_get_bus(q, arg);
496 break; 496 break;
497 case SG_SET_TIMEOUT: 497 case SG_SET_TIMEOUT:
498 err = sg_set_timeout(q, arg); 498 err = sg_set_timeout(q, arg);
499 break; 499 break;
500 case SG_GET_TIMEOUT: 500 case SG_GET_TIMEOUT:
501 err = sg_get_timeout(q); 501 err = sg_get_timeout(q);
502 break; 502 break;
503 case SG_GET_RESERVED_SIZE: 503 case SG_GET_RESERVED_SIZE:
504 err = sg_get_reserved_size(q, arg); 504 err = sg_get_reserved_size(q, arg);
505 break; 505 break;
506 case SG_SET_RESERVED_SIZE: 506 case SG_SET_RESERVED_SIZE:
507 err = sg_set_reserved_size(q, arg); 507 err = sg_set_reserved_size(q, arg);
508 break; 508 break;
509 case SG_EMULATED_HOST: 509 case SG_EMULATED_HOST:
510 err = sg_emulated_host(q, arg); 510 err = sg_emulated_host(q, arg);
511 break; 511 break;
512 case SG_IO: { 512 case SG_IO: {
513 struct sg_io_hdr hdr; 513 struct sg_io_hdr hdr;
514 514
515 err = -EFAULT; 515 err = -EFAULT;
516 if (copy_from_user(&hdr, arg, sizeof(hdr))) 516 if (copy_from_user(&hdr, arg, sizeof(hdr)))
517 break; 517 break;
518 err = sg_io(file, q, bd_disk, &hdr); 518 err = sg_io(file, q, bd_disk, &hdr);
519 if (err == -EFAULT) 519 if (err == -EFAULT)
520 break; 520 break;
521 521
522 if (copy_to_user(arg, &hdr, sizeof(hdr))) 522 if (copy_to_user(arg, &hdr, sizeof(hdr)))
523 err = -EFAULT; 523 err = -EFAULT;
524 break; 524 break;
525 } 525 }
526 case CDROM_SEND_PACKET: { 526 case CDROM_SEND_PACKET: {
527 struct cdrom_generic_command cgc; 527 struct cdrom_generic_command cgc;
528 struct sg_io_hdr hdr; 528 struct sg_io_hdr hdr;
529 529
530 err = -EFAULT; 530 err = -EFAULT;
531 if (copy_from_user(&cgc, arg, sizeof(cgc))) 531 if (copy_from_user(&cgc, arg, sizeof(cgc)))
532 break; 532 break;
533 cgc.timeout = clock_t_to_jiffies(cgc.timeout); 533 cgc.timeout = clock_t_to_jiffies(cgc.timeout);
534 memset(&hdr, 0, sizeof(hdr)); 534 memset(&hdr, 0, sizeof(hdr));
535 hdr.interface_id = 'S'; 535 hdr.interface_id = 'S';
536 hdr.cmd_len = sizeof(cgc.cmd); 536 hdr.cmd_len = sizeof(cgc.cmd);
537 hdr.dxfer_len = cgc.buflen; 537 hdr.dxfer_len = cgc.buflen;
538 err = 0; 538 err = 0;
539 switch (cgc.data_direction) { 539 switch (cgc.data_direction) {
540 case CGC_DATA_UNKNOWN: 540 case CGC_DATA_UNKNOWN:
541 hdr.dxfer_direction = SG_DXFER_UNKNOWN; 541 hdr.dxfer_direction = SG_DXFER_UNKNOWN;
542 break; 542 break;
543 case CGC_DATA_WRITE: 543 case CGC_DATA_WRITE:
544 hdr.dxfer_direction = SG_DXFER_TO_DEV; 544 hdr.dxfer_direction = SG_DXFER_TO_DEV;
545 break; 545 break;
546 case CGC_DATA_READ: 546 case CGC_DATA_READ:
547 hdr.dxfer_direction = SG_DXFER_FROM_DEV; 547 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
548 break; 548 break;
549 case CGC_DATA_NONE: 549 case CGC_DATA_NONE:
550 hdr.dxfer_direction = SG_DXFER_NONE; 550 hdr.dxfer_direction = SG_DXFER_NONE;
551 break; 551 break;
552 default: 552 default:
553 err = -EINVAL; 553 err = -EINVAL;
554 } 554 }
555 if (err) 555 if (err)
556 break; 556 break;
557 557
558 hdr.dxferp = cgc.buffer; 558 hdr.dxferp = cgc.buffer;
559 hdr.sbp = cgc.sense; 559 hdr.sbp = cgc.sense;
560 if (hdr.sbp) 560 if (hdr.sbp)
561 hdr.mx_sb_len = sizeof(struct request_sense); 561 hdr.mx_sb_len = sizeof(struct request_sense);
562 hdr.timeout = cgc.timeout; 562 hdr.timeout = cgc.timeout;
563 hdr.cmdp = ((struct cdrom_generic_command __user*) arg)->cmd; 563 hdr.cmdp = ((struct cdrom_generic_command __user*) arg)->cmd;
564 hdr.cmd_len = sizeof(cgc.cmd); 564 hdr.cmd_len = sizeof(cgc.cmd);
565 565
566 err = sg_io(file, q, bd_disk, &hdr); 566 err = sg_io(file, q, bd_disk, &hdr);
567 if (err == -EFAULT) 567 if (err == -EFAULT)
568 break; 568 break;
569 569
570 if (hdr.status) 570 if (hdr.status)
571 err = -EIO; 571 err = -EIO;
572 572
573 cgc.stat = err; 573 cgc.stat = err;
574 cgc.buflen = hdr.resid; 574 cgc.buflen = hdr.resid;
575 if (copy_to_user(arg, &cgc, sizeof(cgc))) 575 if (copy_to_user(arg, &cgc, sizeof(cgc)))
576 err = -EFAULT; 576 err = -EFAULT;
577 577
578 break; 578 break;
579 } 579 }
580 580
581 /* 581 /*
582 * old junk scsi send command ioctl 582 * old junk scsi send command ioctl
583 */ 583 */
584 case SCSI_IOCTL_SEND_COMMAND: 584 case SCSI_IOCTL_SEND_COMMAND:
585 printk(KERN_WARNING "program %s is using a deprecated SCSI ioctl, please convert it to SG_IO\n", current->comm); 585 printk(KERN_WARNING "program %s is using a deprecated SCSI ioctl, please convert it to SG_IO\n", current->comm);
586 err = -EINVAL; 586 err = -EINVAL;
587 if (!arg) 587 if (!arg)
588 break; 588 break;
589 589
590 err = sg_scsi_ioctl(file, q, bd_disk, arg); 590 err = sg_scsi_ioctl(file, q, bd_disk, arg);
591 break; 591 break;
592 case CDROMCLOSETRAY: 592 case CDROMCLOSETRAY:
593 err = blk_send_start_stop(q, bd_disk, 0x03); 593 err = blk_send_start_stop(q, bd_disk, 0x03);
594 break; 594 break;
595 case CDROMEJECT: 595 case CDROMEJECT:
596 err = blk_send_start_stop(q, bd_disk, 0x02); 596 err = blk_send_start_stop(q, bd_disk, 0x02);
597 break; 597 break;
598 default: 598 default:
599 err = -ENOTTY; 599 err = -ENOTTY;
600 } 600 }
601 601
602 blk_put_queue(q); 602 blk_put_queue(q);
603 return err; 603 return err;
604 } 604 }
605 605
606 EXPORT_SYMBOL(scsi_cmd_ioctl); 606 EXPORT_SYMBOL(scsi_cmd_ioctl);
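A minimal sketch of how a block driver of this era might forward its ioctl method to the helper exported above; the driver name is hypothetical and the block_device_operations ioctl signature is assumed to be the contemporary (inode, file, cmd, arg) form:

        #include <linux/blkdev.h>
        #include <linux/module.h>

        /* Hypothetical glue: hand SCSI ioctls straight to scsi_cmd_ioctl(). */
        static int mydrv_ioctl(struct inode *inode, struct file *file,
                               unsigned int cmd, unsigned long arg)
        {
                struct gendisk *disk = inode->i_bdev->bd_disk;

                return scsi_cmd_ioctl(file, disk, cmd, (void __user *)arg);
        }

        static struct block_device_operations mydrv_fops = {
                .owner = THIS_MODULE,
                .ioctl = mydrv_ioctl,
        };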
607 607