Commit e572ec7e4e432de7ecf7bd2e62117646fa64e518

Authored by Al Viro
1 parent 3d1ab40f4c

[PATCH] fix rmmod problems with elevator attributes, clean them up

Showing 5 changed files with 74 additions and 175 deletions

/*
 * Anticipatory & deadline i/o scheduler.
 *
 * Copyright (C) 2002 Jens Axboe <axboe@suse.de>
 *                    Nick Piggin <nickpiggin@yahoo.com.au>
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/interrupt.h>

#define REQ_SYNC 1
#define REQ_ASYNC 0

/*
 * See Documentation/block/as-iosched.txt
 */

/*
 * max time before a read is submitted.
 */
#define default_read_expire (HZ / 8)

/*
 * ditto for writes, these limits are not hard, even
 * if the disk is capable of satisfying them.
 */
#define default_write_expire (HZ / 4)

/*
 * read_batch_expire describes how long we will allow a stream of reads to
 * persist before looking to see whether it is time to switch over to writes.
 */
#define default_read_batch_expire (HZ / 2)

/*
 * write_batch_expire describes how long we want a stream of writes to run for.
 * This is not a hard limit, but a target we set for the auto-tuning thingy.
 * See, the problem is: we can send a lot of writes to disk cache / TCQ in
 * a short amount of time...
 */
#define default_write_batch_expire (HZ / 8)

/*
 * max time we may wait to anticipate a read (default around 6ms)
 */
#define default_antic_expire ((HZ / 150) ? HZ / 150 : 1)
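/*
 * (The conditional above guards low-HZ configurations, e.g. HZ=100, where
 * HZ / 150 rounds down to 0; the anticipation window is clamped to a
 * minimum of one jiffy in that case.)
 */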

/*
 * Keep track of up to 20ms thinktimes. We can go as big as we like here,
 * however huge values tend to interfere and not decay fast enough. A program
 * might be in a non-io phase of operation. Waiting on user input for example,
 * or doing a lengthy computation. A small penalty can be justified there, and
 * will still catch out those processes that constantly have large thinktimes.
 */
#define MAX_THINKTIME (HZ/50UL)

/* Bits in as_io_context.state */
enum as_io_states {
        AS_TASK_RUNNING=0,      /* Process has not exited */
        AS_TASK_IOSTARTED,      /* Process has started some IO */
        AS_TASK_IORUNNING,      /* Process has completed some IO */
};

enum anticipation_status {
        ANTIC_OFF=0,            /* Not anticipating (normal operation) */
        ANTIC_WAIT_REQ,         /* The last read has not yet completed */
        ANTIC_WAIT_NEXT,        /* Currently anticipating a request vs
                                   last read (which has completed) */
        ANTIC_FINISHED,         /* Anticipating but have found a candidate
                                 * or timed out */
};

struct as_data {
        /*
         * run time data
         */

        struct request_queue *q;        /* the "owner" queue */

        /*
         * requests (as_rq s) are present on both sort_list and fifo_list
         */
        struct rb_root sort_list[2];
        struct list_head fifo_list[2];

        struct as_rq *next_arq[2];      /* next in sort order */
        sector_t last_sector[2];        /* last REQ_SYNC & REQ_ASYNC sectors */
        struct list_head *hash;         /* request hash */

        unsigned long exit_prob;        /* probability a task will exit while
                                           being waited on */
        unsigned long exit_no_coop;     /* probability an exited task will
                                           not be part of a later cooperating
                                           request */
        unsigned long new_ttime_total;  /* mean thinktime on new proc */
        unsigned long new_ttime_mean;
        u64 new_seek_total;             /* mean seek on new proc */
        sector_t new_seek_mean;

        unsigned long current_batch_expires;
        unsigned long last_check_fifo[2];
        int changed_batch;              /* 1: waiting for old batch to end */
        int new_batch;                  /* 1: waiting on first read complete */
        int batch_data_dir;             /* current batch REQ_SYNC / REQ_ASYNC */
        int write_batch_count;          /* max # of reqs in a write batch */
        int current_write_count;        /* how many requests left this batch */
        int write_batch_idled;          /* has the write batch gone idle? */
        mempool_t *arq_pool;

        enum anticipation_status antic_status;
        unsigned long antic_start;      /* jiffies: when it started */
        struct timer_list antic_timer;  /* anticipatory scheduling timer */
        struct work_struct antic_work;  /* Deferred unplugging */
        struct io_context *io_context;  /* Identify the expected process */
        int ioc_finished;       /* IO associated with io_context is finished */
        int nr_dispatched;

        /*
         * settings that change how the i/o scheduler behaves
         */
        unsigned long fifo_expire[2];
        unsigned long batch_expire[2];
        unsigned long antic_expire;
};

#define list_entry_fifo(ptr)    list_entry((ptr), struct as_rq, fifo)

/*
 * per-request data.
 */
enum arq_state {
        AS_RQ_NEW=0,            /* New - not referenced and not on any lists */
        AS_RQ_QUEUED,           /* In the request queue. It belongs to the
                                   scheduler */
        AS_RQ_DISPATCHED,       /* On the dispatch list. It belongs to the
                                   driver now */
        AS_RQ_PRESCHED,         /* Debug poisoning for requests being used */
        AS_RQ_REMOVED,
        AS_RQ_MERGED,
        AS_RQ_POSTSCHED,        /* when they shouldn't be */
};

struct as_rq {
        /*
         * rbtree index, key is the starting offset
         */
        struct rb_node rb_node;
        sector_t rb_key;

        struct request *request;

        struct io_context *io_context;  /* The submitting task */

        /*
         * request hash, key is the ending offset (for back merge lookup)
         */
        struct list_head hash;
        unsigned int on_hash;

        /*
         * expire fifo
         */
        struct list_head fifo;
        unsigned long expires;

        unsigned int is_sync;
        enum arq_state state;
};

#define RQ_DATA(rq)     ((struct as_rq *) (rq)->elevator_private)

static kmem_cache_t *arq_pool;

static atomic_t ioc_count = ATOMIC_INIT(0);
static struct completion *ioc_gone;

static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq);
static void as_antic_stop(struct as_data *ad);

/*
 * IO Context helper functions
 */

/* Called to deallocate the as_io_context */
static void free_as_io_context(struct as_io_context *aic)
{
        kfree(aic);
        if (atomic_dec_and_test(&ioc_count) && ioc_gone)
                complete(ioc_gone);
}

static void as_trim(struct io_context *ioc)
{
        if (ioc->aic)
                free_as_io_context(ioc->aic);
        ioc->aic = NULL;
}

/* Called when the task exits */
static void exit_as_io_context(struct as_io_context *aic)
{
        WARN_ON(!test_bit(AS_TASK_RUNNING, &aic->state));
        clear_bit(AS_TASK_RUNNING, &aic->state);
}

static struct as_io_context *alloc_as_io_context(void)
{
        struct as_io_context *ret;

        ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
        if (ret) {
                ret->dtor = free_as_io_context;
                ret->exit = exit_as_io_context;
                ret->state = 1 << AS_TASK_RUNNING;
                atomic_set(&ret->nr_queued, 0);
                atomic_set(&ret->nr_dispatched, 0);
                spin_lock_init(&ret->lock);
                ret->ttime_total = 0;
                ret->ttime_samples = 0;
                ret->ttime_mean = 0;
                ret->seek_total = 0;
                ret->seek_samples = 0;
                ret->seek_mean = 0;
                atomic_inc(&ioc_count);
        }

        return ret;
}

/*
 * If the current task has no AS IO context then create one and initialise it.
 * Then take a ref on the task's io context and return it.
 */
static struct io_context *as_get_io_context(void)
{
        struct io_context *ioc = get_io_context(GFP_ATOMIC);
        if (ioc && !ioc->aic) {
                ioc->aic = alloc_as_io_context();
                if (!ioc->aic) {
                        put_io_context(ioc);
                        ioc = NULL;
                }
        }
        return ioc;
}

static void as_put_io_context(struct as_rq *arq)
{
        struct as_io_context *aic;

        if (unlikely(!arq->io_context))
                return;

        aic = arq->io_context->aic;

        if (arq->is_sync == REQ_SYNC && aic) {
                spin_lock(&aic->lock);
                set_bit(AS_TASK_IORUNNING, &aic->state);
                aic->last_end_request = jiffies;
                spin_unlock(&aic->lock);
        }

        put_io_context(arq->io_context);
}

/*
 * the back merge hash support functions
 */
static const int as_hash_shift = 6;
#define AS_HASH_BLOCK(sec)      ((sec) >> 3)
#define AS_HASH_FN(sec)         (hash_long(AS_HASH_BLOCK((sec)), as_hash_shift))
#define AS_HASH_ENTRIES         (1 << as_hash_shift)
#define rq_hash_key(rq)         ((rq)->sector + (rq)->nr_sectors)
#define list_entry_hash(ptr)    list_entry((ptr), struct as_rq, hash)
287 static inline void __as_del_arq_hash(struct as_rq *arq) 287 static inline void __as_del_arq_hash(struct as_rq *arq)
288 { 288 {
289 arq->on_hash = 0; 289 arq->on_hash = 0;
290 list_del_init(&arq->hash); 290 list_del_init(&arq->hash);
291 } 291 }
292 292
293 static inline void as_del_arq_hash(struct as_rq *arq) 293 static inline void as_del_arq_hash(struct as_rq *arq)
294 { 294 {
295 if (arq->on_hash) 295 if (arq->on_hash)
296 __as_del_arq_hash(arq); 296 __as_del_arq_hash(arq);
297 } 297 }
298 298
299 static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq) 299 static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
300 { 300 {
301 struct request *rq = arq->request; 301 struct request *rq = arq->request;
302 302
303 BUG_ON(arq->on_hash); 303 BUG_ON(arq->on_hash);
304 304
305 arq->on_hash = 1; 305 arq->on_hash = 1;
306 list_add(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]); 306 list_add(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]);
307 } 307 }
308 308
309 /* 309 /*
310 * move hot entry to front of chain 310 * move hot entry to front of chain
311 */ 311 */
312 static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq) 312 static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq)
313 { 313 {
314 struct request *rq = arq->request; 314 struct request *rq = arq->request;
315 struct list_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))]; 315 struct list_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))];
316 316
317 if (!arq->on_hash) { 317 if (!arq->on_hash) {
318 WARN_ON(1); 318 WARN_ON(1);
319 return; 319 return;
320 } 320 }
321 321
322 if (arq->hash.prev != head) { 322 if (arq->hash.prev != head) {
323 list_del(&arq->hash); 323 list_del(&arq->hash);
324 list_add(&arq->hash, head); 324 list_add(&arq->hash, head);
325 } 325 }
326 } 326 }
327 327
328 static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset) 328 static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
329 { 329 {
330 struct list_head *hash_list = &ad->hash[AS_HASH_FN(offset)]; 330 struct list_head *hash_list = &ad->hash[AS_HASH_FN(offset)];
331 struct list_head *entry, *next = hash_list->next; 331 struct list_head *entry, *next = hash_list->next;
332 332
333 while ((entry = next) != hash_list) { 333 while ((entry = next) != hash_list) {
334 struct as_rq *arq = list_entry_hash(entry); 334 struct as_rq *arq = list_entry_hash(entry);
335 struct request *__rq = arq->request; 335 struct request *__rq = arq->request;
336 336
337 next = entry->next; 337 next = entry->next;
338 338
339 BUG_ON(!arq->on_hash); 339 BUG_ON(!arq->on_hash);
340 340
341 if (!rq_mergeable(__rq)) { 341 if (!rq_mergeable(__rq)) {
342 as_del_arq_hash(arq); 342 as_del_arq_hash(arq);
343 continue; 343 continue;
344 } 344 }
345 345
346 if (rq_hash_key(__rq) == offset) 346 if (rq_hash_key(__rq) == offset)
347 return __rq; 347 return __rq;
348 } 348 }
349 349
350 return NULL; 350 return NULL;
351 } 351 }
352 352
353 /* 353 /*
354 * rb tree support functions 354 * rb tree support functions
355 */ 355 */
356 #define RB_NONE (2) 356 #define RB_NONE (2)
357 #define RB_EMPTY(root) ((root)->rb_node == NULL) 357 #define RB_EMPTY(root) ((root)->rb_node == NULL)
358 #define ON_RB(node) ((node)->rb_color != RB_NONE) 358 #define ON_RB(node) ((node)->rb_color != RB_NONE)
359 #define RB_CLEAR(node) ((node)->rb_color = RB_NONE) 359 #define RB_CLEAR(node) ((node)->rb_color = RB_NONE)
360 #define rb_entry_arq(node) rb_entry((node), struct as_rq, rb_node) 360 #define rb_entry_arq(node) rb_entry((node), struct as_rq, rb_node)
361 #define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync]) 361 #define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync])
362 #define rq_rb_key(rq) (rq)->sector 362 #define rq_rb_key(rq) (rq)->sector
363 363
364 /* 364 /*
365 * as_find_first_arq finds the first (lowest sector numbered) request 365 * as_find_first_arq finds the first (lowest sector numbered) request
366 * for the specified data_dir. Used to sweep back to the start of the disk 366 * for the specified data_dir. Used to sweep back to the start of the disk
367 * (1-way elevator) after we process the last (highest sector) request. 367 * (1-way elevator) after we process the last (highest sector) request.
368 */ 368 */
369 static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir) 369 static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir)
370 { 370 {
371 struct rb_node *n = ad->sort_list[data_dir].rb_node; 371 struct rb_node *n = ad->sort_list[data_dir].rb_node;
372 372
373 if (n == NULL) 373 if (n == NULL)
374 return NULL; 374 return NULL;
375 375
376 for (;;) { 376 for (;;) {
377 if (n->rb_left == NULL) 377 if (n->rb_left == NULL)
378 return rb_entry_arq(n); 378 return rb_entry_arq(n);
379 379
380 n = n->rb_left; 380 n = n->rb_left;
381 } 381 }
382 } 382 }
383 383
384 /* 384 /*
385 * Add the request to the rb tree if it is unique. If there is an alias (an 385 * Add the request to the rb tree if it is unique. If there is an alias (an
386 * existing request against the same sector), which can happen when using 386 * existing request against the same sector), which can happen when using
387 * direct IO, then return the alias. 387 * direct IO, then return the alias.
388 */ 388 */
389 static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq) 389 static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
390 { 390 {
391 struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; 391 struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
392 struct rb_node *parent = NULL; 392 struct rb_node *parent = NULL;
393 struct as_rq *__arq; 393 struct as_rq *__arq;
394 struct request *rq = arq->request; 394 struct request *rq = arq->request;
395 395
396 arq->rb_key = rq_rb_key(rq); 396 arq->rb_key = rq_rb_key(rq);
397 397
398 while (*p) { 398 while (*p) {
399 parent = *p; 399 parent = *p;
400 __arq = rb_entry_arq(parent); 400 __arq = rb_entry_arq(parent);
401 401
402 if (arq->rb_key < __arq->rb_key) 402 if (arq->rb_key < __arq->rb_key)
403 p = &(*p)->rb_left; 403 p = &(*p)->rb_left;
404 else if (arq->rb_key > __arq->rb_key) 404 else if (arq->rb_key > __arq->rb_key)
405 p = &(*p)->rb_right; 405 p = &(*p)->rb_right;
406 else 406 else
407 return __arq; 407 return __arq;
408 } 408 }
409 409
410 rb_link_node(&arq->rb_node, parent, p); 410 rb_link_node(&arq->rb_node, parent, p);
411 rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq)); 411 rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
412 412
413 return NULL; 413 return NULL;
414 } 414 }
415 415
416 static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq) 416 static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
417 { 417 {
418 struct as_rq *alias; 418 struct as_rq *alias;
419 419
420 while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) { 420 while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) {
421 as_move_to_dispatch(ad, alias); 421 as_move_to_dispatch(ad, alias);
422 as_antic_stop(ad); 422 as_antic_stop(ad);
423 } 423 }
424 } 424 }
425 425
426 static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) 426 static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq)
427 { 427 {
428 if (!ON_RB(&arq->rb_node)) { 428 if (!ON_RB(&arq->rb_node)) {
429 WARN_ON(1); 429 WARN_ON(1);
430 return; 430 return;
431 } 431 }
432 432
433 rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq)); 433 rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
434 RB_CLEAR(&arq->rb_node); 434 RB_CLEAR(&arq->rb_node);
435 } 435 }
436 436
437 static struct request * 437 static struct request *
438 as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir) 438 as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
439 { 439 {
440 struct rb_node *n = ad->sort_list[data_dir].rb_node; 440 struct rb_node *n = ad->sort_list[data_dir].rb_node;
441 struct as_rq *arq; 441 struct as_rq *arq;
442 442
443 while (n) { 443 while (n) {
444 arq = rb_entry_arq(n); 444 arq = rb_entry_arq(n);
445 445
446 if (sector < arq->rb_key) 446 if (sector < arq->rb_key)
447 n = n->rb_left; 447 n = n->rb_left;
448 else if (sector > arq->rb_key) 448 else if (sector > arq->rb_key)
449 n = n->rb_right; 449 n = n->rb_right;
450 else 450 else
451 return arq->request; 451 return arq->request;
452 } 452 }
453 453
454 return NULL; 454 return NULL;
455 } 455 }
456 456
457 /* 457 /*
458 * IO Scheduler proper 458 * IO Scheduler proper
459 */ 459 */
460 460
461 #define MAXBACK (1024 * 1024) /* 461 #define MAXBACK (1024 * 1024) /*
462 * Maximum distance the disk will go backward 462 * Maximum distance the disk will go backward
463 * for a request. 463 * for a request.
464 */ 464 */
465 465
466 #define BACK_PENALTY 2 466 #define BACK_PENALTY 2
467 467
468 /* 468 /*
469 * as_choose_req selects the preferred one of two requests of the same data_dir 469 * as_choose_req selects the preferred one of two requests of the same data_dir
470 * ignoring time - eg. timeouts, which is the job of as_dispatch_request 470 * ignoring time - eg. timeouts, which is the job of as_dispatch_request
471 */ 471 */
472 static struct as_rq * 472 static struct as_rq *
473 as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2) 473 as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2)
474 { 474 {
475 int data_dir; 475 int data_dir;
476 sector_t last, s1, s2, d1, d2; 476 sector_t last, s1, s2, d1, d2;
477 int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */ 477 int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */
478 const sector_t maxback = MAXBACK; 478 const sector_t maxback = MAXBACK;
479 479
480 if (arq1 == NULL || arq1 == arq2) 480 if (arq1 == NULL || arq1 == arq2)
481 return arq2; 481 return arq2;
482 if (arq2 == NULL) 482 if (arq2 == NULL)
483 return arq1; 483 return arq1;
484 484
485 data_dir = arq1->is_sync; 485 data_dir = arq1->is_sync;
486 486
487 last = ad->last_sector[data_dir]; 487 last = ad->last_sector[data_dir];
488 s1 = arq1->request->sector; 488 s1 = arq1->request->sector;
489 s2 = arq2->request->sector; 489 s2 = arq2->request->sector;
490 490
491 BUG_ON(data_dir != arq2->is_sync); 491 BUG_ON(data_dir != arq2->is_sync);
492 492
493 /* 493 /*
494 * Strict one way elevator _except_ in the case where we allow 494 * Strict one way elevator _except_ in the case where we allow
495 * short backward seeks which are biased as twice the cost of a 495 * short backward seeks which are biased as twice the cost of a
496 * similar forward seek. 496 * similar forward seek.
497 */ 497 */
498 if (s1 >= last) 498 if (s1 >= last)
499 d1 = s1 - last; 499 d1 = s1 - last;
500 else if (s1+maxback >= last) 500 else if (s1+maxback >= last)
501 d1 = (last - s1)*BACK_PENALTY; 501 d1 = (last - s1)*BACK_PENALTY;
502 else { 502 else {
503 r1_wrap = 1; 503 r1_wrap = 1;
504 d1 = 0; /* shut up, gcc */ 504 d1 = 0; /* shut up, gcc */
505 } 505 }
506 506
507 if (s2 >= last) 507 if (s2 >= last)
508 d2 = s2 - last; 508 d2 = s2 - last;
509 else if (s2+maxback >= last) 509 else if (s2+maxback >= last)
510 d2 = (last - s2)*BACK_PENALTY; 510 d2 = (last - s2)*BACK_PENALTY;
511 else { 511 else {
512 r2_wrap = 1; 512 r2_wrap = 1;
513 d2 = 0; 513 d2 = 0;
514 } 514 }
515 515
516 /* Found required data */ 516 /* Found required data */
517 if (!r1_wrap && r2_wrap) 517 if (!r1_wrap && r2_wrap)
518 return arq1; 518 return arq1;
519 else if (!r2_wrap && r1_wrap) 519 else if (!r2_wrap && r1_wrap)
520 return arq2; 520 return arq2;
521 else if (r1_wrap && r2_wrap) { 521 else if (r1_wrap && r2_wrap) {
522 /* both behind the head */ 522 /* both behind the head */
523 if (s1 <= s2) 523 if (s1 <= s2)
524 return arq1; 524 return arq1;
525 else 525 else
526 return arq2; 526 return arq2;
527 } 527 }
528 528
529 /* Both requests in front of the head */ 529 /* Both requests in front of the head */
530 if (d1 < d2) 530 if (d1 < d2)
531 return arq1; 531 return arq1;
532 else if (d2 < d1) 532 else if (d2 < d1)
533 return arq2; 533 return arq2;
534 else { 534 else {
535 if (s1 >= s2) 535 if (s1 >= s2)
536 return arq1; 536 return arq1;
537 else 537 else
538 return arq2; 538 return arq2;
539 } 539 }
540 } 540 }
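/*
 * (Worked example: with the head at sector 10000, a request at 10100 gets
 * d = 100, and one at 9950 gets d = (10000 - 9950) * BACK_PENALTY = 100;
 * the tie then goes to the higher sector, so the forward request wins.
 * A request more than MAXBACK sectors behind the head is treated as
 * wrapped and is only preferred if the alternative has wrapped as well.)
 */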

/*
 * as_find_next_arq finds the next request after @prev in elevator order.
 * This, together with as_choose_req, forms the basis for how the scheduler
 * chooses what request to process next. Anticipation works on top of this.
 */
static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last)
{
        const int data_dir = last->is_sync;
        struct as_rq *ret;
        struct rb_node *rbnext = rb_next(&last->rb_node);
        struct rb_node *rbprev = rb_prev(&last->rb_node);
        struct as_rq *arq_next, *arq_prev;

        BUG_ON(!ON_RB(&last->rb_node));

        if (rbprev)
                arq_prev = rb_entry_arq(rbprev);
        else
                arq_prev = NULL;

        if (rbnext)
                arq_next = rb_entry_arq(rbnext);
        else {
                arq_next = as_find_first_arq(ad, data_dir);
                if (arq_next == last)
                        arq_next = NULL;
        }

        ret = as_choose_req(ad, arq_next, arq_prev);

        return ret;
}

/*
 * anticipatory scheduling functions follow
 */

/*
 * as_antic_expired tells us when we have anticipated too long.
 * The funny "absolute difference" math on the elapsed time is to handle
 * jiffy wraps, and disks which have been idle for 0x80000000 jiffies.
 */
static int as_antic_expired(struct as_data *ad)
{
        long delta_jif;

        delta_jif = jiffies - ad->antic_start;
        if (unlikely(delta_jif < 0))
                delta_jif = -delta_jif;
        if (delta_jif < ad->antic_expire)
                return 0;

        return 1;
}

/*
 * as_antic_waitnext starts anticipating that a nice request will soon be
 * submitted. See also as_antic_waitreq
 */
static void as_antic_waitnext(struct as_data *ad)
{
        unsigned long timeout;

        BUG_ON(ad->antic_status != ANTIC_OFF
                        && ad->antic_status != ANTIC_WAIT_REQ);

        timeout = ad->antic_start + ad->antic_expire;

        mod_timer(&ad->antic_timer, timeout);

        ad->antic_status = ANTIC_WAIT_NEXT;
}

/*
 * as_antic_waitreq starts anticipating. We don't start timing the anticipation
 * until the request that we're anticipating on has finished. This means we
 * are hopefully timing from when the candidate process wakes up.
 */
static void as_antic_waitreq(struct as_data *ad)
{
        BUG_ON(ad->antic_status == ANTIC_FINISHED);
        if (ad->antic_status == ANTIC_OFF) {
                if (!ad->io_context || ad->ioc_finished)
                        as_antic_waitnext(ad);
                else
                        ad->antic_status = ANTIC_WAIT_REQ;
        }
}

/*
 * This is called directly by the functions in this file to stop anticipation.
 * We kill the timer and schedule a call to the request_fn asap.
 */
static void as_antic_stop(struct as_data *ad)
{
        int status = ad->antic_status;

        if (status == ANTIC_WAIT_REQ || status == ANTIC_WAIT_NEXT) {
                if (status == ANTIC_WAIT_NEXT)
                        del_timer(&ad->antic_timer);
                ad->antic_status = ANTIC_FINISHED;
                /* see as_work_handler */
                kblockd_schedule_work(&ad->antic_work);
        }
}

/*
 * as_antic_timeout is the timer function set by as_antic_waitnext.
 */
static void as_antic_timeout(unsigned long data)
{
        struct request_queue *q = (struct request_queue *)data;
        struct as_data *ad = q->elevator->elevator_data;
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        if (ad->antic_status == ANTIC_WAIT_REQ
                        || ad->antic_status == ANTIC_WAIT_NEXT) {
                struct as_io_context *aic = ad->io_context->aic;

                ad->antic_status = ANTIC_FINISHED;
                kblockd_schedule_work(&ad->antic_work);

                if (aic->ttime_samples == 0) {
                        /* process anticipated on has exited or timed out */
                        ad->exit_prob = (7*ad->exit_prob + 256)/8;
                }
                if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
                        /* process not "saved" by a cooperating request */
                        ad->exit_no_coop = (7*ad->exit_no_coop + 256)/8;
                }
        }
        spin_unlock_irqrestore(q->queue_lock, flags);
}

static void as_update_thinktime(struct as_data *ad, struct as_io_context *aic,
                                unsigned long ttime)
{
        /* fixed point: 1.0 == 1<<8 */
        if (aic->ttime_samples == 0) {
                ad->new_ttime_total = (7*ad->new_ttime_total + 256*ttime) / 8;
                ad->new_ttime_mean = ad->new_ttime_total / 256;

                ad->exit_prob = (7*ad->exit_prob)/8;
        }
        aic->ttime_samples = (7*aic->ttime_samples + 256) / 8;
        aic->ttime_total = (7*aic->ttime_total + 256*ttime) / 8;
        aic->ttime_mean = (aic->ttime_total + 128) / aic->ttime_samples;
}
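/*
 * (Worked example: ttime_samples converges geometrically towards 256, i.e.
 * 1.0 in the 1<<8 fixed point scale above. Once there, a context with a
 * mean thinktime of 8 jiffies (ttime_total == 2048) that submits its next
 * request with zero thinktime decays to ttime_total == 1792, giving
 * ttime_mean == (1792 + 128) / 256 == 7; each new sample carries 1/8 of
 * the weight.)
 */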

static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic,
                                sector_t sdist)
{
        u64 total;

        if (aic->seek_samples == 0) {
                ad->new_seek_total = (7*ad->new_seek_total + 256*(u64)sdist)/8;
                ad->new_seek_mean = ad->new_seek_total / 256;
        }

        /*
         * Don't allow the seek distance to get too large from the
         * odd fragment, pagein, etc
         */
        if (aic->seek_samples <= 60) /* second&third seek */
                sdist = min(sdist, (aic->seek_mean * 4) + 2*1024*1024);
        else
                sdist = min(sdist, (aic->seek_mean * 4) + 2*1024*64);

        aic->seek_samples = (7*aic->seek_samples + 256) / 8;
        aic->seek_total = (7*aic->seek_total + (u64)256*sdist) / 8;
        total = aic->seek_total + (aic->seek_samples/2);
        do_div(total, aic->seek_samples);
        aic->seek_mean = (sector_t)total;
}

/*
 * as_update_iohist keeps a decaying histogram of IO thinktimes, and
 * updates @aic->ttime_mean based on that. It is called when a new
 * request is queued.
 */
static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
                                struct request *rq)
{
        struct as_rq *arq = RQ_DATA(rq);
        int data_dir = arq->is_sync;
        unsigned long thinktime = 0;
        sector_t seek_dist;

        if (aic == NULL)
                return;

        if (data_dir == REQ_SYNC) {
                unsigned long in_flight = atomic_read(&aic->nr_queued)
                                        + atomic_read(&aic->nr_dispatched);
                spin_lock(&aic->lock);
                if (test_bit(AS_TASK_IORUNNING, &aic->state) ||
                        test_bit(AS_TASK_IOSTARTED, &aic->state)) {
                        /* Calculate read -> read thinktime */
                        if (test_bit(AS_TASK_IORUNNING, &aic->state)
                                                        && in_flight == 0) {
                                thinktime = jiffies - aic->last_end_request;
                                thinktime = min(thinktime, MAX_THINKTIME-1);
                        }
                        as_update_thinktime(ad, aic, thinktime);

                        /* Calculate read -> read seek distance */
                        if (aic->last_request_pos < rq->sector)
                                seek_dist = rq->sector - aic->last_request_pos;
                        else
                                seek_dist = aic->last_request_pos - rq->sector;
                        as_update_seekdist(ad, aic, seek_dist);
                }
                aic->last_request_pos = rq->sector + rq->nr_sectors;
                set_bit(AS_TASK_IOSTARTED, &aic->state);
                spin_unlock(&aic->lock);
        }
}

/*
 * as_close_req decides if one request is considered "close" to the
 * previous one issued.
 */
static int as_close_req(struct as_data *ad, struct as_io_context *aic,
                        struct as_rq *arq)
{
        unsigned long delay;    /* milliseconds */
        sector_t last = ad->last_sector[ad->batch_data_dir];
        sector_t next = arq->request->sector;
        sector_t delta;         /* acceptable close offset (in sectors) */
        sector_t s;

        if (ad->antic_status == ANTIC_OFF || !ad->ioc_finished)
                delay = 0;
        else
                delay = ((jiffies - ad->antic_start) * 1000) / HZ;

        if (delay == 0)
                delta = 8192;
        else if (delay <= 20 && delay <= ad->antic_expire)
                delta = 8192 << delay;
        else
                return 1;

        if ((last <= next + (delta>>1)) && (next <= last + delta))
                return 1;

        if (last < next)
                s = next - last;
        else
                s = last - next;

        if (aic->seek_samples == 0) {
                /*
                 * Process has just started IO. Use past statistics to
                 * gauge success possibility
                 */
                if (ad->new_seek_mean > s) {
                        /* this request is better than what we're expecting */
                        return 1;
                }

        } else {
                if (aic->seek_mean > s) {
                        /* this request is better than what we're expecting */
                        return 1;
                }
        }

        return 0;
}
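/*
 * (For illustration: immediately after the previous request (delay 0),
 * "close" means within 8192 sectors, i.e. 4MB, ahead of the last request
 * in that direction, or half that distance behind it; the window doubles
 * for each millisecond spent anticipating, and once the delay exceeds
 * either bound in the test above any request is treated as close.)
 */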

/*
 * as_can_break_anticipation returns true if we have been anticipating this
 * request.
 *
 * It also returns true if the process against which we are anticipating
 * submits a write - that's presumably an fsync, O_SYNC write, etc. We want to
 * dispatch it ASAP, because we know that application will not be submitting
 * any new reads.
 *
 * If the task which has submitted the request has exited, break anticipation.
 *
 * If this task has queued some other IO, do not enter anticipation.
 */
static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
{
        struct io_context *ioc;
        struct as_io_context *aic;

        ioc = ad->io_context;
        BUG_ON(!ioc);

        if (arq && ioc == arq->io_context) {
                /* request from same process */
                return 1;
        }

        if (ad->ioc_finished && as_antic_expired(ad)) {
                /*
                 * In this situation status should really be FINISHED,
                 * however the timer hasn't had the chance to run yet.
                 */
                return 1;
        }

        aic = ioc->aic;
        if (!aic)
                return 0;

        if (atomic_read(&aic->nr_queued) > 0) {
                /* process has more requests queued */
                return 1;
        }

        if (atomic_read(&aic->nr_dispatched) > 0) {
                /* process has more requests dispatched */
                return 1;
        }

        if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) {
                /*
                 * Found a close request that is not one of ours.
                 *
                 * This makes close requests from another process update
                 * our IO history. It is generally useful when there are
                 * two or more cooperating processes working in the same
                 * area.
                 */
                if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
                        if (aic->ttime_samples == 0)
                                ad->exit_prob = (7*ad->exit_prob + 256)/8;

                        ad->exit_no_coop = (7*ad->exit_no_coop)/8;
                }

                as_update_iohist(ad, aic, arq->request);
                return 1;
        }

        if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
                /* process anticipated on has exited */
                if (aic->ttime_samples == 0)
                        ad->exit_prob = (7*ad->exit_prob + 256)/8;

                if (ad->exit_no_coop > 128)
                        return 1;
        }

        if (aic->ttime_samples == 0) {
                if (ad->new_ttime_mean > ad->antic_expire)
                        return 1;
                if (ad->exit_prob * ad->exit_no_coop > 128*256)
                        return 1;
        } else if (aic->ttime_mean > ad->antic_expire) {
                /* the process thinks too much between requests */
                return 1;
        }

        return 0;
}

/*
 * as_can_anticipate indicates whether we should either run arq
 * or keep anticipating a better request.
 */
static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
{
        if (!ad->io_context)
                /*
                 * Last request submitted was a write
                 */
                return 0;

        if (ad->antic_status == ANTIC_FINISHED)
                /*
                 * Don't restart if we have just finished. Run the next request
                 */
                return 0;

        if (as_can_break_anticipation(ad, arq))
                /*
                 * This request is a good candidate. Don't keep anticipating,
                 * run it.
                 */
                return 0;

        /*
         * OK from here, we haven't finished, and don't have a decent request!
         * Status is either ANTIC_OFF so start waiting,
         * ANTIC_WAIT_REQ so continue waiting for request to finish
         * or ANTIC_WAIT_NEXT so continue waiting for an acceptable request.
         */

        return 1;
}

/*
 * as_update_arq must be called whenever a request (arq) is added to
 * the sort_list. This function keeps caches up to date, and checks if the
 * request might be one we are "anticipating"
 */
static void as_update_arq(struct as_data *ad, struct as_rq *arq)
{
        const int data_dir = arq->is_sync;

        /* keep the next_arq cache up to date */
        ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]);

        /*
         * have we been anticipating this request?
         * or does it come from the same process as the one we are anticipating
         * for?
         */
        if (ad->antic_status == ANTIC_WAIT_REQ
                        || ad->antic_status == ANTIC_WAIT_NEXT) {
                if (as_can_break_anticipation(ad, arq))
                        as_antic_stop(ad);
        }
}

/*
 * Gathers timings and resizes the write batch automatically
 */
static void update_write_batch(struct as_data *ad)
{
        unsigned long batch = ad->batch_expire[REQ_ASYNC];
        long write_time;

        write_time = (jiffies - ad->current_batch_expires) + batch;
        if (write_time < 0)
                write_time = 0;

        if (write_time > batch && !ad->write_batch_idled) {
                if (write_time > batch * 3)
                        ad->write_batch_count /= 2;
                else
                        ad->write_batch_count--;
        } else if (write_time < batch && ad->current_write_count == 0) {
                if (batch > write_time * 3)
                        ad->write_batch_count *= 2;
                else
                        ad->write_batch_count++;
        }

        if (ad->write_batch_count < 1)
                ad->write_batch_count = 1;
}
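/*
 * (For illustration, with the default write batch target of HZ/8: a batch
 * whose writes took roughly HZ/2 of wall time exceeds three times the
 * target, so the per-batch request budget is halved; a batch that used up
 * its budget well before the target grows the budget again.)
 */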

/*
 * as_completed_request is to be called when a request has completed and
 * returned something to the requesting process, be it an error or data.
 */
static void as_completed_request(request_queue_t *q, struct request *rq)
{
        struct as_data *ad = q->elevator->elevator_data;
        struct as_rq *arq = RQ_DATA(rq);

        WARN_ON(!list_empty(&rq->queuelist));

        if (arq->state != AS_RQ_REMOVED) {
                printk("arq->state %d\n", arq->state);
                WARN_ON(1);
                goto out;
        }

        if (ad->changed_batch && ad->nr_dispatched == 1) {
                kblockd_schedule_work(&ad->antic_work);
                ad->changed_batch = 0;

                if (ad->batch_data_dir == REQ_SYNC)
                        ad->new_batch = 1;
        }
        WARN_ON(ad->nr_dispatched == 0);
        ad->nr_dispatched--;

        /*
         * Start counting the batch from when a request of that direction is
         * actually serviced. This should help devices with big TCQ windows
         * and writeback caches
         */
        if (ad->new_batch && ad->batch_data_dir == arq->is_sync) {
                update_write_batch(ad);
                ad->current_batch_expires = jiffies +
                                ad->batch_expire[REQ_SYNC];
                ad->new_batch = 0;
        }

        if (ad->io_context == arq->io_context && ad->io_context) {
                ad->antic_start = jiffies;
                ad->ioc_finished = 1;
                if (ad->antic_status == ANTIC_WAIT_REQ) {
                        /*
                         * We were waiting on this request, now anticipate
                         * the next one
                         */
                        as_antic_waitnext(ad);
                }
        }

        as_put_io_context(arq);
out:
        arq->state = AS_RQ_POSTSCHED;
}
1046 1046
1047 /* 1047 /*
1048 * as_remove_queued_request removes a request from the pre dispatch queue 1048 * as_remove_queued_request removes a request from the pre dispatch queue
1049 * without updating refcounts. It is expected the caller will drop the 1049 * without updating refcounts. It is expected the caller will drop the
1050 * reference unless it replaces the request at some part of the elevator 1050 * reference unless it replaces the request at some part of the elevator
1051 * (i.e. the dispatch queue) 1051 * (i.e. the dispatch queue)
1052 */ 1052 */
1053 static void as_remove_queued_request(request_queue_t *q, struct request *rq) 1053 static void as_remove_queued_request(request_queue_t *q, struct request *rq)
1054 { 1054 {
1055 struct as_rq *arq = RQ_DATA(rq); 1055 struct as_rq *arq = RQ_DATA(rq);
1056 const int data_dir = arq->is_sync; 1056 const int data_dir = arq->is_sync;
1057 struct as_data *ad = q->elevator->elevator_data; 1057 struct as_data *ad = q->elevator->elevator_data;
1058 1058
1059 WARN_ON(arq->state != AS_RQ_QUEUED); 1059 WARN_ON(arq->state != AS_RQ_QUEUED);
1060 1060
1061 if (arq->io_context && arq->io_context->aic) { 1061 if (arq->io_context && arq->io_context->aic) {
1062 BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued)); 1062 BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued));
1063 atomic_dec(&arq->io_context->aic->nr_queued); 1063 atomic_dec(&arq->io_context->aic->nr_queued);
1064 } 1064 }
1065 1065
1066 /* 1066 /*
1067 * Update the "next_arq" cache if we are about to remove its 1067 * Update the "next_arq" cache if we are about to remove its
1068 * entry 1068 * entry
1069 */ 1069 */
1070 if (ad->next_arq[data_dir] == arq) 1070 if (ad->next_arq[data_dir] == arq)
1071 ad->next_arq[data_dir] = as_find_next_arq(ad, arq); 1071 ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
1072 1072
1073 list_del_init(&arq->fifo); 1073 list_del_init(&arq->fifo);
1074 as_del_arq_hash(arq); 1074 as_del_arq_hash(arq);
1075 as_del_arq_rb(ad, arq); 1075 as_del_arq_rb(ad, arq);
1076 } 1076 }
1077 1077
1078 /* 1078 /*
1079 * as_fifo_expired returns 0 if there are no expired reads on the fifo, 1079 * as_fifo_expired returns 0 if there are no expired reads on the fifo,
1080 * 1 otherwise. It is ratelimited so that we only perform the check once per 1080 * 1 otherwise. It is ratelimited so that we only perform the check once per
1081 * `fifo_expire' interval. Otherwise a large number of expired requests 1081 * `fifo_expire' interval. Otherwise a large number of expired requests
1082 * would create a hopeless seekstorm. 1082 * would create a hopeless seekstorm.
1083 * 1083 *
1084 * See as_antic_expired comment. 1084 * See as_antic_expired comment.
1085 */ 1085 */
1086 static int as_fifo_expired(struct as_data *ad, int adir) 1086 static int as_fifo_expired(struct as_data *ad, int adir)
1087 { 1087 {
1088 struct as_rq *arq; 1088 struct as_rq *arq;
1089 long delta_jif; 1089 long delta_jif;
1090 1090
1091 delta_jif = jiffies - ad->last_check_fifo[adir]; 1091 delta_jif = jiffies - ad->last_check_fifo[adir];
1092 if (unlikely(delta_jif < 0)) 1092 if (unlikely(delta_jif < 0))
1093 delta_jif = -delta_jif; 1093 delta_jif = -delta_jif;
1094 if (delta_jif < ad->fifo_expire[adir]) 1094 if (delta_jif < ad->fifo_expire[adir])
1095 return 0; 1095 return 0;
1096 1096
1097 ad->last_check_fifo[adir] = jiffies; 1097 ad->last_check_fifo[adir] = jiffies;
1098 1098
1099 if (list_empty(&ad->fifo_list[adir])) 1099 if (list_empty(&ad->fifo_list[adir]))
1100 return 0; 1100 return 0;
1101 1101
1102 arq = list_entry_fifo(ad->fifo_list[adir].next); 1102 arq = list_entry_fifo(ad->fifo_list[adir].next);
1103 1103
1104 return time_after(jiffies, arq->expires); 1104 return time_after(jiffies, arq->expires);
1105 } 1105 }
1106 1106
1107 /* 1107 /*
1108 * as_batch_expired returns true if the current batch has expired. A batch 1108 * as_batch_expired returns true if the current batch has expired. A batch
1109 * is a set of reads or a set of writes. 1109 * is a set of reads or a set of writes.
1110 */ 1110 */
1111 static inline int as_batch_expired(struct as_data *ad) 1111 static inline int as_batch_expired(struct as_data *ad)
1112 { 1112 {
1113 if (ad->changed_batch || ad->new_batch) 1113 if (ad->changed_batch || ad->new_batch)
1114 return 0; 1114 return 0;
1115 1115
1116 if (ad->batch_data_dir == REQ_SYNC) 1116 if (ad->batch_data_dir == REQ_SYNC)
1117 /* TODO! add a check so a complete fifo gets written? */ 1117 /* TODO! add a check so a complete fifo gets written? */
1118 return time_after(jiffies, ad->current_batch_expires); 1118 return time_after(jiffies, ad->current_batch_expires);
1119 1119
1120 return time_after(jiffies, ad->current_batch_expires) 1120 return time_after(jiffies, ad->current_batch_expires)
1121 || ad->current_write_count == 0; 1121 || ad->current_write_count == 0;
1122 } 1122 }
1123 1123
1124 /* 1124 /*
1125 * move an entry to dispatch queue 1125 * move an entry to dispatch queue
1126 */ 1126 */
1127 static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) 1127 static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
1128 { 1128 {
1129 struct request *rq = arq->request; 1129 struct request *rq = arq->request;
1130 const int data_dir = arq->is_sync; 1130 const int data_dir = arq->is_sync;
1131 1131
1132 BUG_ON(!ON_RB(&arq->rb_node)); 1132 BUG_ON(!ON_RB(&arq->rb_node));
1133 1133
1134 as_antic_stop(ad); 1134 as_antic_stop(ad);
1135 ad->antic_status = ANTIC_OFF; 1135 ad->antic_status = ANTIC_OFF;
1136 1136
1137 /* 1137 /*
1138 * This has to be set in order to be correctly updated by 1138 * This has to be set in order to be correctly updated by
1139 * as_find_next_arq 1139 * as_find_next_arq
1140 */ 1140 */
1141 ad->last_sector[data_dir] = rq->sector + rq->nr_sectors; 1141 ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
1142 1142
1143 if (data_dir == REQ_SYNC) { 1143 if (data_dir == REQ_SYNC) {
1144 /* In case we have to anticipate after this */ 1144 /* In case we have to anticipate after this */
1145 copy_io_context(&ad->io_context, &arq->io_context); 1145 copy_io_context(&ad->io_context, &arq->io_context);
1146 } else { 1146 } else {
1147 if (ad->io_context) { 1147 if (ad->io_context) {
1148 put_io_context(ad->io_context); 1148 put_io_context(ad->io_context);
1149 ad->io_context = NULL; 1149 ad->io_context = NULL;
1150 } 1150 }
1151 1151
1152 if (ad->current_write_count != 0) 1152 if (ad->current_write_count != 0)
1153 ad->current_write_count--; 1153 ad->current_write_count--;
1154 } 1154 }
1155 ad->ioc_finished = 0; 1155 ad->ioc_finished = 0;
1156 1156
1157 ad->next_arq[data_dir] = as_find_next_arq(ad, arq); 1157 ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
1158 1158
1159 /* 1159 /*
1160 * take it off the sort and fifo list, add to dispatch queue 1160 * take it off the sort and fifo list, add to dispatch queue
1161 */ 1161 */
1162 as_remove_queued_request(ad->q, rq); 1162 as_remove_queued_request(ad->q, rq);
1163 WARN_ON(arq->state != AS_RQ_QUEUED); 1163 WARN_ON(arq->state != AS_RQ_QUEUED);
1164 1164
1165 elv_dispatch_sort(ad->q, rq); 1165 elv_dispatch_sort(ad->q, rq);
1166 1166
1167 arq->state = AS_RQ_DISPATCHED; 1167 arq->state = AS_RQ_DISPATCHED;
1168 if (arq->io_context && arq->io_context->aic) 1168 if (arq->io_context && arq->io_context->aic)
1169 atomic_inc(&arq->io_context->aic->nr_dispatched); 1169 atomic_inc(&arq->io_context->aic->nr_dispatched);
1170 ad->nr_dispatched++; 1170 ad->nr_dispatched++;
1171 } 1171 }
1172 1172
1173 /* 1173 /*
1174 * as_dispatch_request selects the best request according to 1174 * as_dispatch_request selects the best request according to
1175 * read/write expire, batch expire, etc, and moves it to the dispatch 1175 * read/write expire, batch expire, etc, and moves it to the dispatch
1176 * queue. Returns 1 if a request was found, 0 otherwise. 1176 * queue. Returns 1 if a request was found, 0 otherwise.
1177 */ 1177 */
1178 static int as_dispatch_request(request_queue_t *q, int force) 1178 static int as_dispatch_request(request_queue_t *q, int force)
1179 { 1179 {
1180 struct as_data *ad = q->elevator->elevator_data; 1180 struct as_data *ad = q->elevator->elevator_data;
1181 struct as_rq *arq; 1181 struct as_rq *arq;
1182 const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); 1182 const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
1183 const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]); 1183 const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
1184 1184
1185 if (unlikely(force)) { 1185 if (unlikely(force)) {
1186 /* 1186 /*
1187 * Forced dispatch, accounting is useless. Reset 1187 * Forced dispatch, accounting is useless. Reset
1188 * accounting states and dump fifo_lists. Note that 1188 * accounting states and dump fifo_lists. Note that
1189 * batch_data_dir is reset to REQ_SYNC to avoid 1189 * batch_data_dir is reset to REQ_SYNC to avoid
1190 * screwing write batch accounting as write batch 1190 * screwing write batch accounting as write batch
1191 * accounting occurs on W->R transition. 1191 * accounting occurs on W->R transition.
1192 */ 1192 */
1193 int dispatched = 0; 1193 int dispatched = 0;
1194 1194
1195 ad->batch_data_dir = REQ_SYNC; 1195 ad->batch_data_dir = REQ_SYNC;
1196 ad->changed_batch = 0; 1196 ad->changed_batch = 0;
1197 ad->new_batch = 0; 1197 ad->new_batch = 0;
1198 1198
1199 while (ad->next_arq[REQ_SYNC]) { 1199 while (ad->next_arq[REQ_SYNC]) {
1200 as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]); 1200 as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
1201 dispatched++; 1201 dispatched++;
1202 } 1202 }
1203 ad->last_check_fifo[REQ_SYNC] = jiffies; 1203 ad->last_check_fifo[REQ_SYNC] = jiffies;
1204 1204
1205 while (ad->next_arq[REQ_ASYNC]) { 1205 while (ad->next_arq[REQ_ASYNC]) {
1206 as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]); 1206 as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
1207 dispatched++; 1207 dispatched++;
1208 } 1208 }
1209 ad->last_check_fifo[REQ_ASYNC] = jiffies; 1209 ad->last_check_fifo[REQ_ASYNC] = jiffies;
1210 1210
1211 return dispatched; 1211 return dispatched;
1212 } 1212 }
1213 1213
1214 /* Signal that the write batch was uncontended, so we can't time it */ 1214 /* Signal that the write batch was uncontended, so we can't time it */
1215 if (ad->batch_data_dir == REQ_ASYNC && !reads) { 1215 if (ad->batch_data_dir == REQ_ASYNC && !reads) {
1216 if (ad->current_write_count == 0 || !writes) 1216 if (ad->current_write_count == 0 || !writes)
1217 ad->write_batch_idled = 1; 1217 ad->write_batch_idled = 1;
1218 } 1218 }
1219 1219
1220 if (!(reads || writes) 1220 if (!(reads || writes)
1221 || ad->antic_status == ANTIC_WAIT_REQ 1221 || ad->antic_status == ANTIC_WAIT_REQ
1222 || ad->antic_status == ANTIC_WAIT_NEXT 1222 || ad->antic_status == ANTIC_WAIT_NEXT
1223 || ad->changed_batch) 1223 || ad->changed_batch)
1224 return 0; 1224 return 0;
1225 1225
1226 if (!(reads && writes && as_batch_expired(ad))) { 1226 if (!(reads && writes && as_batch_expired(ad))) {
1227 /* 1227 /*
1228 * batch is still running or no reads or no writes 1228 * batch is still running or no reads or no writes
1229 */ 1229 */
1230 arq = ad->next_arq[ad->batch_data_dir]; 1230 arq = ad->next_arq[ad->batch_data_dir];
1231 1231
1232 if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) { 1232 if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) {
1233 if (as_fifo_expired(ad, REQ_SYNC)) 1233 if (as_fifo_expired(ad, REQ_SYNC))
1234 goto fifo_expired; 1234 goto fifo_expired;
1235 1235
1236 if (as_can_anticipate(ad, arq)) { 1236 if (as_can_anticipate(ad, arq)) {
1237 as_antic_waitreq(ad); 1237 as_antic_waitreq(ad);
1238 return 0; 1238 return 0;
1239 } 1239 }
1240 } 1240 }
1241 1241
1242 if (arq) { 1242 if (arq) {
1243 /* we have a "next request" */ 1243 /* we have a "next request" */
1244 if (reads && !writes) 1244 if (reads && !writes)
1245 ad->current_batch_expires = 1245 ad->current_batch_expires =
1246 jiffies + ad->batch_expire[REQ_SYNC]; 1246 jiffies + ad->batch_expire[REQ_SYNC];
1247 goto dispatch_request; 1247 goto dispatch_request;
1248 } 1248 }
1249 } 1249 }
1250 1250
1251 /* 1251 /*
1252 * at this point we are not running a batch. select the appropriate 1252 * at this point we are not running a batch. select the appropriate
1253 * data direction (read / write) 1253 * data direction (read / write)
1254 */ 1254 */
1255 1255
1256 if (reads) { 1256 if (reads) {
1257 BUG_ON(RB_EMPTY(&ad->sort_list[REQ_SYNC])); 1257 BUG_ON(RB_EMPTY(&ad->sort_list[REQ_SYNC]));
1258 1258
1259 if (writes && ad->batch_data_dir == REQ_SYNC) 1259 if (writes && ad->batch_data_dir == REQ_SYNC)
1260 /* 1260 /*
1261 * Last batch was a read, switch to writes 1261 * Last batch was a read, switch to writes
1262 */ 1262 */
1263 goto dispatch_writes; 1263 goto dispatch_writes;
1264 1264
1265 if (ad->batch_data_dir == REQ_ASYNC) { 1265 if (ad->batch_data_dir == REQ_ASYNC) {
1266 WARN_ON(ad->new_batch); 1266 WARN_ON(ad->new_batch);
1267 ad->changed_batch = 1; 1267 ad->changed_batch = 1;
1268 } 1268 }
1269 ad->batch_data_dir = REQ_SYNC; 1269 ad->batch_data_dir = REQ_SYNC;
1270 arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); 1270 arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
1271 ad->last_check_fifo[ad->batch_data_dir] = jiffies; 1271 ad->last_check_fifo[ad->batch_data_dir] = jiffies;
1272 goto dispatch_request; 1272 goto dispatch_request;
1273 } 1273 }
1274 1274
1275 /* 1275 /*
1276 * the last batch was a read 1276 * the last batch was a read
1277 */ 1277 */
1278 1278
1279 if (writes) { 1279 if (writes) {
1280 dispatch_writes: 1280 dispatch_writes:
1281 BUG_ON(RB_EMPTY(&ad->sort_list[REQ_ASYNC])); 1281 BUG_ON(RB_EMPTY(&ad->sort_list[REQ_ASYNC]));
1282 1282
1283 if (ad->batch_data_dir == REQ_SYNC) { 1283 if (ad->batch_data_dir == REQ_SYNC) {
1284 ad->changed_batch = 1; 1284 ad->changed_batch = 1;
1285 1285
1286 /* 1286 /*
1287 * new_batch might be 1 when the queue runs out of 1287 * new_batch might be 1 when the queue runs out of
1288 * reads. A subsequent submission of a write might 1288 * reads. A subsequent submission of a write might
1289 * cause a change of batch before the read is finished. 1289 * cause a change of batch before the read is finished.
1290 */ 1290 */
1291 ad->new_batch = 0; 1291 ad->new_batch = 0;
1292 } 1292 }
1293 ad->batch_data_dir = REQ_ASYNC; 1293 ad->batch_data_dir = REQ_ASYNC;
1294 ad->current_write_count = ad->write_batch_count; 1294 ad->current_write_count = ad->write_batch_count;
1295 ad->write_batch_idled = 0; 1295 ad->write_batch_idled = 0;
1296 arq = ad->next_arq[ad->batch_data_dir]; 1296 arq = ad->next_arq[ad->batch_data_dir];
1297 goto dispatch_request; 1297 goto dispatch_request;
1298 } 1298 }
1299 1299
1300 BUG(); 1300 BUG();
1301 return 0; 1301 return 0;
1302 1302
1303 dispatch_request: 1303 dispatch_request:
1304 /* 1304 /*
1305 * If a request has expired, service it. 1305 * If a request has expired, service it.
1306 */ 1306 */
1307 1307
1308 if (as_fifo_expired(ad, ad->batch_data_dir)) { 1308 if (as_fifo_expired(ad, ad->batch_data_dir)) {
1309 fifo_expired: 1309 fifo_expired:
1310 arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); 1310 arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
1311 BUG_ON(arq == NULL); 1311 BUG_ON(arq == NULL);
1312 } 1312 }
1313 1313
1314 if (ad->changed_batch) { 1314 if (ad->changed_batch) {
1315 WARN_ON(ad->new_batch); 1315 WARN_ON(ad->new_batch);
1316 1316
1317 if (ad->nr_dispatched) 1317 if (ad->nr_dispatched)
1318 return 0; 1318 return 0;
1319 1319
1320 if (ad->batch_data_dir == REQ_ASYNC) 1320 if (ad->batch_data_dir == REQ_ASYNC)
1321 ad->current_batch_expires = jiffies + 1321 ad->current_batch_expires = jiffies +
1322 ad->batch_expire[REQ_ASYNC]; 1322 ad->batch_expire[REQ_ASYNC];
1323 else 1323 else
1324 ad->new_batch = 1; 1324 ad->new_batch = 1;
1325 1325
1326 ad->changed_batch = 0; 1326 ad->changed_batch = 0;
1327 } 1327 }
1328 1328
1329 /* 1329 /*
1330 * arq is the selected appropriate request. 1330 * arq is the selected appropriate request.
1331 */ 1331 */
1332 as_move_to_dispatch(ad, arq); 1332 as_move_to_dispatch(ad, arq);
1333 1333
1334 return 1; 1334 return 1;
1335 } 1335 }
1336 1336
1337 /* 1337 /*
1338 * add arq to rbtree and fifo 1338 * add arq to rbtree and fifo
1339 */ 1339 */
1340 static void as_add_request(request_queue_t *q, struct request *rq) 1340 static void as_add_request(request_queue_t *q, struct request *rq)
1341 { 1341 {
1342 struct as_data *ad = q->elevator->elevator_data; 1342 struct as_data *ad = q->elevator->elevator_data;
1343 struct as_rq *arq = RQ_DATA(rq); 1343 struct as_rq *arq = RQ_DATA(rq);
1344 int data_dir; 1344 int data_dir;
1345 1345
1346 arq->state = AS_RQ_NEW; 1346 arq->state = AS_RQ_NEW;
1347 1347
1348 if (rq_data_dir(arq->request) == READ 1348 if (rq_data_dir(arq->request) == READ
1349 || current->flags&PF_SYNCWRITE) 1349 || current->flags&PF_SYNCWRITE)
1350 arq->is_sync = 1; 1350 arq->is_sync = 1;
1351 else 1351 else
1352 arq->is_sync = 0; 1352 arq->is_sync = 0;
1353 data_dir = arq->is_sync; 1353 data_dir = arq->is_sync;
1354 1354
1355 arq->io_context = as_get_io_context(); 1355 arq->io_context = as_get_io_context();
1356 1356
1357 if (arq->io_context) { 1357 if (arq->io_context) {
1358 as_update_iohist(ad, arq->io_context->aic, arq->request); 1358 as_update_iohist(ad, arq->io_context->aic, arq->request);
1359 atomic_inc(&arq->io_context->aic->nr_queued); 1359 atomic_inc(&arq->io_context->aic->nr_queued);
1360 } 1360 }
1361 1361
1362 as_add_arq_rb(ad, arq); 1362 as_add_arq_rb(ad, arq);
1363 if (rq_mergeable(arq->request)) 1363 if (rq_mergeable(arq->request))
1364 as_add_arq_hash(ad, arq); 1364 as_add_arq_hash(ad, arq);
1365 1365
1366 /* 1366 /*
1367 * set expire time (only used for reads) and add to fifo list 1367 * set expire time (only used for reads) and add to fifo list
1368 */ 1368 */
1369 arq->expires = jiffies + ad->fifo_expire[data_dir]; 1369 arq->expires = jiffies + ad->fifo_expire[data_dir];
1370 list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); 1370 list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
1371 1371
1372 as_update_arq(ad, arq); /* keep state machine up to date */ 1372 as_update_arq(ad, arq); /* keep state machine up to date */
1373 arq->state = AS_RQ_QUEUED; 1373 arq->state = AS_RQ_QUEUED;
1374 } 1374 }
1375 1375
1376 static void as_activate_request(request_queue_t *q, struct request *rq) 1376 static void as_activate_request(request_queue_t *q, struct request *rq)
1377 { 1377 {
1378 struct as_rq *arq = RQ_DATA(rq); 1378 struct as_rq *arq = RQ_DATA(rq);
1379 1379
1380 WARN_ON(arq->state != AS_RQ_DISPATCHED); 1380 WARN_ON(arq->state != AS_RQ_DISPATCHED);
1381 arq->state = AS_RQ_REMOVED; 1381 arq->state = AS_RQ_REMOVED;
1382 if (arq->io_context && arq->io_context->aic) 1382 if (arq->io_context && arq->io_context->aic)
1383 atomic_dec(&arq->io_context->aic->nr_dispatched); 1383 atomic_dec(&arq->io_context->aic->nr_dispatched);
1384 } 1384 }
1385 1385
1386 static void as_deactivate_request(request_queue_t *q, struct request *rq) 1386 static void as_deactivate_request(request_queue_t *q, struct request *rq)
1387 { 1387 {
1388 struct as_rq *arq = RQ_DATA(rq); 1388 struct as_rq *arq = RQ_DATA(rq);
1389 1389
1390 WARN_ON(arq->state != AS_RQ_REMOVED); 1390 WARN_ON(arq->state != AS_RQ_REMOVED);
1391 arq->state = AS_RQ_DISPATCHED; 1391 arq->state = AS_RQ_DISPATCHED;
1392 if (arq->io_context && arq->io_context->aic) 1392 if (arq->io_context && arq->io_context->aic)
1393 atomic_inc(&arq->io_context->aic->nr_dispatched); 1393 atomic_inc(&arq->io_context->aic->nr_dispatched);
1394 } 1394 }
1395 1395
1396 /* 1396 /*
1397 * as_queue_empty tells us if there are requests left in the device. It may 1397 * as_queue_empty tells us if there are requests left in the device. It may
1398 * not be the case that a driver can get the next request even if the queue 1398 * not be the case that a driver can get the next request even if the queue
1399 * is not empty - it is used in the block layer to check for plugging and 1399 * is not empty - it is used in the block layer to check for plugging and
1400 * merging opportunities 1400 * merging opportunities
1401 */ 1401 */
1402 static int as_queue_empty(request_queue_t *q) 1402 static int as_queue_empty(request_queue_t *q)
1403 { 1403 {
1404 struct as_data *ad = q->elevator->elevator_data; 1404 struct as_data *ad = q->elevator->elevator_data;
1405 1405
1406 return list_empty(&ad->fifo_list[REQ_ASYNC]) 1406 return list_empty(&ad->fifo_list[REQ_ASYNC])
1407 && list_empty(&ad->fifo_list[REQ_SYNC]); 1407 && list_empty(&ad->fifo_list[REQ_SYNC]);
1408 } 1408 }
1409 1409
1410 static struct request *as_former_request(request_queue_t *q, 1410 static struct request *as_former_request(request_queue_t *q,
1411 struct request *rq) 1411 struct request *rq)
1412 { 1412 {
1413 struct as_rq *arq = RQ_DATA(rq); 1413 struct as_rq *arq = RQ_DATA(rq);
1414 struct rb_node *rbprev = rb_prev(&arq->rb_node); 1414 struct rb_node *rbprev = rb_prev(&arq->rb_node);
1415 struct request *ret = NULL; 1415 struct request *ret = NULL;
1416 1416
1417 if (rbprev) 1417 if (rbprev)
1418 ret = rb_entry_arq(rbprev)->request; 1418 ret = rb_entry_arq(rbprev)->request;
1419 1419
1420 return ret; 1420 return ret;
1421 } 1421 }
1422 1422
1423 static struct request *as_latter_request(request_queue_t *q, 1423 static struct request *as_latter_request(request_queue_t *q,
1424 struct request *rq) 1424 struct request *rq)
1425 { 1425 {
1426 struct as_rq *arq = RQ_DATA(rq); 1426 struct as_rq *arq = RQ_DATA(rq);
1427 struct rb_node *rbnext = rb_next(&arq->rb_node); 1427 struct rb_node *rbnext = rb_next(&arq->rb_node);
1428 struct request *ret = NULL; 1428 struct request *ret = NULL;
1429 1429
1430 if (rbnext) 1430 if (rbnext)
1431 ret = rb_entry_arq(rbnext)->request; 1431 ret = rb_entry_arq(rbnext)->request;
1432 1432
1433 return ret; 1433 return ret;
1434 } 1434 }
1435 1435
1436 static int 1436 static int
1437 as_merge(request_queue_t *q, struct request **req, struct bio *bio) 1437 as_merge(request_queue_t *q, struct request **req, struct bio *bio)
1438 { 1438 {
1439 struct as_data *ad = q->elevator->elevator_data; 1439 struct as_data *ad = q->elevator->elevator_data;
1440 sector_t rb_key = bio->bi_sector + bio_sectors(bio); 1440 sector_t rb_key = bio->bi_sector + bio_sectors(bio);
1441 struct request *__rq; 1441 struct request *__rq;
1442 int ret; 1442 int ret;
1443 1443
1444 /* 1444 /*
1445 * see if the merge hash can satisfy a back merge 1445 * see if the merge hash can satisfy a back merge
1446 */ 1446 */
1447 __rq = as_find_arq_hash(ad, bio->bi_sector); 1447 __rq = as_find_arq_hash(ad, bio->bi_sector);
1448 if (__rq) { 1448 if (__rq) {
1449 BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); 1449 BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
1450 1450
1451 if (elv_rq_merge_ok(__rq, bio)) { 1451 if (elv_rq_merge_ok(__rq, bio)) {
1452 ret = ELEVATOR_BACK_MERGE; 1452 ret = ELEVATOR_BACK_MERGE;
1453 goto out; 1453 goto out;
1454 } 1454 }
1455 } 1455 }
1456 1456
1457 /* 1457 /*
1458 * check for front merge 1458 * check for front merge
1459 */ 1459 */
1460 __rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio)); 1460 __rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio));
1461 if (__rq) { 1461 if (__rq) {
1462 BUG_ON(rb_key != rq_rb_key(__rq)); 1462 BUG_ON(rb_key != rq_rb_key(__rq));
1463 1463
1464 if (elv_rq_merge_ok(__rq, bio)) { 1464 if (elv_rq_merge_ok(__rq, bio)) {
1465 ret = ELEVATOR_FRONT_MERGE; 1465 ret = ELEVATOR_FRONT_MERGE;
1466 goto out; 1466 goto out;
1467 } 1467 }
1468 } 1468 }
1469 1469
1470 return ELEVATOR_NO_MERGE; 1470 return ELEVATOR_NO_MERGE;
1471 out: 1471 out:
1472 if (ret) { 1472 if (ret) {
1473 if (rq_mergeable(__rq)) 1473 if (rq_mergeable(__rq))
1474 as_hot_arq_hash(ad, RQ_DATA(__rq)); 1474 as_hot_arq_hash(ad, RQ_DATA(__rq));
1475 } 1475 }
1476 *req = __rq; 1476 *req = __rq;
1477 return ret; 1477 return ret;
1478 } 1478 }
1479 1479
1480 static void as_merged_request(request_queue_t *q, struct request *req) 1480 static void as_merged_request(request_queue_t *q, struct request *req)
1481 { 1481 {
1482 struct as_data *ad = q->elevator->elevator_data; 1482 struct as_data *ad = q->elevator->elevator_data;
1483 struct as_rq *arq = RQ_DATA(req); 1483 struct as_rq *arq = RQ_DATA(req);
1484 1484
1485 /* 1485 /*
1486 * hash always needs to be repositioned, key is end sector 1486 * hash always needs to be repositioned, key is end sector
1487 */ 1487 */
1488 as_del_arq_hash(arq); 1488 as_del_arq_hash(arq);
1489 as_add_arq_hash(ad, arq); 1489 as_add_arq_hash(ad, arq);
1490 1490
1491 /* 1491 /*
1492 * if the merge was a front merge, we need to reposition request 1492 * if the merge was a front merge, we need to reposition request
1493 */ 1493 */
1494 if (rq_rb_key(req) != arq->rb_key) { 1494 if (rq_rb_key(req) != arq->rb_key) {
1495 as_del_arq_rb(ad, arq); 1495 as_del_arq_rb(ad, arq);
1496 as_add_arq_rb(ad, arq); 1496 as_add_arq_rb(ad, arq);
1497 /* 1497 /*
1498 * Note! At this stage of this and the next function, our next 1498 * Note! At this stage of this and the next function, our next
1499 * request may not be optimal - eg the request may have "grown" 1499 * request may not be optimal - eg the request may have "grown"
1500 * behind the disk head. We currently don't bother adjusting. 1500 * behind the disk head. We currently don't bother adjusting.
1501 */ 1501 */
1502 } 1502 }
1503 } 1503 }
1504 1504
1505 static void as_merged_requests(request_queue_t *q, struct request *req, 1505 static void as_merged_requests(request_queue_t *q, struct request *req,
1506 struct request *next) 1506 struct request *next)
1507 { 1507 {
1508 struct as_data *ad = q->elevator->elevator_data; 1508 struct as_data *ad = q->elevator->elevator_data;
1509 struct as_rq *arq = RQ_DATA(req); 1509 struct as_rq *arq = RQ_DATA(req);
1510 struct as_rq *anext = RQ_DATA(next); 1510 struct as_rq *anext = RQ_DATA(next);
1511 1511
1512 BUG_ON(!arq); 1512 BUG_ON(!arq);
1513 BUG_ON(!anext); 1513 BUG_ON(!anext);
1514 1514
1515 /* 1515 /*
1516 * reposition arq (this is the merged request) in hash, and in rbtree 1516 * reposition arq (this is the merged request) in hash, and in rbtree
1517 * in case of a front merge 1517 * in case of a front merge
1518 */ 1518 */
1519 as_del_arq_hash(arq); 1519 as_del_arq_hash(arq);
1520 as_add_arq_hash(ad, arq); 1520 as_add_arq_hash(ad, arq);
1521 1521
1522 if (rq_rb_key(req) != arq->rb_key) { 1522 if (rq_rb_key(req) != arq->rb_key) {
1523 as_del_arq_rb(ad, arq); 1523 as_del_arq_rb(ad, arq);
1524 as_add_arq_rb(ad, arq); 1524 as_add_arq_rb(ad, arq);
1525 } 1525 }
1526 1526
1527 /* 1527 /*
1528 * if anext expires before arq, assign its expire time to arq 1528 * if anext expires before arq, assign its expire time to arq
1529 * and move into anext position (anext will be deleted) in fifo 1529 * and move into anext position (anext will be deleted) in fifo
1530 */ 1530 */
1531 if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) { 1531 if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) {
1532 if (time_before(anext->expires, arq->expires)) { 1532 if (time_before(anext->expires, arq->expires)) {
1533 list_move(&arq->fifo, &anext->fifo); 1533 list_move(&arq->fifo, &anext->fifo);
1534 arq->expires = anext->expires; 1534 arq->expires = anext->expires;
1535 /* 1535 /*
1536 * Don't copy here but swap, because when anext is 1536 * Don't copy here but swap, because when anext is
1537 * removed below, it must contain the unused context 1537 * removed below, it must contain the unused context
1538 */ 1538 */
1539 swap_io_context(&arq->io_context, &anext->io_context); 1539 swap_io_context(&arq->io_context, &anext->io_context);
1540 } 1540 }
1541 } 1541 }
1542 1542
1543 /* 1543 /*
1544 * kill knowledge of next, this one is a goner 1544 * kill knowledge of next, this one is a goner
1545 */ 1545 */
1546 as_remove_queued_request(q, next); 1546 as_remove_queued_request(q, next);
1547 as_put_io_context(anext); 1547 as_put_io_context(anext);
1548 1548
1549 anext->state = AS_RQ_MERGED; 1549 anext->state = AS_RQ_MERGED;
1550 } 1550 }
1551 1551
1552 /* 1552 /*
1553 * This is executed in a "deferred" process context, by kblockd. It calls the 1553 * This is executed in a "deferred" process context, by kblockd. It calls the
1554 * driver's request_fn so the driver can submit that request. 1554 * driver's request_fn so the driver can submit that request.
1555 * 1555 *
1556 * IMPORTANT! This guy will reenter the elevator, so set up all queue global 1556 * IMPORTANT! This guy will reenter the elevator, so set up all queue global
1557 * state before calling, and don't rely on any state over calls. 1557 * state before calling, and don't rely on any state over calls.
1558 * 1558 *
1559 * FIXME! dispatch queue is not a queue at all! 1559 * FIXME! dispatch queue is not a queue at all!
1560 */ 1560 */
1561 static void as_work_handler(void *data) 1561 static void as_work_handler(void *data)
1562 { 1562 {
1563 struct request_queue *q = data; 1563 struct request_queue *q = data;
1564 unsigned long flags; 1564 unsigned long flags;
1565 1565
1566 spin_lock_irqsave(q->queue_lock, flags); 1566 spin_lock_irqsave(q->queue_lock, flags);
1567 if (!as_queue_empty(q)) 1567 if (!as_queue_empty(q))
1568 q->request_fn(q); 1568 q->request_fn(q);
1569 spin_unlock_irqrestore(q->queue_lock, flags); 1569 spin_unlock_irqrestore(q->queue_lock, flags);
1570 } 1570 }
1571 1571
1572 static void as_put_request(request_queue_t *q, struct request *rq) 1572 static void as_put_request(request_queue_t *q, struct request *rq)
1573 { 1573 {
1574 struct as_data *ad = q->elevator->elevator_data; 1574 struct as_data *ad = q->elevator->elevator_data;
1575 struct as_rq *arq = RQ_DATA(rq); 1575 struct as_rq *arq = RQ_DATA(rq);
1576 1576
1577 if (!arq) { 1577 if (!arq) {
1578 WARN_ON(1); 1578 WARN_ON(1);
1579 return; 1579 return;
1580 } 1580 }
1581 1581
1582 if (unlikely(arq->state != AS_RQ_POSTSCHED && 1582 if (unlikely(arq->state != AS_RQ_POSTSCHED &&
1583 arq->state != AS_RQ_PRESCHED && 1583 arq->state != AS_RQ_PRESCHED &&
1584 arq->state != AS_RQ_MERGED)) { 1584 arq->state != AS_RQ_MERGED)) {
1585 printk("arq->state %d\n", arq->state); 1585 printk("arq->state %d\n", arq->state);
1586 WARN_ON(1); 1586 WARN_ON(1);
1587 } 1587 }
1588 1588
1589 mempool_free(arq, ad->arq_pool); 1589 mempool_free(arq, ad->arq_pool);
1590 rq->elevator_private = NULL; 1590 rq->elevator_private = NULL;
1591 } 1591 }
1592 1592
1593 static int as_set_request(request_queue_t *q, struct request *rq, 1593 static int as_set_request(request_queue_t *q, struct request *rq,
1594 struct bio *bio, gfp_t gfp_mask) 1594 struct bio *bio, gfp_t gfp_mask)
1595 { 1595 {
1596 struct as_data *ad = q->elevator->elevator_data; 1596 struct as_data *ad = q->elevator->elevator_data;
1597 struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask); 1597 struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
1598 1598
1599 if (arq) { 1599 if (arq) {
1600 memset(arq, 0, sizeof(*arq)); 1600 memset(arq, 0, sizeof(*arq));
1601 RB_CLEAR(&arq->rb_node); 1601 RB_CLEAR(&arq->rb_node);
1602 arq->request = rq; 1602 arq->request = rq;
1603 arq->state = AS_RQ_PRESCHED; 1603 arq->state = AS_RQ_PRESCHED;
1604 arq->io_context = NULL; 1604 arq->io_context = NULL;
1605 INIT_LIST_HEAD(&arq->hash); 1605 INIT_LIST_HEAD(&arq->hash);
1606 arq->on_hash = 0; 1606 arq->on_hash = 0;
1607 INIT_LIST_HEAD(&arq->fifo); 1607 INIT_LIST_HEAD(&arq->fifo);
1608 rq->elevator_private = arq; 1608 rq->elevator_private = arq;
1609 return 0; 1609 return 0;
1610 } 1610 }
1611 1611
1612 return 1; 1612 return 1;
1613 } 1613 }
1614 1614
1615 static int as_may_queue(request_queue_t *q, int rw, struct bio *bio) 1615 static int as_may_queue(request_queue_t *q, int rw, struct bio *bio)
1616 { 1616 {
1617 int ret = ELV_MQUEUE_MAY; 1617 int ret = ELV_MQUEUE_MAY;
1618 struct as_data *ad = q->elevator->elevator_data; 1618 struct as_data *ad = q->elevator->elevator_data;
1619 struct io_context *ioc; 1619 struct io_context *ioc;
1620 if (ad->antic_status == ANTIC_WAIT_REQ || 1620 if (ad->antic_status == ANTIC_WAIT_REQ ||
1621 ad->antic_status == ANTIC_WAIT_NEXT) { 1621 ad->antic_status == ANTIC_WAIT_NEXT) {
1622 ioc = as_get_io_context(); 1622 ioc = as_get_io_context();
1623 if (ad->io_context == ioc) 1623 if (ad->io_context == ioc)
1624 ret = ELV_MQUEUE_MUST; 1624 ret = ELV_MQUEUE_MUST;
1625 put_io_context(ioc); 1625 put_io_context(ioc);
1626 } 1626 }
1627 1627
1628 return ret; 1628 return ret;
1629 } 1629 }
1630 1630
1631 static void as_exit_queue(elevator_t *e) 1631 static void as_exit_queue(elevator_t *e)
1632 { 1632 {
1633 struct as_data *ad = e->elevator_data; 1633 struct as_data *ad = e->elevator_data;
1634 1634
1635 del_timer_sync(&ad->antic_timer); 1635 del_timer_sync(&ad->antic_timer);
1636 kblockd_flush(); 1636 kblockd_flush();
1637 1637
1638 BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); 1638 BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
1639 BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); 1639 BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
1640 1640
1641 mempool_destroy(ad->arq_pool); 1641 mempool_destroy(ad->arq_pool);
1642 put_io_context(ad->io_context); 1642 put_io_context(ad->io_context);
1643 kfree(ad->hash); 1643 kfree(ad->hash);
1644 kfree(ad); 1644 kfree(ad);
1645 } 1645 }
1646 1646
1647 /* 1647 /*
1648 * initialize elevator private data (as_data), and alloc an arq for 1648 * initialize elevator private data (as_data), and alloc an arq for
1649 * each request on the free lists 1649 * each request on the free lists
1650 */ 1650 */
1651 static int as_init_queue(request_queue_t *q, elevator_t *e) 1651 static int as_init_queue(request_queue_t *q, elevator_t *e)
1652 { 1652 {
1653 struct as_data *ad; 1653 struct as_data *ad;
1654 int i; 1654 int i;
1655 1655
1656 if (!arq_pool) 1656 if (!arq_pool)
1657 return -ENOMEM; 1657 return -ENOMEM;
1658 1658
1659 ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node); 1659 ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node);
1660 if (!ad) 1660 if (!ad)
1661 return -ENOMEM; 1661 return -ENOMEM;
1662 memset(ad, 0, sizeof(*ad)); 1662 memset(ad, 0, sizeof(*ad));
1663 1663
1664 ad->q = q; /* Identify what queue the data belongs to */ 1664 ad->q = q; /* Identify what queue the data belongs to */
1665 1665
1666 ad->hash = kmalloc_node(sizeof(struct list_head)*AS_HASH_ENTRIES, 1666 ad->hash = kmalloc_node(sizeof(struct list_head)*AS_HASH_ENTRIES,
1667 GFP_KERNEL, q->node); 1667 GFP_KERNEL, q->node);
1668 if (!ad->hash) { 1668 if (!ad->hash) {
1669 kfree(ad); 1669 kfree(ad);
1670 return -ENOMEM; 1670 return -ENOMEM;
1671 } 1671 }
1672 1672
1673 ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 1673 ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
1674 mempool_free_slab, arq_pool, q->node); 1674 mempool_free_slab, arq_pool, q->node);
1675 if (!ad->arq_pool) { 1675 if (!ad->arq_pool) {
1676 kfree(ad->hash); 1676 kfree(ad->hash);
1677 kfree(ad); 1677 kfree(ad);
1678 return -ENOMEM; 1678 return -ENOMEM;
1679 } 1679 }
1680 1680
1681 /* anticipatory scheduling helpers */ 1681 /* anticipatory scheduling helpers */
1682 ad->antic_timer.function = as_antic_timeout; 1682 ad->antic_timer.function = as_antic_timeout;
1683 ad->antic_timer.data = (unsigned long)q; 1683 ad->antic_timer.data = (unsigned long)q;
1684 init_timer(&ad->antic_timer); 1684 init_timer(&ad->antic_timer);
1685 INIT_WORK(&ad->antic_work, as_work_handler, q); 1685 INIT_WORK(&ad->antic_work, as_work_handler, q);
1686 1686
1687 for (i = 0; i < AS_HASH_ENTRIES; i++) 1687 for (i = 0; i < AS_HASH_ENTRIES; i++)
1688 INIT_LIST_HEAD(&ad->hash[i]); 1688 INIT_LIST_HEAD(&ad->hash[i]);
1689 1689
1690 INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]); 1690 INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
1691 INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); 1691 INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
1692 ad->sort_list[REQ_SYNC] = RB_ROOT; 1692 ad->sort_list[REQ_SYNC] = RB_ROOT;
1693 ad->sort_list[REQ_ASYNC] = RB_ROOT; 1693 ad->sort_list[REQ_ASYNC] = RB_ROOT;
1694 ad->fifo_expire[REQ_SYNC] = default_read_expire; 1694 ad->fifo_expire[REQ_SYNC] = default_read_expire;
1695 ad->fifo_expire[REQ_ASYNC] = default_write_expire; 1695 ad->fifo_expire[REQ_ASYNC] = default_write_expire;
1696 ad->antic_expire = default_antic_expire; 1696 ad->antic_expire = default_antic_expire;
1697 ad->batch_expire[REQ_SYNC] = default_read_batch_expire; 1697 ad->batch_expire[REQ_SYNC] = default_read_batch_expire;
1698 ad->batch_expire[REQ_ASYNC] = default_write_batch_expire; 1698 ad->batch_expire[REQ_ASYNC] = default_write_batch_expire;
1699 e->elevator_data = ad; 1699 e->elevator_data = ad;
1700 1700
1701 ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC]; 1701 ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC];
1702 ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10; 1702 ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10;
1703 if (ad->write_batch_count < 2) 1703 if (ad->write_batch_count < 2)
1704 ad->write_batch_count = 2; 1704 ad->write_batch_count = 2;
1705 1705
1706 return 0; 1706 return 0;
1707 } 1707 }
1708 1708
1709 /* 1709 /*
1710 * sysfs parts below 1710 * sysfs parts below
1711 */ 1711 */
1712 1712
1713 static ssize_t 1713 static ssize_t
1714 as_var_show(unsigned int var, char *page) 1714 as_var_show(unsigned int var, char *page)
1715 { 1715 {
1716 return sprintf(page, "%d\n", var); 1716 return sprintf(page, "%d\n", var);
1717 } 1717 }
1718 1718
1719 static ssize_t 1719 static ssize_t
1720 as_var_store(unsigned long *var, const char *page, size_t count) 1720 as_var_store(unsigned long *var, const char *page, size_t count)
1721 { 1721 {
1722 char *p = (char *) page; 1722 char *p = (char *) page;
1723 1723
1724 *var = simple_strtoul(p, &p, 10); 1724 *var = simple_strtoul(p, &p, 10);
1725 return count; 1725 return count;
1726 } 1726 }
1727 1727
1728 static ssize_t as_est_show(elevator_t *e, char *page) 1728 static ssize_t est_time_show(elevator_t *e, char *page)
1729 { 1729 {
1730 struct as_data *ad = e->elevator_data; 1730 struct as_data *ad = e->elevator_data;
1731 int pos = 0; 1731 int pos = 0;
1732 1732
1733 pos += sprintf(page+pos, "%lu %% exit probability\n", 1733 pos += sprintf(page+pos, "%lu %% exit probability\n",
1734 100*ad->exit_prob/256); 1734 100*ad->exit_prob/256);
1735 pos += sprintf(page+pos, "%lu %% probability of exiting without a " 1735 pos += sprintf(page+pos, "%lu %% probability of exiting without a "
1736 "cooperating process submitting IO\n", 1736 "cooperating process submitting IO\n",
1737 100*ad->exit_no_coop/256); 1737 100*ad->exit_no_coop/256);
1738 pos += sprintf(page+pos, "%lu ms new thinktime\n", ad->new_ttime_mean); 1738 pos += sprintf(page+pos, "%lu ms new thinktime\n", ad->new_ttime_mean);
1739 pos += sprintf(page+pos, "%llu sectors new seek distance\n", 1739 pos += sprintf(page+pos, "%llu sectors new seek distance\n",
1740 (unsigned long long)ad->new_seek_mean); 1740 (unsigned long long)ad->new_seek_mean);
1741 1741
1742 return pos; 1742 return pos;
1743 } 1743 }
1744 1744
1745 #define SHOW_FUNCTION(__FUNC, __VAR) \ 1745 #define SHOW_FUNCTION(__FUNC, __VAR) \
1746 static ssize_t __FUNC(elevator_t *e, char *page) \ 1746 static ssize_t __FUNC(elevator_t *e, char *page) \
1747 { \ 1747 { \
1748 struct as_data *ad = e->elevator_data; \ 1748 struct as_data *ad = e->elevator_data; \
1749 return as_var_show(jiffies_to_msecs((__VAR)), (page)); \ 1749 return as_var_show(jiffies_to_msecs((__VAR)), (page)); \
1750 } 1750 }
1751 SHOW_FUNCTION(as_readexpire_show, ad->fifo_expire[REQ_SYNC]); 1751 SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[REQ_SYNC]);
1752 SHOW_FUNCTION(as_writeexpire_show, ad->fifo_expire[REQ_ASYNC]); 1752 SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[REQ_ASYNC]);
1753 SHOW_FUNCTION(as_anticexpire_show, ad->antic_expire); 1753 SHOW_FUNCTION(as_antic_expire_show, ad->antic_expire);
1754 SHOW_FUNCTION(as_read_batchexpire_show, ad->batch_expire[REQ_SYNC]); 1754 SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[REQ_SYNC]);
1755 SHOW_FUNCTION(as_write_batchexpire_show, ad->batch_expire[REQ_ASYNC]); 1755 SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]);
1756 #undef SHOW_FUNCTION 1756 #undef SHOW_FUNCTION
1757 1757
1758 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ 1758 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
1759 static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ 1759 static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
1760 { \ 1760 { \
1761 struct as_data *ad = e->elevator_data; \ 1761 struct as_data *ad = e->elevator_data; \
1762 int ret = as_var_store(__PTR, (page), count); \ 1762 int ret = as_var_store(__PTR, (page), count); \
1763 if (*(__PTR) < (MIN)) \ 1763 if (*(__PTR) < (MIN)) \
1764 *(__PTR) = (MIN); \ 1764 *(__PTR) = (MIN); \
1765 else if (*(__PTR) > (MAX)) \ 1765 else if (*(__PTR) > (MAX)) \
1766 *(__PTR) = (MAX); \ 1766 *(__PTR) = (MAX); \
1767 *(__PTR) = msecs_to_jiffies(*(__PTR)); \ 1767 *(__PTR) = msecs_to_jiffies(*(__PTR)); \
1768 return ret; \ 1768 return ret; \
1769 } 1769 }
1770 STORE_FUNCTION(as_readexpire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX); 1770 STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX);
1771 STORE_FUNCTION(as_writeexpire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX); 1771 STORE_FUNCTION(as_write_expire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX);
1772 STORE_FUNCTION(as_anticexpire_store, &ad->antic_expire, 0, INT_MAX); 1772 STORE_FUNCTION(as_antic_expire_store, &ad->antic_expire, 0, INT_MAX);
1773 STORE_FUNCTION(as_read_batchexpire_store, 1773 STORE_FUNCTION(as_read_batch_expire_store,
1774 &ad->batch_expire[REQ_SYNC], 0, INT_MAX); 1774 &ad->batch_expire[REQ_SYNC], 0, INT_MAX);
1775 STORE_FUNCTION(as_write_batchexpire_store, 1775 STORE_FUNCTION(as_write_batch_expire_store,
1776 &ad->batch_expire[REQ_ASYNC], 0, INT_MAX); 1776 &ad->batch_expire[REQ_ASYNC], 0, INT_MAX);
1777 #undef STORE_FUNCTION 1777 #undef STORE_FUNCTION
1778 1778
1779 static struct elv_fs_entry as_est_entry = { 1779 #define AS_ATTR(name) \
1780 .attr = {.name = "est_time", .mode = S_IRUGO }, 1780 __ATTR(name, S_IRUGO|S_IWUSR, as_##name##_show, as_##name##_store)
1781 .show = as_est_show,
1782 };
1783 static struct elv_fs_entry as_readexpire_entry = {
1784 .attr = {.name = "read_expire", .mode = S_IRUGO | S_IWUSR },
1785 .show = as_readexpire_show,
1786 .store = as_readexpire_store,
1787 };
1788 static struct elv_fs_entry as_writeexpire_entry = {
1789 .attr = {.name = "write_expire", .mode = S_IRUGO | S_IWUSR },
1790 .show = as_writeexpire_show,
1791 .store = as_writeexpire_store,
1792 };
1793 static struct elv_fs_entry as_anticexpire_entry = {
1794 .attr = {.name = "antic_expire", .mode = S_IRUGO | S_IWUSR },
1795 .show = as_anticexpire_show,
1796 .store = as_anticexpire_store,
1797 };
1798 static struct elv_fs_entry as_read_batchexpire_entry = {
1799 .attr = {.name = "read_batch_expire", .mode = S_IRUGO | S_IWUSR },
1800 .show = as_read_batchexpire_show,
1801 .store = as_read_batchexpire_store,
1802 };
1803 static struct elv_fs_entry as_write_batchexpire_entry = {
1804 .attr = {.name = "write_batch_expire", .mode = S_IRUGO | S_IWUSR },
1805 .show = as_write_batchexpire_show,
1806 .store = as_write_batchexpire_store,
1807 };
1808 1781
1809 static struct attribute *as_attrs[] = { 1782 static struct elv_fs_entry as_attrs[] = {
1810 &as_est_entry.attr, 1783 __ATTR_RO(est_time),
1811 &as_readexpire_entry.attr, 1784 AS_ATTR(read_expire),
1812 &as_writeexpire_entry.attr, 1785 AS_ATTR(write_expire),
1813 &as_anticexpire_entry.attr, 1786 AS_ATTR(antic_expire),
1814 &as_read_batchexpire_entry.attr, 1787 AS_ATTR(read_batch_expire),
1815 &as_write_batchexpire_entry.attr, 1788 AS_ATTR(write_batch_expire),
1816 NULL, 1789 __ATTR_NULL
1817 }; 1790 };
1818 1791
1819 static struct elevator_type iosched_as = { 1792 static struct elevator_type iosched_as = {
1820 .ops = { 1793 .ops = {
1821 .elevator_merge_fn = as_merge, 1794 .elevator_merge_fn = as_merge,
1822 .elevator_merged_fn = as_merged_request, 1795 .elevator_merged_fn = as_merged_request,
1823 .elevator_merge_req_fn = as_merged_requests, 1796 .elevator_merge_req_fn = as_merged_requests,
1824 .elevator_dispatch_fn = as_dispatch_request, 1797 .elevator_dispatch_fn = as_dispatch_request,
1825 .elevator_add_req_fn = as_add_request, 1798 .elevator_add_req_fn = as_add_request,
1826 .elevator_activate_req_fn = as_activate_request, 1799 .elevator_activate_req_fn = as_activate_request,
1827 .elevator_deactivate_req_fn = as_deactivate_request, 1800 .elevator_deactivate_req_fn = as_deactivate_request,
1828 .elevator_queue_empty_fn = as_queue_empty, 1801 .elevator_queue_empty_fn = as_queue_empty,
1829 .elevator_completed_req_fn = as_completed_request, 1802 .elevator_completed_req_fn = as_completed_request,
1830 .elevator_former_req_fn = as_former_request, 1803 .elevator_former_req_fn = as_former_request,
1831 .elevator_latter_req_fn = as_latter_request, 1804 .elevator_latter_req_fn = as_latter_request,
1832 .elevator_set_req_fn = as_set_request, 1805 .elevator_set_req_fn = as_set_request,
1833 .elevator_put_req_fn = as_put_request, 1806 .elevator_put_req_fn = as_put_request,
1834 .elevator_may_queue_fn = as_may_queue, 1807 .elevator_may_queue_fn = as_may_queue,
1835 .elevator_init_fn = as_init_queue, 1808 .elevator_init_fn = as_init_queue,
1836 .elevator_exit_fn = as_exit_queue, 1809 .elevator_exit_fn = as_exit_queue,
1837 .trim = as_trim, 1810 .trim = as_trim,
1838 }, 1811 },
1839 1812
1840 .elevator_attrs = as_attrs, 1813 .elevator_attrs = as_attrs,
1841 .elevator_name = "anticipatory", 1814 .elevator_name = "anticipatory",
1842 .elevator_owner = THIS_MODULE, 1815 .elevator_owner = THIS_MODULE,
1843 }; 1816 };
1844 1817
1845 static int __init as_init(void) 1818 static int __init as_init(void)
1846 { 1819 {
1847 int ret; 1820 int ret;
1848 1821
1849 arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq), 1822 arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq),
1850 0, 0, NULL, NULL); 1823 0, 0, NULL, NULL);
1851 if (!arq_pool) 1824 if (!arq_pool)
1852 return -ENOMEM; 1825 return -ENOMEM;
1853 1826
1854 ret = elv_register(&iosched_as); 1827 ret = elv_register(&iosched_as);
1855 if (!ret) { 1828 if (!ret) {
1856 /* 1829 /*
1857 * don't allow AS to get unregistered, since we would have 1830 * don't allow AS to get unregistered, since we would have
1858 * to browse all tasks in the system and release their 1831 * to browse all tasks in the system and release their
1859 * as_io_context first 1832 * as_io_context first
1860 */ 1833 */
1861 __module_get(THIS_MODULE); 1834 __module_get(THIS_MODULE);
1862 return 0; 1835 return 0;
1863 } 1836 }
1864 1837
1865 kmem_cache_destroy(arq_pool); 1838 kmem_cache_destroy(arq_pool);
1866 return ret; 1839 return ret;
1867 } 1840 }
1868 1841
1869 static void __exit as_exit(void) 1842 static void __exit as_exit(void)
1870 { 1843 {
1871 DECLARE_COMPLETION(all_gone); 1844 DECLARE_COMPLETION(all_gone);
1872 elv_unregister(&iosched_as); 1845 elv_unregister(&iosched_as);
1873 ioc_gone = &all_gone; 1846 ioc_gone = &all_gone;
1874 barrier(); 1847 barrier();
1875 if (atomic_read(&ioc_count)) 1848 if (atomic_read(&ioc_count))
1876 complete(ioc_gone); 1849 complete(ioc_gone);
1877 synchronize_rcu(); 1850 synchronize_rcu();
1878 kmem_cache_destroy(arq_pool); 1851 kmem_cache_destroy(arq_pool);
1879 } 1852 }
1880 1853
1881 module_init(as_init); 1854 module_init(as_init);
1882 module_exit(as_exit); 1855 module_exit(as_exit);
1883 1856
1884 MODULE_AUTHOR("Nick Piggin"); 1857 MODULE_AUTHOR("Nick Piggin");
1885 MODULE_LICENSE("GPL"); 1858 MODULE_LICENSE("GPL");
1886 MODULE_DESCRIPTION("anticipatory IO scheduler"); 1859 MODULE_DESCRIPTION("anticipatory IO scheduler");
1887 1860
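A rough sketch of the converted sysfs glue for an elevator after this patch (illustrative only; the "foo" scheduler, its foo_data and its quantum tunable are invented names, and the elevator_ops hooks are elided): each scheduler now supplies a single struct elv_fs_entry table terminated by __ATTR_NULL, whose show/store methods take the elevator_t itself, and points elevator_type.elevator_attrs at that table.

	/*
	 * Sketch only -- not part of this commit.  "foo" and its "quantum"
	 * tunable are hypothetical; the layout mirrors the converted
	 * as_attrs table above.  Assumes the usual <linux/elevator.h> and
	 * module boilerplate.
	 */
	struct foo_data {
		unsigned int quantum;
	};

	static ssize_t foo_quantum_show(elevator_t *e, char *page)
	{
		struct foo_data *fd = e->elevator_data;

		return sprintf(page, "%u\n", fd->quantum);
	}

	static ssize_t foo_quantum_store(elevator_t *e, const char *page, size_t count)
	{
		struct foo_data *fd = e->elevator_data;
		char *p = (char *) page;

		fd->quantum = simple_strtoul(p, &p, 10);
		return count;
	}

	static struct elv_fs_entry foo_attrs[] = {
		__ATTR(quantum, S_IRUGO|S_IWUSR, foo_quantum_show, foo_quantum_store),
		__ATTR_NULL
	};

	static struct elevator_type iosched_foo = {
		.ops		= { /* elevator_*_fn hooks elided */ },
		.elevator_attrs	= foo_attrs,
		.elevator_name	= "foo",
		.elevator_owner	= THIS_MODULE,
	};

Compared with the old scheme there is no longer a per-attribute struct elv_fs_entry definition plus a separate NULL-terminated struct attribute * array to keep in sync; the elv_fs_entry table itself carries both the attribute and its show/store methods.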
1 /* 1 /*
2 * CFQ, or complete fairness queueing, disk scheduler. 2 * CFQ, or complete fairness queueing, disk scheduler.
3 * 3 *
4 * Based on ideas from a previously unfinished io 4 * Based on ideas from a previously unfinished io
5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. 5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
6 * 6 *
7 * Copyright (C) 2003 Jens Axboe <axboe@suse.de> 7 * Copyright (C) 2003 Jens Axboe <axboe@suse.de>
8 */ 8 */
9 #include <linux/config.h> 9 #include <linux/config.h>
10 #include <linux/module.h> 10 #include <linux/module.h>
11 #include <linux/blkdev.h> 11 #include <linux/blkdev.h>
12 #include <linux/elevator.h> 12 #include <linux/elevator.h>
13 #include <linux/hash.h> 13 #include <linux/hash.h>
14 #include <linux/rbtree.h> 14 #include <linux/rbtree.h>
15 #include <linux/ioprio.h> 15 #include <linux/ioprio.h>
16 16
17 /* 17 /*
18 * tunables 18 * tunables
19 */ 19 */
20 static const int cfq_quantum = 4; /* max queue in one round of service */ 20 static const int cfq_quantum = 4; /* max queue in one round of service */
21 static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ 21 static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/
22 static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 22 static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
23 static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ 23 static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */
24 static const int cfq_back_penalty = 2; /* penalty of a backwards seek */ 24 static const int cfq_back_penalty = 2; /* penalty of a backwards seek */
25 25
26 static const int cfq_slice_sync = HZ / 10; 26 static const int cfq_slice_sync = HZ / 10;
27 static int cfq_slice_async = HZ / 25; 27 static int cfq_slice_async = HZ / 25;
28 static const int cfq_slice_async_rq = 2; 28 static const int cfq_slice_async_rq = 2;
29 static int cfq_slice_idle = HZ / 100; 29 static int cfq_slice_idle = HZ / 100;
30 30
31 #define CFQ_IDLE_GRACE (HZ / 10) 31 #define CFQ_IDLE_GRACE (HZ / 10)
32 #define CFQ_SLICE_SCALE (5) 32 #define CFQ_SLICE_SCALE (5)
33 33
34 #define CFQ_KEY_ASYNC (0) 34 #define CFQ_KEY_ASYNC (0)
35 #define CFQ_KEY_ANY (0xffff) 35 #define CFQ_KEY_ANY (0xffff)
36 36
37 /* 37 /*
38 * disable queueing at the driver/hardware level 38 * disable queueing at the driver/hardware level
39 */ 39 */
40 static const int cfq_max_depth = 2; 40 static const int cfq_max_depth = 2;
41 41
42 static DEFINE_RWLOCK(cfq_exit_lock); 42 static DEFINE_RWLOCK(cfq_exit_lock);
43 43
44 /* 44 /*
45 * for the hash of cfqq inside the cfqd 45 * for the hash of cfqq inside the cfqd
46 */ 46 */
47 #define CFQ_QHASH_SHIFT 6 47 #define CFQ_QHASH_SHIFT 6
48 #define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT) 48 #define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT)
49 #define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash) 49 #define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash)
50 50
51 /* 51 /*
52 * for the hash of crq inside the cfqq 52 * for the hash of crq inside the cfqq
53 */ 53 */
54 #define CFQ_MHASH_SHIFT 6 54 #define CFQ_MHASH_SHIFT 6
55 #define CFQ_MHASH_BLOCK(sec) ((sec) >> 3) 55 #define CFQ_MHASH_BLOCK(sec) ((sec) >> 3)
56 #define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT) 56 #define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT)
57 #define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT) 57 #define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT)
58 #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) 58 #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
59 #define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash) 59 #define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash)
60 60
61 #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) 61 #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list)
62 #define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) 62 #define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
63 63
64 #define RQ_DATA(rq) (rq)->elevator_private 64 #define RQ_DATA(rq) (rq)->elevator_private
65 65
66 /* 66 /*
67 * rb-tree defines 67 * rb-tree defines
68 */ 68 */
69 #define RB_NONE (2) 69 #define RB_NONE (2)
70 #define RB_EMPTY(node) ((node)->rb_node == NULL) 70 #define RB_EMPTY(node) ((node)->rb_node == NULL)
71 #define RB_CLEAR_COLOR(node) (node)->rb_color = RB_NONE 71 #define RB_CLEAR_COLOR(node) (node)->rb_color = RB_NONE
72 #define RB_CLEAR(node) do { \ 72 #define RB_CLEAR(node) do { \
73 (node)->rb_parent = NULL; \ 73 (node)->rb_parent = NULL; \
74 RB_CLEAR_COLOR((node)); \ 74 RB_CLEAR_COLOR((node)); \
75 (node)->rb_right = NULL; \ 75 (node)->rb_right = NULL; \
76 (node)->rb_left = NULL; \ 76 (node)->rb_left = NULL; \
77 } while (0) 77 } while (0)
78 #define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL) 78 #define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL)
79 #define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) 79 #define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node)
80 #define rq_rb_key(rq) (rq)->sector 80 #define rq_rb_key(rq) (rq)->sector
81 81
82 static kmem_cache_t *crq_pool; 82 static kmem_cache_t *crq_pool;
83 static kmem_cache_t *cfq_pool; 83 static kmem_cache_t *cfq_pool;
84 static kmem_cache_t *cfq_ioc_pool; 84 static kmem_cache_t *cfq_ioc_pool;
85 85
86 static atomic_t ioc_count = ATOMIC_INIT(0); 86 static atomic_t ioc_count = ATOMIC_INIT(0);
87 static struct completion *ioc_gone; 87 static struct completion *ioc_gone;
88 88
89 #define CFQ_PRIO_LISTS IOPRIO_BE_NR 89 #define CFQ_PRIO_LISTS IOPRIO_BE_NR
90 #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 90 #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
91 #define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE) 91 #define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE)
92 #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) 92 #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
93 93
94 #define ASYNC (0) 94 #define ASYNC (0)
95 #define SYNC (1) 95 #define SYNC (1)
96 96
97 #define cfq_cfqq_dispatched(cfqq) \ 97 #define cfq_cfqq_dispatched(cfqq) \
98 ((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC]) 98 ((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC])
99 99
100 #define cfq_cfqq_class_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC) 100 #define cfq_cfqq_class_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC)
101 101
102 #define cfq_cfqq_sync(cfqq) \ 102 #define cfq_cfqq_sync(cfqq) \
103 (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC]) 103 (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC])
104 104
105 /* 105 /*
106 * Per block device queue structure 106 * Per block device queue structure
107 */ 107 */
108 struct cfq_data { 108 struct cfq_data {
109 request_queue_t *queue; 109 request_queue_t *queue;
110 110
111 /* 111 /*
112 * rr list of queues with requests and the count of them 112 * rr list of queues with requests and the count of them
113 */ 113 */
114 struct list_head rr_list[CFQ_PRIO_LISTS]; 114 struct list_head rr_list[CFQ_PRIO_LISTS];
115 struct list_head busy_rr; 115 struct list_head busy_rr;
116 struct list_head cur_rr; 116 struct list_head cur_rr;
117 struct list_head idle_rr; 117 struct list_head idle_rr;
118 unsigned int busy_queues; 118 unsigned int busy_queues;
119 119
120 /* 120 /*
121 * non-ordered list of empty cfqq's 121 * non-ordered list of empty cfqq's
122 */ 122 */
123 struct list_head empty_list; 123 struct list_head empty_list;
124 124
125 /* 125 /*
126 * cfqq lookup hash 126 * cfqq lookup hash
127 */ 127 */
128 struct hlist_head *cfq_hash; 128 struct hlist_head *cfq_hash;
129 129
130 /* 130 /*
131 * global crq hash for all queues 131 * global crq hash for all queues
132 */ 132 */
133 struct hlist_head *crq_hash; 133 struct hlist_head *crq_hash;
134 134
135 unsigned int max_queued; 135 unsigned int max_queued;
136 136
137 mempool_t *crq_pool; 137 mempool_t *crq_pool;
138 138
139 int rq_in_driver; 139 int rq_in_driver;
140 140
141 /* 141 /*
142 * schedule slice state info 142 * schedule slice state info
143 */ 143 */
144 /* 144 /*
145 * idle window management 145 * idle window management
146 */ 146 */
147 struct timer_list idle_slice_timer; 147 struct timer_list idle_slice_timer;
148 struct work_struct unplug_work; 148 struct work_struct unplug_work;
149 149
150 struct cfq_queue *active_queue; 150 struct cfq_queue *active_queue;
151 struct cfq_io_context *active_cic; 151 struct cfq_io_context *active_cic;
152 int cur_prio, cur_end_prio; 152 int cur_prio, cur_end_prio;
153 unsigned int dispatch_slice; 153 unsigned int dispatch_slice;
154 154
155 struct timer_list idle_class_timer; 155 struct timer_list idle_class_timer;
156 156
157 sector_t last_sector; 157 sector_t last_sector;
158 unsigned long last_end_request; 158 unsigned long last_end_request;
159 159
160 unsigned int rq_starved; 160 unsigned int rq_starved;
161 161
162 /* 162 /*
163 * tunables, see top of file 163 * tunables, see top of file
164 */ 164 */
165 unsigned int cfq_quantum; 165 unsigned int cfq_quantum;
166 unsigned int cfq_queued; 166 unsigned int cfq_queued;
167 unsigned int cfq_fifo_expire[2]; 167 unsigned int cfq_fifo_expire[2];
168 unsigned int cfq_back_penalty; 168 unsigned int cfq_back_penalty;
169 unsigned int cfq_back_max; 169 unsigned int cfq_back_max;
170 unsigned int cfq_slice[2]; 170 unsigned int cfq_slice[2];
171 unsigned int cfq_slice_async_rq; 171 unsigned int cfq_slice_async_rq;
172 unsigned int cfq_slice_idle; 172 unsigned int cfq_slice_idle;
173 unsigned int cfq_max_depth; 173 unsigned int cfq_max_depth;
174 174
175 struct list_head cic_list; 175 struct list_head cic_list;
176 }; 176 };
177 177
178 /* 178 /*
179 * Per process-grouping structure 179 * Per process-grouping structure
180 */ 180 */
181 struct cfq_queue { 181 struct cfq_queue {
182 /* reference count */ 182 /* reference count */
183 atomic_t ref; 183 atomic_t ref;
184 /* parent cfq_data */ 184 /* parent cfq_data */
185 struct cfq_data *cfqd; 185 struct cfq_data *cfqd;
186 /* cfqq lookup hash */ 186 /* cfqq lookup hash */
187 struct hlist_node cfq_hash; 187 struct hlist_node cfq_hash;
188 /* hash key */ 188 /* hash key */
189 unsigned int key; 189 unsigned int key;
190 /* on either rr or empty list of cfqd */ 190 /* on either rr or empty list of cfqd */
191 struct list_head cfq_list; 191 struct list_head cfq_list;
192 /* sorted list of pending requests */ 192 /* sorted list of pending requests */
193 struct rb_root sort_list; 193 struct rb_root sort_list;
194 /* if fifo isn't expired, next request to serve */ 194 /* if fifo isn't expired, next request to serve */
195 struct cfq_rq *next_crq; 195 struct cfq_rq *next_crq;
196 /* requests queued in sort_list */ 196 /* requests queued in sort_list */
197 int queued[2]; 197 int queued[2];
198 /* currently allocated requests */ 198 /* currently allocated requests */
199 int allocated[2]; 199 int allocated[2];
200 /* fifo list of requests in sort_list */ 200 /* fifo list of requests in sort_list */
201 struct list_head fifo; 201 struct list_head fifo;
202 202
203 unsigned long slice_start; 203 unsigned long slice_start;
204 unsigned long slice_end; 204 unsigned long slice_end;
205 unsigned long slice_left; 205 unsigned long slice_left;
206 unsigned long service_last; 206 unsigned long service_last;
207 207
208 /* number of requests that are on the dispatch list */ 208 /* number of requests that are on the dispatch list */
209 int on_dispatch[2]; 209 int on_dispatch[2];
210 210
211 /* io prio of this group */ 211 /* io prio of this group */
212 unsigned short ioprio, org_ioprio; 212 unsigned short ioprio, org_ioprio;
213 unsigned short ioprio_class, org_ioprio_class; 213 unsigned short ioprio_class, org_ioprio_class;
214 214
215 /* various state flags, see below */ 215 /* various state flags, see below */
216 unsigned int flags; 216 unsigned int flags;
217 }; 217 };
218 218
219 struct cfq_rq { 219 struct cfq_rq {
220 struct rb_node rb_node; 220 struct rb_node rb_node;
221 sector_t rb_key; 221 sector_t rb_key;
222 struct request *request; 222 struct request *request;
223 struct hlist_node hash; 223 struct hlist_node hash;
224 224
225 struct cfq_queue *cfq_queue; 225 struct cfq_queue *cfq_queue;
226 struct cfq_io_context *io_context; 226 struct cfq_io_context *io_context;
227 227
228 unsigned int crq_flags; 228 unsigned int crq_flags;
229 }; 229 };
230 230
231 enum cfqq_state_flags { 231 enum cfqq_state_flags {
232 CFQ_CFQQ_FLAG_on_rr = 0, 232 CFQ_CFQQ_FLAG_on_rr = 0,
233 CFQ_CFQQ_FLAG_wait_request, 233 CFQ_CFQQ_FLAG_wait_request,
234 CFQ_CFQQ_FLAG_must_alloc, 234 CFQ_CFQQ_FLAG_must_alloc,
235 CFQ_CFQQ_FLAG_must_alloc_slice, 235 CFQ_CFQQ_FLAG_must_alloc_slice,
236 CFQ_CFQQ_FLAG_must_dispatch, 236 CFQ_CFQQ_FLAG_must_dispatch,
237 CFQ_CFQQ_FLAG_fifo_expire, 237 CFQ_CFQQ_FLAG_fifo_expire,
238 CFQ_CFQQ_FLAG_idle_window, 238 CFQ_CFQQ_FLAG_idle_window,
239 CFQ_CFQQ_FLAG_prio_changed, 239 CFQ_CFQQ_FLAG_prio_changed,
240 }; 240 };
241 241
242 #define CFQ_CFQQ_FNS(name) \ 242 #define CFQ_CFQQ_FNS(name) \
243 static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \ 243 static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \
244 { \ 244 { \
245 cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name); \ 245 cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name); \
246 } \ 246 } \
247 static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \ 247 static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \
248 { \ 248 { \
249 cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \ 249 cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \
250 } \ 250 } \
251 static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ 251 static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
252 { \ 252 { \
253 return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \ 253 return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \
254 } 254 }
255 255
256 CFQ_CFQQ_FNS(on_rr); 256 CFQ_CFQQ_FNS(on_rr);
257 CFQ_CFQQ_FNS(wait_request); 257 CFQ_CFQQ_FNS(wait_request);
258 CFQ_CFQQ_FNS(must_alloc); 258 CFQ_CFQQ_FNS(must_alloc);
259 CFQ_CFQQ_FNS(must_alloc_slice); 259 CFQ_CFQQ_FNS(must_alloc_slice);
260 CFQ_CFQQ_FNS(must_dispatch); 260 CFQ_CFQQ_FNS(must_dispatch);
261 CFQ_CFQQ_FNS(fifo_expire); 261 CFQ_CFQQ_FNS(fifo_expire);
262 CFQ_CFQQ_FNS(idle_window); 262 CFQ_CFQQ_FNS(idle_window);
263 CFQ_CFQQ_FNS(prio_changed); 263 CFQ_CFQQ_FNS(prio_changed);
264 #undef CFQ_CFQQ_FNS 264 #undef CFQ_CFQQ_FNS
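Editorial note (not part of the patch): the CFQ_CFQQ_FNS() macro above is only a generator for per-flag bit helpers on cfqq->flags. For one flag, CFQ_CFQQ_FNS(on_rr) expands to:

static inline void cfq_mark_cfqq_on_rr(struct cfq_queue *cfqq)
{
	cfqq->flags |= (1 << CFQ_CFQQ_FLAG_on_rr);
}
static inline void cfq_clear_cfqq_on_rr(struct cfq_queue *cfqq)
{
	cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_on_rr);
}
static inline int cfq_cfqq_on_rr(const struct cfq_queue *cfqq)
{
	return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_on_rr)) != 0;
}

The CFQ_CRQ_FNS() block below does the same for the per-request crq_flags word.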
265 265
266 enum cfq_rq_state_flags { 266 enum cfq_rq_state_flags {
267 CFQ_CRQ_FLAG_is_sync = 0, 267 CFQ_CRQ_FLAG_is_sync = 0,
268 }; 268 };
269 269
270 #define CFQ_CRQ_FNS(name) \ 270 #define CFQ_CRQ_FNS(name) \
271 static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \ 271 static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \
272 { \ 272 { \
273 crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \ 273 crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \
274 } \ 274 } \
275 static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \ 275 static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \
276 { \ 276 { \
277 crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \ 277 crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \
278 } \ 278 } \
279 static inline int cfq_crq_##name(const struct cfq_rq *crq) \ 279 static inline int cfq_crq_##name(const struct cfq_rq *crq) \
280 { \ 280 { \
281 return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \ 281 return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \
282 } 282 }
283 283
284 CFQ_CRQ_FNS(is_sync); 284 CFQ_CRQ_FNS(is_sync);
285 #undef CFQ_CRQ_FNS 285 #undef CFQ_CRQ_FNS
286 286
287 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); 287 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
288 static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); 288 static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
289 static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask); 289 static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask);
290 290
291 #define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE) 291 #define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE)
292 292
293 /* 293 /*
294 * lots of deadline iosched dupes, can be abstracted later... 294 * lots of deadline iosched dupes, can be abstracted later...
295 */ 295 */
296 static inline void cfq_del_crq_hash(struct cfq_rq *crq) 296 static inline void cfq_del_crq_hash(struct cfq_rq *crq)
297 { 297 {
298 hlist_del_init(&crq->hash); 298 hlist_del_init(&crq->hash);
299 } 299 }
300 300
301 static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) 301 static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
302 { 302 {
303 const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); 303 const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
304 304
305 hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]); 305 hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
306 } 306 }
307 307
308 static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) 308 static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
309 { 309 {
310 struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)]; 310 struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
311 struct hlist_node *entry, *next; 311 struct hlist_node *entry, *next;
312 312
313 hlist_for_each_safe(entry, next, hash_list) { 313 hlist_for_each_safe(entry, next, hash_list) {
314 struct cfq_rq *crq = list_entry_hash(entry); 314 struct cfq_rq *crq = list_entry_hash(entry);
315 struct request *__rq = crq->request; 315 struct request *__rq = crq->request;
316 316
317 if (!rq_mergeable(__rq)) { 317 if (!rq_mergeable(__rq)) {
318 cfq_del_crq_hash(crq); 318 cfq_del_crq_hash(crq);
319 continue; 319 continue;
320 } 320 }
321 321
322 if (rq_hash_key(__rq) == offset) 322 if (rq_hash_key(__rq) == offset)
323 return __rq; 323 return __rq;
324 } 324 }
325 325
326 return NULL; 326 return NULL;
327 } 327 }
328 328
329 /* 329 /*
330 * schedule a run of the queue if there are requests pending and nothing in 330 * schedule a run of the queue if there are requests pending and nothing in
331 * the driver that will restart queueing 331 * the driver that will restart queueing
332 */ 332 */
333 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) 333 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
334 { 334 {
335 if (cfqd->busy_queues) 335 if (cfqd->busy_queues)
336 kblockd_schedule_work(&cfqd->unplug_work); 336 kblockd_schedule_work(&cfqd->unplug_work);
337 } 337 }
338 338
339 static int cfq_queue_empty(request_queue_t *q) 339 static int cfq_queue_empty(request_queue_t *q)
340 { 340 {
341 struct cfq_data *cfqd = q->elevator->elevator_data; 341 struct cfq_data *cfqd = q->elevator->elevator_data;
342 342
343 return !cfqd->busy_queues; 343 return !cfqd->busy_queues;
344 } 344 }
345 345
346 /* 346 /*
347 * Lifted from AS - choose which of crq1 and crq2 is best served now. 347 * Lifted from AS - choose which of crq1 and crq2 is best served now.
348 * We choose the request that is closest to the head right now. Distances 348 * We choose the request that is closest to the head right now. Distances
349 * behind the head are penalized and only allowed to a certain extent. 349 * behind the head are penalized and only allowed to a certain extent.
350 */ 350 */
351 static struct cfq_rq * 351 static struct cfq_rq *
352 cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) 352 cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
353 { 353 {
354 sector_t last, s1, s2, d1 = 0, d2 = 0; 354 sector_t last, s1, s2, d1 = 0, d2 = 0;
355 int r1_wrap = 0, r2_wrap = 0; /* requests are behind the disk head */ 355 int r1_wrap = 0, r2_wrap = 0; /* requests are behind the disk head */
356 unsigned long back_max; 356 unsigned long back_max;
357 357
358 if (crq1 == NULL || crq1 == crq2) 358 if (crq1 == NULL || crq1 == crq2)
359 return crq2; 359 return crq2;
360 if (crq2 == NULL) 360 if (crq2 == NULL)
361 return crq1; 361 return crq1;
362 362
363 if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2)) 363 if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2))
364 return crq1; 364 return crq1;
365 else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1)) 365 else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1))
366 return crq2; 366 return crq2;
367 367
368 s1 = crq1->request->sector; 368 s1 = crq1->request->sector;
369 s2 = crq2->request->sector; 369 s2 = crq2->request->sector;
370 370
371 last = cfqd->last_sector; 371 last = cfqd->last_sector;
372 372
373 /* 373 /*
374 * by definition, 1KiB is 2 sectors 374 * by definition, 1KiB is 2 sectors
375 */ 375 */
376 back_max = cfqd->cfq_back_max * 2; 376 back_max = cfqd->cfq_back_max * 2;
377 377
378 /* 378 /*
379 * Strict one way elevator _except_ in the case where we allow 379 * Strict one way elevator _except_ in the case where we allow
380 * short backward seeks which are biased as twice the cost of a 380 * short backward seeks which are biased as twice the cost of a
381 * similar forward seek. 381 * similar forward seek.
382 */ 382 */
383 if (s1 >= last) 383 if (s1 >= last)
384 d1 = s1 - last; 384 d1 = s1 - last;
385 else if (s1 + back_max >= last) 385 else if (s1 + back_max >= last)
386 d1 = (last - s1) * cfqd->cfq_back_penalty; 386 d1 = (last - s1) * cfqd->cfq_back_penalty;
387 else 387 else
388 r1_wrap = 1; 388 r1_wrap = 1;
389 389
390 if (s2 >= last) 390 if (s2 >= last)
391 d2 = s2 - last; 391 d2 = s2 - last;
392 else if (s2 + back_max >= last) 392 else if (s2 + back_max >= last)
393 d2 = (last - s2) * cfqd->cfq_back_penalty; 393 d2 = (last - s2) * cfqd->cfq_back_penalty;
394 else 394 else
395 r2_wrap = 1; 395 r2_wrap = 1;
396 396
397 /* Found required data */ 397 /* Found required data */
398 if (!r1_wrap && r2_wrap) 398 if (!r1_wrap && r2_wrap)
399 return crq1; 399 return crq1;
400 else if (!r2_wrap && r1_wrap) 400 else if (!r2_wrap && r1_wrap)
401 return crq2; 401 return crq2;
402 else if (r1_wrap && r2_wrap) { 402 else if (r1_wrap && r2_wrap) {
403 /* both behind the head */ 403 /* both behind the head */
404 if (s1 <= s2) 404 if (s1 <= s2)
405 return crq1; 405 return crq1;
406 else 406 else
407 return crq2; 407 return crq2;
408 } 408 }
409 409
410 /* Both requests in front of the head */ 410 /* Both requests in front of the head */
411 if (d1 < d2) 411 if (d1 < d2)
412 return crq1; 412 return crq1;
413 else if (d2 < d1) 413 else if (d2 < d1)
414 return crq2; 414 return crq2;
415 else { 415 else {
416 if (s1 >= s2) 416 if (s1 >= s2)
417 return crq1; 417 return crq1;
418 else 418 else
419 return crq2; 419 return crq2;
420 } 420 }
421 } 421 }
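Editorial worked example for cfq_choose_req() above, with made-up numbers (none of these values come from the patch): assume cfqd->last_sector = 1000, cfqd->cfq_back_max = 16 (KiB, so back_max = 32 sectors) and cfqd->cfq_back_penalty = 2.

	/* crq1 at sector 1020 (20 ahead):  d1 = 1020 - 1000      = 20 */
	/* crq2 at sector  990 (10 behind): d2 = (1000 - 990) * 2 = 20 */
	/* neither wrapped, d1 == d2 and s1 >= s2, so crq1 wins        */
	/* a request at sector 900 (100 behind, past back_max) would   */
	/* be marked r_wrap and only chosen if the other wrapped too   */

So a short backward seek can still win, but only while it is within back_max, and even then it is charged cfq_back_penalty times its real distance.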
422 422
423 /* 423 /*
424 * would be nice to take fifo expire time into account as well 424 * would be nice to take fifo expire time into account as well
425 */ 425 */
426 static struct cfq_rq * 426 static struct cfq_rq *
427 cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, 427 cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
428 struct cfq_rq *last) 428 struct cfq_rq *last)
429 { 429 {
430 struct cfq_rq *crq_next = NULL, *crq_prev = NULL; 430 struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
431 struct rb_node *rbnext, *rbprev; 431 struct rb_node *rbnext, *rbprev;
432 432
433 if (!(rbnext = rb_next(&last->rb_node))) { 433 if (!(rbnext = rb_next(&last->rb_node))) {
434 rbnext = rb_first(&cfqq->sort_list); 434 rbnext = rb_first(&cfqq->sort_list);
435 if (rbnext == &last->rb_node) 435 if (rbnext == &last->rb_node)
436 rbnext = NULL; 436 rbnext = NULL;
437 } 437 }
438 438
439 rbprev = rb_prev(&last->rb_node); 439 rbprev = rb_prev(&last->rb_node);
440 440
441 if (rbprev) 441 if (rbprev)
442 crq_prev = rb_entry_crq(rbprev); 442 crq_prev = rb_entry_crq(rbprev);
443 if (rbnext) 443 if (rbnext)
444 crq_next = rb_entry_crq(rbnext); 444 crq_next = rb_entry_crq(rbnext);
445 445
446 return cfq_choose_req(cfqd, crq_next, crq_prev); 446 return cfq_choose_req(cfqd, crq_next, crq_prev);
447 } 447 }
448 448
449 static void cfq_update_next_crq(struct cfq_rq *crq) 449 static void cfq_update_next_crq(struct cfq_rq *crq)
450 { 450 {
451 struct cfq_queue *cfqq = crq->cfq_queue; 451 struct cfq_queue *cfqq = crq->cfq_queue;
452 452
453 if (cfqq->next_crq == crq) 453 if (cfqq->next_crq == crq)
454 cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq); 454 cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
455 } 455 }
456 456
457 static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) 457 static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
458 { 458 {
459 struct cfq_data *cfqd = cfqq->cfqd; 459 struct cfq_data *cfqd = cfqq->cfqd;
460 struct list_head *list, *entry; 460 struct list_head *list, *entry;
461 461
462 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 462 BUG_ON(!cfq_cfqq_on_rr(cfqq));
463 463
464 list_del(&cfqq->cfq_list); 464 list_del(&cfqq->cfq_list);
465 465
466 if (cfq_class_rt(cfqq)) 466 if (cfq_class_rt(cfqq))
467 list = &cfqd->cur_rr; 467 list = &cfqd->cur_rr;
468 else if (cfq_class_idle(cfqq)) 468 else if (cfq_class_idle(cfqq))
469 list = &cfqd->idle_rr; 469 list = &cfqd->idle_rr;
470 else { 470 else {
471 /* 471 /*
472 * if cfqq has requests in flight, don't allow it to be 472 * if cfqq has requests in flight, don't allow it to be
473 * found in cfq_set_active_queue before it has finished them. 473 * found in cfq_set_active_queue before it has finished them.
474 * this is done to increase fairness between a process that 474 * this is done to increase fairness between a process that
475 * has lots of io pending vs one that only generates one 475 * has lots of io pending vs one that only generates one
476 * sporadically or synchronously 476 * sporadically or synchronously
477 */ 477 */
478 if (cfq_cfqq_dispatched(cfqq)) 478 if (cfq_cfqq_dispatched(cfqq))
479 list = &cfqd->busy_rr; 479 list = &cfqd->busy_rr;
480 else 480 else
481 list = &cfqd->rr_list[cfqq->ioprio]; 481 list = &cfqd->rr_list[cfqq->ioprio];
482 } 482 }
483 483
484 /* 484 /*
485 * if queue was preempted, just add to front to be fair. busy_rr 485 * if queue was preempted, just add to front to be fair. busy_rr
486 * isn't sorted. 486 * isn't sorted.
487 */ 487 */
488 if (preempted || list == &cfqd->busy_rr) { 488 if (preempted || list == &cfqd->busy_rr) {
489 list_add(&cfqq->cfq_list, list); 489 list_add(&cfqq->cfq_list, list);
490 return; 490 return;
491 } 491 }
492 492
493 /* 493 /*
494 * sort by when queue was last serviced 494 * sort by when queue was last serviced
495 */ 495 */
496 entry = list; 496 entry = list;
497 while ((entry = entry->prev) != list) { 497 while ((entry = entry->prev) != list) {
498 struct cfq_queue *__cfqq = list_entry_cfqq(entry); 498 struct cfq_queue *__cfqq = list_entry_cfqq(entry);
499 499
500 if (!__cfqq->service_last) 500 if (!__cfqq->service_last)
501 break; 501 break;
502 if (time_before(__cfqq->service_last, cfqq->service_last)) 502 if (time_before(__cfqq->service_last, cfqq->service_last))
503 break; 503 break;
504 } 504 }
505 505
506 list_add(&cfqq->cfq_list, entry); 506 list_add(&cfqq->cfq_list, entry);
507 } 507 }
508 508
509 /* 509 /*
510 * add to busy list of queues for service, trying to be fair in ordering 510 * add to busy list of queues for service, trying to be fair in ordering
511 * the pending list according to last request service 511 * the pending list according to last request service
512 */ 512 */
513 static inline void 513 static inline void
514 cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 514 cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
515 { 515 {
516 BUG_ON(cfq_cfqq_on_rr(cfqq)); 516 BUG_ON(cfq_cfqq_on_rr(cfqq));
517 cfq_mark_cfqq_on_rr(cfqq); 517 cfq_mark_cfqq_on_rr(cfqq);
518 cfqd->busy_queues++; 518 cfqd->busy_queues++;
519 519
520 cfq_resort_rr_list(cfqq, 0); 520 cfq_resort_rr_list(cfqq, 0);
521 } 521 }
522 522
523 static inline void 523 static inline void
524 cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 524 cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
525 { 525 {
526 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 526 BUG_ON(!cfq_cfqq_on_rr(cfqq));
527 cfq_clear_cfqq_on_rr(cfqq); 527 cfq_clear_cfqq_on_rr(cfqq);
528 list_move(&cfqq->cfq_list, &cfqd->empty_list); 528 list_move(&cfqq->cfq_list, &cfqd->empty_list);
529 529
530 BUG_ON(!cfqd->busy_queues); 530 BUG_ON(!cfqd->busy_queues);
531 cfqd->busy_queues--; 531 cfqd->busy_queues--;
532 } 532 }
533 533
534 /* 534 /*
535 * rb tree support functions 535 * rb tree support functions
536 */ 536 */
537 static inline void cfq_del_crq_rb(struct cfq_rq *crq) 537 static inline void cfq_del_crq_rb(struct cfq_rq *crq)
538 { 538 {
539 struct cfq_queue *cfqq = crq->cfq_queue; 539 struct cfq_queue *cfqq = crq->cfq_queue;
540 struct cfq_data *cfqd = cfqq->cfqd; 540 struct cfq_data *cfqd = cfqq->cfqd;
541 const int sync = cfq_crq_is_sync(crq); 541 const int sync = cfq_crq_is_sync(crq);
542 542
543 BUG_ON(!cfqq->queued[sync]); 543 BUG_ON(!cfqq->queued[sync]);
544 cfqq->queued[sync]--; 544 cfqq->queued[sync]--;
545 545
546 cfq_update_next_crq(crq); 546 cfq_update_next_crq(crq);
547 547
548 rb_erase(&crq->rb_node, &cfqq->sort_list); 548 rb_erase(&crq->rb_node, &cfqq->sort_list);
549 RB_CLEAR_COLOR(&crq->rb_node); 549 RB_CLEAR_COLOR(&crq->rb_node);
550 550
551 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list)) 551 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
552 cfq_del_cfqq_rr(cfqd, cfqq); 552 cfq_del_cfqq_rr(cfqd, cfqq);
553 } 553 }
554 554
555 static struct cfq_rq * 555 static struct cfq_rq *
556 __cfq_add_crq_rb(struct cfq_rq *crq) 556 __cfq_add_crq_rb(struct cfq_rq *crq)
557 { 557 {
558 struct rb_node **p = &crq->cfq_queue->sort_list.rb_node; 558 struct rb_node **p = &crq->cfq_queue->sort_list.rb_node;
559 struct rb_node *parent = NULL; 559 struct rb_node *parent = NULL;
560 struct cfq_rq *__crq; 560 struct cfq_rq *__crq;
561 561
562 while (*p) { 562 while (*p) {
563 parent = *p; 563 parent = *p;
564 __crq = rb_entry_crq(parent); 564 __crq = rb_entry_crq(parent);
565 565
566 if (crq->rb_key < __crq->rb_key) 566 if (crq->rb_key < __crq->rb_key)
567 p = &(*p)->rb_left; 567 p = &(*p)->rb_left;
568 else if (crq->rb_key > __crq->rb_key) 568 else if (crq->rb_key > __crq->rb_key)
569 p = &(*p)->rb_right; 569 p = &(*p)->rb_right;
570 else 570 else
571 return __crq; 571 return __crq;
572 } 572 }
573 573
574 rb_link_node(&crq->rb_node, parent, p); 574 rb_link_node(&crq->rb_node, parent, p);
575 return NULL; 575 return NULL;
576 } 576 }
577 577
578 static void cfq_add_crq_rb(struct cfq_rq *crq) 578 static void cfq_add_crq_rb(struct cfq_rq *crq)
579 { 579 {
580 struct cfq_queue *cfqq = crq->cfq_queue; 580 struct cfq_queue *cfqq = crq->cfq_queue;
581 struct cfq_data *cfqd = cfqq->cfqd; 581 struct cfq_data *cfqd = cfqq->cfqd;
582 struct request *rq = crq->request; 582 struct request *rq = crq->request;
583 struct cfq_rq *__alias; 583 struct cfq_rq *__alias;
584 584
585 crq->rb_key = rq_rb_key(rq); 585 crq->rb_key = rq_rb_key(rq);
586 cfqq->queued[cfq_crq_is_sync(crq)]++; 586 cfqq->queued[cfq_crq_is_sync(crq)]++;
587 587
588 /* 588 /*
589 * looks a little odd, but the first insert might return an alias. 589 * looks a little odd, but the first insert might return an alias.
590 * if that happens, put the alias on the dispatch list 590 * if that happens, put the alias on the dispatch list
591 */ 591 */
592 while ((__alias = __cfq_add_crq_rb(crq)) != NULL) 592 while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
593 cfq_dispatch_insert(cfqd->queue, __alias); 593 cfq_dispatch_insert(cfqd->queue, __alias);
594 594
595 rb_insert_color(&crq->rb_node, &cfqq->sort_list); 595 rb_insert_color(&crq->rb_node, &cfqq->sort_list);
596 596
597 if (!cfq_cfqq_on_rr(cfqq)) 597 if (!cfq_cfqq_on_rr(cfqq))
598 cfq_add_cfqq_rr(cfqd, cfqq); 598 cfq_add_cfqq_rr(cfqd, cfqq);
599 599
600 /* 600 /*
601 * check if this request is a better next-serve candidate 601 * check if this request is a better next-serve candidate
602 */ 602 */
603 cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); 603 cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
604 } 604 }
605 605
606 static inline void 606 static inline void
607 cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) 607 cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
608 { 608 {
609 rb_erase(&crq->rb_node, &cfqq->sort_list); 609 rb_erase(&crq->rb_node, &cfqq->sort_list);
610 cfqq->queued[cfq_crq_is_sync(crq)]--; 610 cfqq->queued[cfq_crq_is_sync(crq)]--;
611 611
612 cfq_add_crq_rb(crq); 612 cfq_add_crq_rb(crq);
613 } 613 }
614 614
615 static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector) 615 static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
616 616
617 { 617 {
618 struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid, CFQ_KEY_ANY); 618 struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid, CFQ_KEY_ANY);
619 struct rb_node *n; 619 struct rb_node *n;
620 620
621 if (!cfqq) 621 if (!cfqq)
622 goto out; 622 goto out;
623 623
624 n = cfqq->sort_list.rb_node; 624 n = cfqq->sort_list.rb_node;
625 while (n) { 625 while (n) {
626 struct cfq_rq *crq = rb_entry_crq(n); 626 struct cfq_rq *crq = rb_entry_crq(n);
627 627
628 if (sector < crq->rb_key) 628 if (sector < crq->rb_key)
629 n = n->rb_left; 629 n = n->rb_left;
630 else if (sector > crq->rb_key) 630 else if (sector > crq->rb_key)
631 n = n->rb_right; 631 n = n->rb_right;
632 else 632 else
633 return crq->request; 633 return crq->request;
634 } 634 }
635 635
636 out: 636 out:
637 return NULL; 637 return NULL;
638 } 638 }
639 639
640 static void cfq_activate_request(request_queue_t *q, struct request *rq) 640 static void cfq_activate_request(request_queue_t *q, struct request *rq)
641 { 641 {
642 struct cfq_data *cfqd = q->elevator->elevator_data; 642 struct cfq_data *cfqd = q->elevator->elevator_data;
643 643
644 cfqd->rq_in_driver++; 644 cfqd->rq_in_driver++;
645 } 645 }
646 646
647 static void cfq_deactivate_request(request_queue_t *q, struct request *rq) 647 static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
648 { 648 {
649 struct cfq_data *cfqd = q->elevator->elevator_data; 649 struct cfq_data *cfqd = q->elevator->elevator_data;
650 650
651 WARN_ON(!cfqd->rq_in_driver); 651 WARN_ON(!cfqd->rq_in_driver);
652 cfqd->rq_in_driver--; 652 cfqd->rq_in_driver--;
653 } 653 }
654 654
655 static void cfq_remove_request(struct request *rq) 655 static void cfq_remove_request(struct request *rq)
656 { 656 {
657 struct cfq_rq *crq = RQ_DATA(rq); 657 struct cfq_rq *crq = RQ_DATA(rq);
658 658
659 list_del_init(&rq->queuelist); 659 list_del_init(&rq->queuelist);
660 cfq_del_crq_rb(crq); 660 cfq_del_crq_rb(crq);
661 cfq_del_crq_hash(crq); 661 cfq_del_crq_hash(crq);
662 } 662 }
663 663
664 static int 664 static int
665 cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) 665 cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
666 { 666 {
667 struct cfq_data *cfqd = q->elevator->elevator_data; 667 struct cfq_data *cfqd = q->elevator->elevator_data;
668 struct request *__rq; 668 struct request *__rq;
669 int ret; 669 int ret;
670 670
671 __rq = cfq_find_rq_hash(cfqd, bio->bi_sector); 671 __rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
672 if (__rq && elv_rq_merge_ok(__rq, bio)) { 672 if (__rq && elv_rq_merge_ok(__rq, bio)) {
673 ret = ELEVATOR_BACK_MERGE; 673 ret = ELEVATOR_BACK_MERGE;
674 goto out; 674 goto out;
675 } 675 }
676 676
677 __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio)); 677 __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));
678 if (__rq && elv_rq_merge_ok(__rq, bio)) { 678 if (__rq && elv_rq_merge_ok(__rq, bio)) {
679 ret = ELEVATOR_FRONT_MERGE; 679 ret = ELEVATOR_FRONT_MERGE;
680 goto out; 680 goto out;
681 } 681 }
682 682
683 return ELEVATOR_NO_MERGE; 683 return ELEVATOR_NO_MERGE;
684 out: 684 out:
685 *req = __rq; 685 *req = __rq;
686 return ret; 686 return ret;
687 } 687 }
688 688
689 static void cfq_merged_request(request_queue_t *q, struct request *req) 689 static void cfq_merged_request(request_queue_t *q, struct request *req)
690 { 690 {
691 struct cfq_data *cfqd = q->elevator->elevator_data; 691 struct cfq_data *cfqd = q->elevator->elevator_data;
692 struct cfq_rq *crq = RQ_DATA(req); 692 struct cfq_rq *crq = RQ_DATA(req);
693 693
694 cfq_del_crq_hash(crq); 694 cfq_del_crq_hash(crq);
695 cfq_add_crq_hash(cfqd, crq); 695 cfq_add_crq_hash(cfqd, crq);
696 696
697 if (rq_rb_key(req) != crq->rb_key) { 697 if (rq_rb_key(req) != crq->rb_key) {
698 struct cfq_queue *cfqq = crq->cfq_queue; 698 struct cfq_queue *cfqq = crq->cfq_queue;
699 699
700 cfq_update_next_crq(crq); 700 cfq_update_next_crq(crq);
701 cfq_reposition_crq_rb(cfqq, crq); 701 cfq_reposition_crq_rb(cfqq, crq);
702 } 702 }
703 } 703 }
704 704
705 static void 705 static void
706 cfq_merged_requests(request_queue_t *q, struct request *rq, 706 cfq_merged_requests(request_queue_t *q, struct request *rq,
707 struct request *next) 707 struct request *next)
708 { 708 {
709 cfq_merged_request(q, rq); 709 cfq_merged_request(q, rq);
710 710
711 /* 711 /*
712 * reposition in fifo if next is older than rq 712 * reposition in fifo if next is older than rq
713 */ 713 */
714 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && 714 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
715 time_before(next->start_time, rq->start_time)) 715 time_before(next->start_time, rq->start_time))
716 list_move(&rq->queuelist, &next->queuelist); 716 list_move(&rq->queuelist, &next->queuelist);
717 717
718 cfq_remove_request(next); 718 cfq_remove_request(next);
719 } 719 }
720 720
721 static inline void 721 static inline void
722 __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 722 __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
723 { 723 {
724 if (cfqq) { 724 if (cfqq) {
725 /* 725 /*
726 * stop potential idle class queues waiting for service 726 * stop potential idle class queues waiting for service
727 */ 727 */
728 del_timer(&cfqd->idle_class_timer); 728 del_timer(&cfqd->idle_class_timer);
729 729
730 cfqq->slice_start = jiffies; 730 cfqq->slice_start = jiffies;
731 cfqq->slice_end = 0; 731 cfqq->slice_end = 0;
732 cfqq->slice_left = 0; 732 cfqq->slice_left = 0;
733 cfq_clear_cfqq_must_alloc_slice(cfqq); 733 cfq_clear_cfqq_must_alloc_slice(cfqq);
734 cfq_clear_cfqq_fifo_expire(cfqq); 734 cfq_clear_cfqq_fifo_expire(cfqq);
735 } 735 }
736 736
737 cfqd->active_queue = cfqq; 737 cfqd->active_queue = cfqq;
738 } 738 }
739 739
740 /* 740 /*
741 * current cfqq expired its slice (or was too idle), select new one 741 * current cfqq expired its slice (or was too idle), select new one
742 */ 742 */
743 static void 743 static void
744 __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, 744 __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
745 int preempted) 745 int preempted)
746 { 746 {
747 unsigned long now = jiffies; 747 unsigned long now = jiffies;
748 748
749 if (cfq_cfqq_wait_request(cfqq)) 749 if (cfq_cfqq_wait_request(cfqq))
750 del_timer(&cfqd->idle_slice_timer); 750 del_timer(&cfqd->idle_slice_timer);
751 751
752 if (!preempted && !cfq_cfqq_dispatched(cfqq)) { 752 if (!preempted && !cfq_cfqq_dispatched(cfqq)) {
753 cfqq->service_last = now; 753 cfqq->service_last = now;
754 cfq_schedule_dispatch(cfqd); 754 cfq_schedule_dispatch(cfqd);
755 } 755 }
756 756
757 cfq_clear_cfqq_must_dispatch(cfqq); 757 cfq_clear_cfqq_must_dispatch(cfqq);
758 cfq_clear_cfqq_wait_request(cfqq); 758 cfq_clear_cfqq_wait_request(cfqq);
759 759
760 /* 760 /*
761 * store what was left of this slice, if the queue idled out 761 * store what was left of this slice, if the queue idled out
762 * or was preempted 762 * or was preempted
763 */ 763 */
764 if (time_after(cfqq->slice_end, now)) 764 if (time_after(cfqq->slice_end, now))
765 cfqq->slice_left = cfqq->slice_end - now; 765 cfqq->slice_left = cfqq->slice_end - now;
766 else 766 else
767 cfqq->slice_left = 0; 767 cfqq->slice_left = 0;
768 768
769 if (cfq_cfqq_on_rr(cfqq)) 769 if (cfq_cfqq_on_rr(cfqq))
770 cfq_resort_rr_list(cfqq, preempted); 770 cfq_resort_rr_list(cfqq, preempted);
771 771
772 if (cfqq == cfqd->active_queue) 772 if (cfqq == cfqd->active_queue)
773 cfqd->active_queue = NULL; 773 cfqd->active_queue = NULL;
774 774
775 if (cfqd->active_cic) { 775 if (cfqd->active_cic) {
776 put_io_context(cfqd->active_cic->ioc); 776 put_io_context(cfqd->active_cic->ioc);
777 cfqd->active_cic = NULL; 777 cfqd->active_cic = NULL;
778 } 778 }
779 779
780 cfqd->dispatch_slice = 0; 780 cfqd->dispatch_slice = 0;
781 } 781 }
782 782
783 static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted) 783 static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted)
784 { 784 {
785 struct cfq_queue *cfqq = cfqd->active_queue; 785 struct cfq_queue *cfqq = cfqd->active_queue;
786 786
787 if (cfqq) 787 if (cfqq)
788 __cfq_slice_expired(cfqd, cfqq, preempted); 788 __cfq_slice_expired(cfqd, cfqq, preempted);
789 } 789 }
790 790
791 /* 791 /*
792 * 0 792 * 0
793 * 0,1 793 * 0,1
794 * 0,1,2 794 * 0,1,2
795 * 0,1,2,3 795 * 0,1,2,3
796 * 0,1,2,3,4 796 * 0,1,2,3,4
797 * 0,1,2,3,4,5 797 * 0,1,2,3,4,5
798 * 0,1,2,3,4,5,6 798 * 0,1,2,3,4,5,6
799 * 0,1,2,3,4,5,6,7 799 * 0,1,2,3,4,5,6,7
800 */ 800 */
801 static int cfq_get_next_prio_level(struct cfq_data *cfqd) 801 static int cfq_get_next_prio_level(struct cfq_data *cfqd)
802 { 802 {
803 int prio, wrap; 803 int prio, wrap;
804 804
805 prio = -1; 805 prio = -1;
806 wrap = 0; 806 wrap = 0;
807 do { 807 do {
808 int p; 808 int p;
809 809
810 for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) { 810 for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) {
811 if (!list_empty(&cfqd->rr_list[p])) { 811 if (!list_empty(&cfqd->rr_list[p])) {
812 prio = p; 812 prio = p;
813 break; 813 break;
814 } 814 }
815 } 815 }
816 816
817 if (prio != -1) 817 if (prio != -1)
818 break; 818 break;
819 cfqd->cur_prio = 0; 819 cfqd->cur_prio = 0;
820 if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) { 820 if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
821 cfqd->cur_end_prio = 0; 821 cfqd->cur_end_prio = 0;
822 if (wrap) 822 if (wrap)
823 break; 823 break;
824 wrap = 1; 824 wrap = 1;
825 } 825 }
826 } while (1); 826 } while (1);
827 827
828 if (unlikely(prio == -1)) 828 if (unlikely(prio == -1))
829 return -1; 829 return -1;
830 830
831 BUG_ON(prio >= CFQ_PRIO_LISTS); 831 BUG_ON(prio >= CFQ_PRIO_LISTS);
832 832
833 list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr); 833 list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr);
834 834
835 cfqd->cur_prio = prio + 1; 835 cfqd->cur_prio = prio + 1;
836 if (cfqd->cur_prio > cfqd->cur_end_prio) { 836 if (cfqd->cur_prio > cfqd->cur_end_prio) {
837 cfqd->cur_end_prio = cfqd->cur_prio; 837 cfqd->cur_end_prio = cfqd->cur_prio;
838 cfqd->cur_prio = 0; 838 cfqd->cur_prio = 0;
839 } 839 }
840 if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) { 840 if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
841 cfqd->cur_prio = 0; 841 cfqd->cur_prio = 0;
842 cfqd->cur_end_prio = 0; 842 cfqd->cur_end_prio = 0;
843 } 843 }
844 844
845 return prio; 845 return prio;
846 } 846 }
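Editorial note on the ladder comment before cfq_get_next_prio_level() ("0", "0,1", "0,1,2", ...): the function widens the [cur_prio, cur_end_prio] window by one each time it wraps, so lower-numbered (higher) priorities are revisited more often. Assuming every rr_list[] stays non-empty, successive calls splice:

	/* pass:  1   2   3   4   5   6   7   8   9   10  ...          */
	/* prio:  0 | 0   1 | 0   1   2 | 0   1   2   3 | ...          */
	/* i.e. prio 0 is served in every round, prio 1 in all but     */
	/* the first, and prio 7 only once per full cycle              */

This is exactly the expanding pattern the comment above the function spells out.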
847 847
848 static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) 848 static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
849 { 849 {
850 struct cfq_queue *cfqq = NULL; 850 struct cfq_queue *cfqq = NULL;
851 851
852 /* 852 /*
853 * if the current list is non-empty, grab the first entry. if it is empty, 853 * if the current list is non-empty, grab the first entry. if it is empty,
854 * get the next prio level and, if anything was spliced in, grab its first entry 854 * get the next prio level and, if anything was spliced in, grab its first entry
855 */ 855 */
856 if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) 856 if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1)
857 cfqq = list_entry_cfqq(cfqd->cur_rr.next); 857 cfqq = list_entry_cfqq(cfqd->cur_rr.next);
858 858
859 /* 859 /*
860 * if we have idle queues and no rt or be queues had pending 860 * if we have idle queues and no rt or be queues had pending
861 * requests, either allow immediate service if the grace period 861 * requests, either allow immediate service if the grace period
862 * has passed or arm the idle grace timer 862 * has passed or arm the idle grace timer
863 */ 863 */
864 if (!cfqq && !list_empty(&cfqd->idle_rr)) { 864 if (!cfqq && !list_empty(&cfqd->idle_rr)) {
865 unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; 865 unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
866 866
867 if (time_after_eq(jiffies, end)) 867 if (time_after_eq(jiffies, end))
868 cfqq = list_entry_cfqq(cfqd->idle_rr.next); 868 cfqq = list_entry_cfqq(cfqd->idle_rr.next);
869 else 869 else
870 mod_timer(&cfqd->idle_class_timer, end); 870 mod_timer(&cfqd->idle_class_timer, end);
871 } 871 }
872 872
873 __cfq_set_active_queue(cfqd, cfqq); 873 __cfq_set_active_queue(cfqd, cfqq);
874 return cfqq; 874 return cfqq;
875 } 875 }
876 876
877 static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) 877 static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
878 878
879 { 879 {
880 unsigned long sl; 880 unsigned long sl;
881 881
882 WARN_ON(!RB_EMPTY(&cfqq->sort_list)); 882 WARN_ON(!RB_EMPTY(&cfqq->sort_list));
883 WARN_ON(cfqq != cfqd->active_queue); 883 WARN_ON(cfqq != cfqd->active_queue);
884 884
885 /* 885 /*
886 * idle is disabled, either manually or by past process history 886 * idle is disabled, either manually or by past process history
887 */ 887 */
888 if (!cfqd->cfq_slice_idle) 888 if (!cfqd->cfq_slice_idle)
889 return 0; 889 return 0;
890 if (!cfq_cfqq_idle_window(cfqq)) 890 if (!cfq_cfqq_idle_window(cfqq))
891 return 0; 891 return 0;
892 /* 892 /*
893 * task has exited, don't wait 893 * task has exited, don't wait
894 */ 894 */
895 if (cfqd->active_cic && !cfqd->active_cic->ioc->task) 895 if (cfqd->active_cic && !cfqd->active_cic->ioc->task)
896 return 0; 896 return 0;
897 897
898 cfq_mark_cfqq_must_dispatch(cfqq); 898 cfq_mark_cfqq_must_dispatch(cfqq);
899 cfq_mark_cfqq_wait_request(cfqq); 899 cfq_mark_cfqq_wait_request(cfqq);
900 900
901 sl = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle); 901 sl = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle);
902 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 902 mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
903 return 1; 903 return 1;
904 } 904 }
905 905
906 static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq) 906 static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
907 { 907 {
908 struct cfq_data *cfqd = q->elevator->elevator_data; 908 struct cfq_data *cfqd = q->elevator->elevator_data;
909 struct cfq_queue *cfqq = crq->cfq_queue; 909 struct cfq_queue *cfqq = crq->cfq_queue;
910 910
911 cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); 911 cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);
912 cfq_remove_request(crq->request); 912 cfq_remove_request(crq->request);
913 cfqq->on_dispatch[cfq_crq_is_sync(crq)]++; 913 cfqq->on_dispatch[cfq_crq_is_sync(crq)]++;
914 elv_dispatch_sort(q, crq->request); 914 elv_dispatch_sort(q, crq->request);
915 } 915 }
916 916
917 /* 917 /*
918 * return expired entry, or NULL to just start from scratch in rbtree 918 * return expired entry, or NULL to just start from scratch in rbtree
919 */ 919 */
920 static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) 920 static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq)
921 { 921 {
922 struct cfq_data *cfqd = cfqq->cfqd; 922 struct cfq_data *cfqd = cfqq->cfqd;
923 struct request *rq; 923 struct request *rq;
924 struct cfq_rq *crq; 924 struct cfq_rq *crq;
925 925
926 if (cfq_cfqq_fifo_expire(cfqq)) 926 if (cfq_cfqq_fifo_expire(cfqq))
927 return NULL; 927 return NULL;
928 928
929 if (!list_empty(&cfqq->fifo)) { 929 if (!list_empty(&cfqq->fifo)) {
930 int fifo = cfq_cfqq_class_sync(cfqq); 930 int fifo = cfq_cfqq_class_sync(cfqq);
931 931
932 crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next)); 932 crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next));
933 rq = crq->request; 933 rq = crq->request;
934 if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { 934 if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
935 cfq_mark_cfqq_fifo_expire(cfqq); 935 cfq_mark_cfqq_fifo_expire(cfqq);
936 return crq; 936 return crq;
937 } 937 }
938 } 938 }
939 939
940 return NULL; 940 return NULL;
941 } 941 }
942 942
943 /* 943 /*
944 * Scale schedule slice based on io priority. Use the sync time slice only 944 * Scale schedule slice based on io priority. Use the sync time slice only
945 * if a queue is marked sync and has sync io queued. A sync queue with async 945 * if a queue is marked sync and has sync io queued. A sync queue with async
946 * io only should not get the full sync slice length. 946 * io only should not get the full sync slice length.
947 */ 947 */
948 static inline int 948 static inline int
949 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 949 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
950 { 950 {
951 const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)]; 951 const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)];
952 952
953 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); 953 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
954 954
955 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio)); 955 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio));
956 } 956 }
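Editorial arithmetic for cfq_prio_to_slice() above; the numbers are hypothetical, and CFQ_SLICE_SCALE (defined earlier in this file, outside this hunk) is assumed here to be 5:

	/* base_slice = 100 jiffies, scale step = 100 / 5 = 20         */
	/* ioprio 4 (default): 100 + 20 * (4 - 4) = 100 jiffies        */
	/* ioprio 0 (highest): 100 + 20 * (4 - 0) = 180 jiffies        */
	/* ioprio 7 (lowest):  100 + 20 * (4 - 7) =  40 jiffies        */

Each ioprio step away from the default of 4 adds or removes one scale step of the base slice.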
957 957
958 static inline void 958 static inline void
959 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 959 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
960 { 960 {
961 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; 961 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
962 } 962 }
963 963
964 static inline int 964 static inline int
965 cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) 965 cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
966 { 966 {
967 const int base_rq = cfqd->cfq_slice_async_rq; 967 const int base_rq = cfqd->cfq_slice_async_rq;
968 968
969 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); 969 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
970 970
971 return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio)); 971 return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
972 } 972 }
973 973
974 /* 974 /*
975 * get next queue for service 975 * get next queue for service
976 */ 976 */
977 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) 977 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
978 { 978 {
979 unsigned long now = jiffies; 979 unsigned long now = jiffies;
980 struct cfq_queue *cfqq; 980 struct cfq_queue *cfqq;
981 981
982 cfqq = cfqd->active_queue; 982 cfqq = cfqd->active_queue;
983 if (!cfqq) 983 if (!cfqq)
984 goto new_queue; 984 goto new_queue;
985 985
986 /* 986 /*
987 * slice has expired 987 * slice has expired
988 */ 988 */
989 if (!cfq_cfqq_must_dispatch(cfqq) && time_after(now, cfqq->slice_end)) 989 if (!cfq_cfqq_must_dispatch(cfqq) && time_after(now, cfqq->slice_end))
990 goto expire; 990 goto expire;
991 991
992 /* 992 /*
993 * if queue has requests, dispatch one. if not, check if 993 * if queue has requests, dispatch one. if not, check if
994 * enough slice is left to wait for one 994 * enough slice is left to wait for one
995 */ 995 */
996 if (!RB_EMPTY(&cfqq->sort_list)) 996 if (!RB_EMPTY(&cfqq->sort_list))
997 goto keep_queue; 997 goto keep_queue;
998 else if (cfq_cfqq_class_sync(cfqq) && 998 else if (cfq_cfqq_class_sync(cfqq) &&
999 time_before(now, cfqq->slice_end)) { 999 time_before(now, cfqq->slice_end)) {
1000 if (cfq_arm_slice_timer(cfqd, cfqq)) 1000 if (cfq_arm_slice_timer(cfqd, cfqq))
1001 return NULL; 1001 return NULL;
1002 } 1002 }
1003 1003
1004 expire: 1004 expire:
1005 cfq_slice_expired(cfqd, 0); 1005 cfq_slice_expired(cfqd, 0);
1006 new_queue: 1006 new_queue:
1007 cfqq = cfq_set_active_queue(cfqd); 1007 cfqq = cfq_set_active_queue(cfqd);
1008 keep_queue: 1008 keep_queue:
1009 return cfqq; 1009 return cfqq;
1010 } 1010 }
1011 1011
1012 static int 1012 static int
1013 __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1013 __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1014 int max_dispatch) 1014 int max_dispatch)
1015 { 1015 {
1016 int dispatched = 0; 1016 int dispatched = 0;
1017 1017
1018 BUG_ON(RB_EMPTY(&cfqq->sort_list)); 1018 BUG_ON(RB_EMPTY(&cfqq->sort_list));
1019 1019
1020 do { 1020 do {
1021 struct cfq_rq *crq; 1021 struct cfq_rq *crq;
1022 1022
1023 /* 1023 /*
1024 * follow expired path, else get first next available 1024 * follow expired path, else get first next available
1025 */ 1025 */
1026 if ((crq = cfq_check_fifo(cfqq)) == NULL) 1026 if ((crq = cfq_check_fifo(cfqq)) == NULL)
1027 crq = cfqq->next_crq; 1027 crq = cfqq->next_crq;
1028 1028
1029 /* 1029 /*
1030 * finally, insert request into driver dispatch list 1030 * finally, insert request into driver dispatch list
1031 */ 1031 */
1032 cfq_dispatch_insert(cfqd->queue, crq); 1032 cfq_dispatch_insert(cfqd->queue, crq);
1033 1033
1034 cfqd->dispatch_slice++; 1034 cfqd->dispatch_slice++;
1035 dispatched++; 1035 dispatched++;
1036 1036
1037 if (!cfqd->active_cic) { 1037 if (!cfqd->active_cic) {
1038 atomic_inc(&crq->io_context->ioc->refcount); 1038 atomic_inc(&crq->io_context->ioc->refcount);
1039 cfqd->active_cic = crq->io_context; 1039 cfqd->active_cic = crq->io_context;
1040 } 1040 }
1041 1041
1042 if (RB_EMPTY(&cfqq->sort_list)) 1042 if (RB_EMPTY(&cfqq->sort_list))
1043 break; 1043 break;
1044 1044
1045 } while (dispatched < max_dispatch); 1045 } while (dispatched < max_dispatch);
1046 1046
1047 /* 1047 /*
1048 * if slice end isn't set yet, set it. if at least one request was 1048 * if slice end isn't set yet, set it. if at least one request was
1049 * sync, use the sync time slice value 1049 * sync, use the sync time slice value
1050 */ 1050 */
1051 if (!cfqq->slice_end) 1051 if (!cfqq->slice_end)
1052 cfq_set_prio_slice(cfqd, cfqq); 1052 cfq_set_prio_slice(cfqd, cfqq);
1053 1053
1054 /* 1054 /*
1055 * expire an async queue immediately if it has used up its slice. idle 1055 * expire an async queue immediately if it has used up its slice. idle
1056 * queues always expire after 1 dispatch round. 1056 * queues always expire after 1 dispatch round.
1057 */ 1057 */
1058 if ((!cfq_cfqq_sync(cfqq) && 1058 if ((!cfq_cfqq_sync(cfqq) &&
1059 cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) || 1059 cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
1060 cfq_class_idle(cfqq)) 1060 cfq_class_idle(cfqq))
1061 cfq_slice_expired(cfqd, 0); 1061 cfq_slice_expired(cfqd, 0);
1062 1062
1063 return dispatched; 1063 return dispatched;
1064 } 1064 }
1065 1065
1066 static int 1066 static int
1067 cfq_forced_dispatch_cfqqs(struct list_head *list) 1067 cfq_forced_dispatch_cfqqs(struct list_head *list)
1068 { 1068 {
1069 int dispatched = 0; 1069 int dispatched = 0;
1070 struct cfq_queue *cfqq, *next; 1070 struct cfq_queue *cfqq, *next;
1071 struct cfq_rq *crq; 1071 struct cfq_rq *crq;
1072 1072
1073 list_for_each_entry_safe(cfqq, next, list, cfq_list) { 1073 list_for_each_entry_safe(cfqq, next, list, cfq_list) {
1074 while ((crq = cfqq->next_crq)) { 1074 while ((crq = cfqq->next_crq)) {
1075 cfq_dispatch_insert(cfqq->cfqd->queue, crq); 1075 cfq_dispatch_insert(cfqq->cfqd->queue, crq);
1076 dispatched++; 1076 dispatched++;
1077 } 1077 }
1078 BUG_ON(!list_empty(&cfqq->fifo)); 1078 BUG_ON(!list_empty(&cfqq->fifo));
1079 } 1079 }
1080 return dispatched; 1080 return dispatched;
1081 } 1081 }
1082 1082
1083 static int 1083 static int
1084 cfq_forced_dispatch(struct cfq_data *cfqd) 1084 cfq_forced_dispatch(struct cfq_data *cfqd)
1085 { 1085 {
1086 int i, dispatched = 0; 1086 int i, dispatched = 0;
1087 1087
1088 for (i = 0; i < CFQ_PRIO_LISTS; i++) 1088 for (i = 0; i < CFQ_PRIO_LISTS; i++)
1089 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]); 1089 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]);
1090 1090
1091 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->busy_rr); 1091 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->busy_rr);
1092 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr); 1092 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr);
1093 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr); 1093 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr);
1094 1094
1095 cfq_slice_expired(cfqd, 0); 1095 cfq_slice_expired(cfqd, 0);
1096 1096
1097 BUG_ON(cfqd->busy_queues); 1097 BUG_ON(cfqd->busy_queues);
1098 1098
1099 return dispatched; 1099 return dispatched;
1100 } 1100 }
1101 1101
1102 static int 1102 static int
1103 cfq_dispatch_requests(request_queue_t *q, int force) 1103 cfq_dispatch_requests(request_queue_t *q, int force)
1104 { 1104 {
1105 struct cfq_data *cfqd = q->elevator->elevator_data; 1105 struct cfq_data *cfqd = q->elevator->elevator_data;
1106 struct cfq_queue *cfqq; 1106 struct cfq_queue *cfqq;
1107 1107
1108 if (!cfqd->busy_queues) 1108 if (!cfqd->busy_queues)
1109 return 0; 1109 return 0;
1110 1110
1111 if (unlikely(force)) 1111 if (unlikely(force))
1112 return cfq_forced_dispatch(cfqd); 1112 return cfq_forced_dispatch(cfqd);
1113 1113
1114 cfqq = cfq_select_queue(cfqd); 1114 cfqq = cfq_select_queue(cfqd);
1115 if (cfqq) { 1115 if (cfqq) {
1116 int max_dispatch; 1116 int max_dispatch;
1117 1117
1118 /* 1118 /*
1119 * if idle window is disabled, allow queue buildup 1119 * if idle window is disabled, allow queue buildup
1120 */ 1120 */
1121 if (!cfq_cfqq_idle_window(cfqq) && 1121 if (!cfq_cfqq_idle_window(cfqq) &&
1122 cfqd->rq_in_driver >= cfqd->cfq_max_depth) 1122 cfqd->rq_in_driver >= cfqd->cfq_max_depth)
1123 return 0; 1123 return 0;
1124 1124
1125 cfq_clear_cfqq_must_dispatch(cfqq); 1125 cfq_clear_cfqq_must_dispatch(cfqq);
1126 cfq_clear_cfqq_wait_request(cfqq); 1126 cfq_clear_cfqq_wait_request(cfqq);
1127 del_timer(&cfqd->idle_slice_timer); 1127 del_timer(&cfqd->idle_slice_timer);
1128 1128
1129 max_dispatch = cfqd->cfq_quantum; 1129 max_dispatch = cfqd->cfq_quantum;
1130 if (cfq_class_idle(cfqq)) 1130 if (cfq_class_idle(cfqq))
1131 max_dispatch = 1; 1131 max_dispatch = 1;
1132 1132
1133 return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); 1133 return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
1134 } 1134 }
1135 1135
1136 return 0; 1136 return 0;
1137 } 1137 }
1138 1138
1139 /* 1139 /*
1140 * task holds one reference to the queue, dropped when task exits. each crq 1140 * task holds one reference to the queue, dropped when task exits. each crq
1141 * in-flight on this queue also holds a reference, dropped when crq is freed. 1141 * in-flight on this queue also holds a reference, dropped when crq is freed.
1142 * 1142 *
1143 * queue lock must be held here. 1143 * queue lock must be held here.
1144 */ 1144 */
1145 static void cfq_put_queue(struct cfq_queue *cfqq) 1145 static void cfq_put_queue(struct cfq_queue *cfqq)
1146 { 1146 {
1147 struct cfq_data *cfqd = cfqq->cfqd; 1147 struct cfq_data *cfqd = cfqq->cfqd;
1148 1148
1149 BUG_ON(atomic_read(&cfqq->ref) <= 0); 1149 BUG_ON(atomic_read(&cfqq->ref) <= 0);
1150 1150
1151 if (!atomic_dec_and_test(&cfqq->ref)) 1151 if (!atomic_dec_and_test(&cfqq->ref))
1152 return; 1152 return;
1153 1153
1154 BUG_ON(rb_first(&cfqq->sort_list)); 1154 BUG_ON(rb_first(&cfqq->sort_list));
1155 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); 1155 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
1156 BUG_ON(cfq_cfqq_on_rr(cfqq)); 1156 BUG_ON(cfq_cfqq_on_rr(cfqq));
1157 1157
1158 if (unlikely(cfqd->active_queue == cfqq)) 1158 if (unlikely(cfqd->active_queue == cfqq))
1159 __cfq_slice_expired(cfqd, cfqq, 0); 1159 __cfq_slice_expired(cfqd, cfqq, 0);
1160 1160
1161 /* 1161 /*
1162 * it's on the empty list and still hashed 1162 * it's on the empty list and still hashed
1163 */ 1163 */
1164 list_del(&cfqq->cfq_list); 1164 list_del(&cfqq->cfq_list);
1165 hlist_del(&cfqq->cfq_hash); 1165 hlist_del(&cfqq->cfq_hash);
1166 kmem_cache_free(cfq_pool, cfqq); 1166 kmem_cache_free(cfq_pool, cfqq);
1167 } 1167 }
1168 1168
1169 static inline struct cfq_queue * 1169 static inline struct cfq_queue *
1170 __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio, 1170 __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
1171 const int hashval) 1171 const int hashval)
1172 { 1172 {
1173 struct hlist_head *hash_list = &cfqd->cfq_hash[hashval]; 1173 struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
1174 struct hlist_node *entry, *next; 1174 struct hlist_node *entry, *next;
1175 1175
1176 hlist_for_each_safe(entry, next, hash_list) { 1176 hlist_for_each_safe(entry, next, hash_list) {
1177 struct cfq_queue *__cfqq = list_entry_qhash(entry); 1177 struct cfq_queue *__cfqq = list_entry_qhash(entry);
1178 const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->org_ioprio_class, __cfqq->org_ioprio); 1178 const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->org_ioprio_class, __cfqq->org_ioprio);
1179 1179
1180 if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY)) 1180 if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY))
1181 return __cfqq; 1181 return __cfqq;
1182 } 1182 }
1183 1183
1184 return NULL; 1184 return NULL;
1185 } 1185 }
1186 1186
1187 static struct cfq_queue * 1187 static struct cfq_queue *
1188 cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio) 1188 cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio)
1189 { 1189 {
1190 return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT)); 1190 return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT));
1191 } 1191 }
1192 1192
1193 static void cfq_free_io_context(struct cfq_io_context *cic) 1193 static void cfq_free_io_context(struct cfq_io_context *cic)
1194 { 1194 {
1195 struct cfq_io_context *__cic; 1195 struct cfq_io_context *__cic;
1196 struct list_head *entry, *next; 1196 struct list_head *entry, *next;
1197 int freed = 1; 1197 int freed = 1;
1198 1198
1199 list_for_each_safe(entry, next, &cic->list) { 1199 list_for_each_safe(entry, next, &cic->list) {
1200 __cic = list_entry(entry, struct cfq_io_context, list); 1200 __cic = list_entry(entry, struct cfq_io_context, list);
1201 kmem_cache_free(cfq_ioc_pool, __cic); 1201 kmem_cache_free(cfq_ioc_pool, __cic);
1202 freed++; 1202 freed++;
1203 } 1203 }
1204 1204
1205 kmem_cache_free(cfq_ioc_pool, cic); 1205 kmem_cache_free(cfq_ioc_pool, cic);
1206 if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone) 1206 if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone)
1207 complete(ioc_gone); 1207 complete(ioc_gone);
1208 } 1208 }
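Editorial note: ioc_count and ioc_gone above exist so the module can only be unloaded once every io context is gone. The exit path (outside this hunk) publishes a completion through ioc_gone and, if contexts are still live, waits for the last cfq_free_io_context() to complete() it. A sketch of that drain pattern, assuming the usual iosched_cfq elevator_type name; this is not a quote of the actual cfq_exit():

static void __exit cfq_exit(void)
{
	DECLARE_COMPLETION(all_gone);

	/* no new io contexts can be created once the elevator is gone */
	elv_unregister(&iosched_cfq);

	ioc_gone = &all_gone;
	barrier();	/* publish ioc_gone before sampling the count */

	/*
	 * if contexts are still live, the final cfq_free_io_context()
	 * will see ioc_gone set and complete() it for us
	 */
	if (atomic_read(&ioc_count))
		wait_for_completion(&all_gone);

	kmem_cache_destroy(crq_pool);
	kmem_cache_destroy(cfq_pool);
	kmem_cache_destroy(cfq_ioc_pool);
}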
1209 1209
1210 static void cfq_trim(struct io_context *ioc) 1210 static void cfq_trim(struct io_context *ioc)
1211 { 1211 {
1212 ioc->set_ioprio = NULL; 1212 ioc->set_ioprio = NULL;
1213 if (ioc->cic) 1213 if (ioc->cic)
1214 cfq_free_io_context(ioc->cic); 1214 cfq_free_io_context(ioc->cic);
1215 } 1215 }
1216 1216
1217 /* 1217 /*
1218 * Called with interrupts disabled 1218 * Called with interrupts disabled
1219 */ 1219 */
1220 static void cfq_exit_single_io_context(struct cfq_io_context *cic) 1220 static void cfq_exit_single_io_context(struct cfq_io_context *cic)
1221 { 1221 {
1222 struct cfq_data *cfqd = cic->key; 1222 struct cfq_data *cfqd = cic->key;
1223 request_queue_t *q; 1223 request_queue_t *q;
1224 1224
1225 if (!cfqd) 1225 if (!cfqd)
1226 return; 1226 return;
1227 1227
1228 q = cfqd->queue; 1228 q = cfqd->queue;
1229 1229
1230 WARN_ON(!irqs_disabled()); 1230 WARN_ON(!irqs_disabled());
1231 1231
1232 spin_lock(q->queue_lock); 1232 spin_lock(q->queue_lock);
1233 1233
1234 if (cic->cfqq[ASYNC]) { 1234 if (cic->cfqq[ASYNC]) {
1235 if (unlikely(cic->cfqq[ASYNC] == cfqd->active_queue)) 1235 if (unlikely(cic->cfqq[ASYNC] == cfqd->active_queue))
1236 __cfq_slice_expired(cfqd, cic->cfqq[ASYNC], 0); 1236 __cfq_slice_expired(cfqd, cic->cfqq[ASYNC], 0);
1237 cfq_put_queue(cic->cfqq[ASYNC]); 1237 cfq_put_queue(cic->cfqq[ASYNC]);
1238 cic->cfqq[ASYNC] = NULL; 1238 cic->cfqq[ASYNC] = NULL;
1239 } 1239 }
1240 1240
1241 if (cic->cfqq[SYNC]) { 1241 if (cic->cfqq[SYNC]) {
1242 if (unlikely(cic->cfqq[SYNC] == cfqd->active_queue)) 1242 if (unlikely(cic->cfqq[SYNC] == cfqd->active_queue))
1243 __cfq_slice_expired(cfqd, cic->cfqq[SYNC], 0); 1243 __cfq_slice_expired(cfqd, cic->cfqq[SYNC], 0);
1244 cfq_put_queue(cic->cfqq[SYNC]); 1244 cfq_put_queue(cic->cfqq[SYNC]);
1245 cic->cfqq[SYNC] = NULL; 1245 cic->cfqq[SYNC] = NULL;
1246 } 1246 }
1247 1247
1248 cic->key = NULL; 1248 cic->key = NULL;
1249 list_del_init(&cic->queue_list); 1249 list_del_init(&cic->queue_list);
1250 spin_unlock(q->queue_lock); 1250 spin_unlock(q->queue_lock);
1251 } 1251 }
1252 1252
1253 static void cfq_exit_io_context(struct cfq_io_context *cic) 1253 static void cfq_exit_io_context(struct cfq_io_context *cic)
1254 { 1254 {
1255 struct cfq_io_context *__cic; 1255 struct cfq_io_context *__cic;
1256 struct list_head *entry; 1256 struct list_head *entry;
1257 unsigned long flags; 1257 unsigned long flags;
1258 1258
1259 local_irq_save(flags); 1259 local_irq_save(flags);
1260 1260
1261 /* 1261 /*
1262 * put the reference this task is holding to the various queues 1262 * put the reference this task is holding to the various queues
1263 */ 1263 */
1264 read_lock(&cfq_exit_lock); 1264 read_lock(&cfq_exit_lock);
1265 list_for_each(entry, &cic->list) { 1265 list_for_each(entry, &cic->list) {
1266 __cic = list_entry(entry, struct cfq_io_context, list); 1266 __cic = list_entry(entry, struct cfq_io_context, list);
1267 cfq_exit_single_io_context(__cic); 1267 cfq_exit_single_io_context(__cic);
1268 } 1268 }
1269 1269
1270 cfq_exit_single_io_context(cic); 1270 cfq_exit_single_io_context(cic);
1271 read_unlock(&cfq_exit_lock); 1271 read_unlock(&cfq_exit_lock);
1272 local_irq_restore(flags); 1272 local_irq_restore(flags);
1273 } 1273 }
1274 1274
1275 static struct cfq_io_context * 1275 static struct cfq_io_context *
1276 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 1276 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
1277 { 1277 {
1278 struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); 1278 struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
1279 1279
1280 if (cic) { 1280 if (cic) {
1281 INIT_LIST_HEAD(&cic->list); 1281 INIT_LIST_HEAD(&cic->list);
1282 cic->cfqq[ASYNC] = NULL; 1282 cic->cfqq[ASYNC] = NULL;
1283 cic->cfqq[SYNC] = NULL; 1283 cic->cfqq[SYNC] = NULL;
1284 cic->key = NULL; 1284 cic->key = NULL;
1285 cic->last_end_request = jiffies; 1285 cic->last_end_request = jiffies;
1286 cic->ttime_total = 0; 1286 cic->ttime_total = 0;
1287 cic->ttime_samples = 0; 1287 cic->ttime_samples = 0;
1288 cic->ttime_mean = 0; 1288 cic->ttime_mean = 0;
1289 cic->dtor = cfq_free_io_context; 1289 cic->dtor = cfq_free_io_context;
1290 cic->exit = cfq_exit_io_context; 1290 cic->exit = cfq_exit_io_context;
1291 INIT_LIST_HEAD(&cic->queue_list); 1291 INIT_LIST_HEAD(&cic->queue_list);
1292 atomic_inc(&ioc_count); 1292 atomic_inc(&ioc_count);
1293 } 1293 }
1294 1294
1295 return cic; 1295 return cic;
1296 } 1296 }
1297 1297
1298 static void cfq_init_prio_data(struct cfq_queue *cfqq) 1298 static void cfq_init_prio_data(struct cfq_queue *cfqq)
1299 { 1299 {
1300 struct task_struct *tsk = current; 1300 struct task_struct *tsk = current;
1301 int ioprio_class; 1301 int ioprio_class;
1302 1302
1303 if (!cfq_cfqq_prio_changed(cfqq)) 1303 if (!cfq_cfqq_prio_changed(cfqq))
1304 return; 1304 return;
1305 1305
1306 ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio); 1306 ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio);
1307 switch (ioprio_class) { 1307 switch (ioprio_class) {
1308 default: 1308 default:
1309 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); 1309 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
1310 case IOPRIO_CLASS_NONE: 1310 case IOPRIO_CLASS_NONE:
1311 /* 1311 /*
1312 * no prio set, place us in the middle of the BE classes 1312 * no prio set, place us in the middle of the BE classes
1313 */ 1313 */
1314 cfqq->ioprio = task_nice_ioprio(tsk); 1314 cfqq->ioprio = task_nice_ioprio(tsk);
1315 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1315 cfqq->ioprio_class = IOPRIO_CLASS_BE;
1316 break; 1316 break;
1317 case IOPRIO_CLASS_RT: 1317 case IOPRIO_CLASS_RT:
1318 cfqq->ioprio = task_ioprio(tsk); 1318 cfqq->ioprio = task_ioprio(tsk);
1319 cfqq->ioprio_class = IOPRIO_CLASS_RT; 1319 cfqq->ioprio_class = IOPRIO_CLASS_RT;
1320 break; 1320 break;
1321 case IOPRIO_CLASS_BE: 1321 case IOPRIO_CLASS_BE:
1322 cfqq->ioprio = task_ioprio(tsk); 1322 cfqq->ioprio = task_ioprio(tsk);
1323 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1323 cfqq->ioprio_class = IOPRIO_CLASS_BE;
1324 break; 1324 break;
1325 case IOPRIO_CLASS_IDLE: 1325 case IOPRIO_CLASS_IDLE:
1326 cfqq->ioprio_class = IOPRIO_CLASS_IDLE; 1326 cfqq->ioprio_class = IOPRIO_CLASS_IDLE;
1327 cfqq->ioprio = 7; 1327 cfqq->ioprio = 7;
1328 cfq_clear_cfqq_idle_window(cfqq); 1328 cfq_clear_cfqq_idle_window(cfqq);
1329 break; 1329 break;
1330 } 1330 }
1331 1331
1332 /* 1332 /*
1333 * keep track of original prio settings in case we have to temporarily 1333 * keep track of original prio settings in case we have to temporarily
1334 * elevate the priority of this queue 1334 * elevate the priority of this queue
1335 */ 1335 */
1336 cfqq->org_ioprio = cfqq->ioprio; 1336 cfqq->org_ioprio = cfqq->ioprio;
1337 cfqq->org_ioprio_class = cfqq->ioprio_class; 1337 cfqq->org_ioprio_class = cfqq->ioprio_class;
1338 1338
1339 if (cfq_cfqq_on_rr(cfqq)) 1339 if (cfq_cfqq_on_rr(cfqq))
1340 cfq_resort_rr_list(cfqq, 0); 1340 cfq_resort_rr_list(cfqq, 0);
1341 1341
1342 cfq_clear_cfqq_prio_changed(cfqq); 1342 cfq_clear_cfqq_prio_changed(cfqq);
1343 } 1343 }
1344 1344
1345 static inline void changed_ioprio(struct cfq_io_context *cic) 1345 static inline void changed_ioprio(struct cfq_io_context *cic)
1346 { 1346 {
1347 struct cfq_data *cfqd = cic->key; 1347 struct cfq_data *cfqd = cic->key;
1348 struct cfq_queue *cfqq; 1348 struct cfq_queue *cfqq;
1349 if (cfqd) { 1349 if (cfqd) {
1350 spin_lock(cfqd->queue->queue_lock); 1350 spin_lock(cfqd->queue->queue_lock);
1351 cfqq = cic->cfqq[ASYNC]; 1351 cfqq = cic->cfqq[ASYNC];
1352 if (cfqq) { 1352 if (cfqq) {
1353 struct cfq_queue *new_cfqq; 1353 struct cfq_queue *new_cfqq;
1354 new_cfqq = cfq_get_queue(cfqd, CFQ_KEY_ASYNC, 1354 new_cfqq = cfq_get_queue(cfqd, CFQ_KEY_ASYNC,
1355 cic->ioc->task, GFP_ATOMIC); 1355 cic->ioc->task, GFP_ATOMIC);
1356 if (new_cfqq) { 1356 if (new_cfqq) {
1357 cic->cfqq[ASYNC] = new_cfqq; 1357 cic->cfqq[ASYNC] = new_cfqq;
1358 cfq_put_queue(cfqq); 1358 cfq_put_queue(cfqq);
1359 } 1359 }
1360 } 1360 }
1361 cfqq = cic->cfqq[SYNC]; 1361 cfqq = cic->cfqq[SYNC];
1362 if (cfqq) { 1362 if (cfqq) {
1363 cfq_mark_cfqq_prio_changed(cfqq); 1363 cfq_mark_cfqq_prio_changed(cfqq);
1364 cfq_init_prio_data(cfqq); 1364 cfq_init_prio_data(cfqq);
1365 } 1365 }
1366 spin_unlock(cfqd->queue->queue_lock); 1366 spin_unlock(cfqd->queue->queue_lock);
1367 } 1367 }
1368 } 1368 }
1369 1369
1370 /* 1370 /*
1371 * callback from sys_ioprio_set, irqs are disabled 1371 * callback from sys_ioprio_set, irqs are disabled
1372 */ 1372 */
1373 static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) 1373 static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
1374 { 1374 {
1375 struct cfq_io_context *cic; 1375 struct cfq_io_context *cic;
1376 1376
1377 write_lock(&cfq_exit_lock); 1377 write_lock(&cfq_exit_lock);
1378 1378
1379 cic = ioc->cic; 1379 cic = ioc->cic;
1380 1380
1381 changed_ioprio(cic); 1381 changed_ioprio(cic);
1382 1382
1383 list_for_each_entry(cic, &cic->list, list) 1383 list_for_each_entry(cic, &cic->list, list)
1384 changed_ioprio(cic); 1384 changed_ioprio(cic);
1385 1385
1386 write_unlock(&cfq_exit_lock); 1386 write_unlock(&cfq_exit_lock);
1387 1387
1388 return 0; 1388 return 0;
1389 } 1389 }
1390 1390
1391 static struct cfq_queue * 1391 static struct cfq_queue *
1392 cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, 1392 cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk,
1393 gfp_t gfp_mask) 1393 gfp_t gfp_mask)
1394 { 1394 {
1395 const int hashval = hash_long(key, CFQ_QHASH_SHIFT); 1395 const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
1396 struct cfq_queue *cfqq, *new_cfqq = NULL; 1396 struct cfq_queue *cfqq, *new_cfqq = NULL;
1397 unsigned short ioprio; 1397 unsigned short ioprio;
1398 1398
1399 retry: 1399 retry:
1400 ioprio = tsk->ioprio; 1400 ioprio = tsk->ioprio;
1401 cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval); 1401 cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval);
1402 1402
1403 if (!cfqq) { 1403 if (!cfqq) {
1404 if (new_cfqq) { 1404 if (new_cfqq) {
1405 cfqq = new_cfqq; 1405 cfqq = new_cfqq;
1406 new_cfqq = NULL; 1406 new_cfqq = NULL;
1407 } else if (gfp_mask & __GFP_WAIT) { 1407 } else if (gfp_mask & __GFP_WAIT) {
1408 spin_unlock_irq(cfqd->queue->queue_lock); 1408 spin_unlock_irq(cfqd->queue->queue_lock);
1409 new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); 1409 new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
1410 spin_lock_irq(cfqd->queue->queue_lock); 1410 spin_lock_irq(cfqd->queue->queue_lock);
1411 goto retry; 1411 goto retry;
1412 } else { 1412 } else {
1413 cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); 1413 cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
1414 if (!cfqq) 1414 if (!cfqq)
1415 goto out; 1415 goto out;
1416 } 1416 }
1417 1417
1418 memset(cfqq, 0, sizeof(*cfqq)); 1418 memset(cfqq, 0, sizeof(*cfqq));
1419 1419
1420 INIT_HLIST_NODE(&cfqq->cfq_hash); 1420 INIT_HLIST_NODE(&cfqq->cfq_hash);
1421 INIT_LIST_HEAD(&cfqq->cfq_list); 1421 INIT_LIST_HEAD(&cfqq->cfq_list);
1422 RB_CLEAR_ROOT(&cfqq->sort_list); 1422 RB_CLEAR_ROOT(&cfqq->sort_list);
1423 INIT_LIST_HEAD(&cfqq->fifo); 1423 INIT_LIST_HEAD(&cfqq->fifo);
1424 1424
1425 cfqq->key = key; 1425 cfqq->key = key;
1426 hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); 1426 hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
1427 atomic_set(&cfqq->ref, 0); 1427 atomic_set(&cfqq->ref, 0);
1428 cfqq->cfqd = cfqd; 1428 cfqq->cfqd = cfqd;
1429 cfqq->service_last = 0; 1429 cfqq->service_last = 0;
1430 /* 1430 /*
1431 * set ->slice_left to allow preemption for a new process 1431 * set ->slice_left to allow preemption for a new process
1432 */ 1432 */
1433 cfqq->slice_left = 2 * cfqd->cfq_slice_idle; 1433 cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
1434 cfq_mark_cfqq_idle_window(cfqq); 1434 cfq_mark_cfqq_idle_window(cfqq);
1435 cfq_mark_cfqq_prio_changed(cfqq); 1435 cfq_mark_cfqq_prio_changed(cfqq);
1436 cfq_init_prio_data(cfqq); 1436 cfq_init_prio_data(cfqq);
1437 } 1437 }
1438 1438
1439 if (new_cfqq) 1439 if (new_cfqq)
1440 kmem_cache_free(cfq_pool, new_cfqq); 1440 kmem_cache_free(cfq_pool, new_cfqq);
1441 1441
1442 atomic_inc(&cfqq->ref); 1442 atomic_inc(&cfqq->ref);
1443 out: 1443 out:
1444 WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); 1444 WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
1445 return cfqq; 1445 return cfqq;
1446 } 1446 }
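The retry label above is the usual drop-the-lock-to-allocate dance: when __GFP_WAIT allows sleeping, the queue lock is released around kmem_cache_alloc(), the hash is searched again afterwards in case another context created the queue meanwhile, and any now-redundant allocation is freed. A minimal hedged sketch of the same shape, with hypothetical names and a single cached pointer standing in for the hash:

/* Hedged sketch of the allocate-outside-the-lock retry pattern; the names
 * and the single cached slot are illustrative only. */
#include <linux/slab.h>
#include <linux/spinlock.h>

struct sketch_obj {
	int payload;
};

static struct sketch_obj *sketch_cached;	/* protected by *lock */

static struct sketch_obj *
sketch_get(spinlock_t *lock, struct kmem_cache *cache, gfp_t gfp_mask)
{
	struct sketch_obj *obj, *fresh = NULL;

retry:
	obj = sketch_cached;
	if (!obj) {
		if (fresh) {
			obj = fresh;			/* use our spare */
			fresh = NULL;
		} else if (gfp_mask & __GFP_WAIT) {
			spin_unlock_irq(lock);		/* may sleep: drop the lock */
			fresh = kmem_cache_alloc(cache, gfp_mask);
			spin_lock_irq(lock);
			goto retry;			/* re-check after the race window */
		} else {
			obj = kmem_cache_alloc(cache, gfp_mask);
			if (!obj)
				return NULL;
		}
		sketch_cached = obj;
	}
	if (fresh)
		kmem_cache_free(cache, fresh);		/* lost the race, drop spare */
	return obj;
}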
1447 1447
1448 /* 1448 /*
1449 * Setup general io context and cfq io context. There can be several cfq 1449 * Setup general io context and cfq io context. There can be several cfq
1450 * io contexts per general io context, if this process is doing io to more 1450 * io contexts per general io context, if this process is doing io to more
1451 * than one device managed by cfq. Note that caller is holding a reference to 1451 * than one device managed by cfq. Note that caller is holding a reference to
1452 * cfqq, so we don't need to worry about it disappearing 1452 * cfqq, so we don't need to worry about it disappearing
1453 */ 1453 */
1454 static struct cfq_io_context * 1454 static struct cfq_io_context *
1455 cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) 1455 cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask)
1456 { 1456 {
1457 struct io_context *ioc = NULL; 1457 struct io_context *ioc = NULL;
1458 struct cfq_io_context *cic; 1458 struct cfq_io_context *cic;
1459 1459
1460 might_sleep_if(gfp_mask & __GFP_WAIT); 1460 might_sleep_if(gfp_mask & __GFP_WAIT);
1461 1461
1462 ioc = get_io_context(gfp_mask); 1462 ioc = get_io_context(gfp_mask);
1463 if (!ioc) 1463 if (!ioc)
1464 return NULL; 1464 return NULL;
1465 1465
1466 restart: 1466 restart:
1467 if ((cic = ioc->cic) == NULL) { 1467 if ((cic = ioc->cic) == NULL) {
1468 cic = cfq_alloc_io_context(cfqd, gfp_mask); 1468 cic = cfq_alloc_io_context(cfqd, gfp_mask);
1469 1469
1470 if (cic == NULL) 1470 if (cic == NULL)
1471 goto err; 1471 goto err;
1472 1472
1473 /* 1473 /*
1474 * manually increment generic io_context usage count, it 1474 * manually increment generic io_context usage count, it
1475 * cannot go away since we are already holding one ref to it 1475 * cannot go away since we are already holding one ref to it
1476 */ 1476 */
1477 cic->ioc = ioc; 1477 cic->ioc = ioc;
1478 cic->key = cfqd; 1478 cic->key = cfqd;
1479 read_lock(&cfq_exit_lock); 1479 read_lock(&cfq_exit_lock);
1480 ioc->set_ioprio = cfq_ioc_set_ioprio; 1480 ioc->set_ioprio = cfq_ioc_set_ioprio;
1481 ioc->cic = cic; 1481 ioc->cic = cic;
1482 list_add(&cic->queue_list, &cfqd->cic_list); 1482 list_add(&cic->queue_list, &cfqd->cic_list);
1483 read_unlock(&cfq_exit_lock); 1483 read_unlock(&cfq_exit_lock);
1484 } else { 1484 } else {
1485 struct cfq_io_context *__cic; 1485 struct cfq_io_context *__cic;
1486 1486
1487 /* 1487 /*
1488 * the first cic on the list is actually the head itself 1488 * the first cic on the list is actually the head itself
1489 */ 1489 */
1490 if (cic->key == cfqd) 1490 if (cic->key == cfqd)
1491 goto out; 1491 goto out;
1492 1492
1493 if (unlikely(!cic->key)) { 1493 if (unlikely(!cic->key)) {
1494 read_lock(&cfq_exit_lock); 1494 read_lock(&cfq_exit_lock);
1495 if (list_empty(&cic->list)) 1495 if (list_empty(&cic->list))
1496 ioc->cic = NULL; 1496 ioc->cic = NULL;
1497 else 1497 else
1498 ioc->cic = list_entry(cic->list.next, 1498 ioc->cic = list_entry(cic->list.next,
1499 struct cfq_io_context, 1499 struct cfq_io_context,
1500 list); 1500 list);
1501 read_unlock(&cfq_exit_lock); 1501 read_unlock(&cfq_exit_lock);
1502 kmem_cache_free(cfq_ioc_pool, cic); 1502 kmem_cache_free(cfq_ioc_pool, cic);
1503 atomic_dec(&ioc_count); 1503 atomic_dec(&ioc_count);
1504 goto restart; 1504 goto restart;
1505 } 1505 }
1506 1506
1507 /* 1507 /*
1508 * cic exists, check if we already are there. linear search 1508 * cic exists, check if we already are there. linear search
1509 * should be ok here, the list will usually not be more than 1509 * should be ok here, the list will usually not be more than
1510 * 1 or a few entries long 1510 * 1 or a few entries long
1511 */ 1511 */
1512 list_for_each_entry(__cic, &cic->list, list) { 1512 list_for_each_entry(__cic, &cic->list, list) {
1513 /* 1513 /*
1514 * this process is already holding a reference to 1514 * this process is already holding a reference to
1515 * this queue, so no need to get one more 1515 * this queue, so no need to get one more
1516 */ 1516 */
1517 if (__cic->key == cfqd) { 1517 if (__cic->key == cfqd) {
1518 cic = __cic; 1518 cic = __cic;
1519 goto out; 1519 goto out;
1520 } 1520 }
1521 if (unlikely(!__cic->key)) { 1521 if (unlikely(!__cic->key)) {
1522 read_lock(&cfq_exit_lock); 1522 read_lock(&cfq_exit_lock);
1523 list_del(&__cic->list); 1523 list_del(&__cic->list);
1524 read_unlock(&cfq_exit_lock); 1524 read_unlock(&cfq_exit_lock);
1525 kmem_cache_free(cfq_ioc_pool, __cic); 1525 kmem_cache_free(cfq_ioc_pool, __cic);
1526 atomic_dec(&ioc_count); 1526 atomic_dec(&ioc_count);
1527 goto restart; 1527 goto restart;
1528 } 1528 }
1529 } 1529 }
1530 1530
1531 /* 1531 /*
1532 * nope, process doesn't have a cic associated with this 1532 * nope, process doesn't have a cic associated with this
1533 * cfqq yet. get a new one and add to list 1533 * cfqq yet. get a new one and add to list
1534 */ 1534 */
1535 __cic = cfq_alloc_io_context(cfqd, gfp_mask); 1535 __cic = cfq_alloc_io_context(cfqd, gfp_mask);
1536 if (__cic == NULL) 1536 if (__cic == NULL)
1537 goto err; 1537 goto err;
1538 1538
1539 __cic->ioc = ioc; 1539 __cic->ioc = ioc;
1540 __cic->key = cfqd; 1540 __cic->key = cfqd;
1541 read_lock(&cfq_exit_lock); 1541 read_lock(&cfq_exit_lock);
1542 list_add(&__cic->list, &cic->list); 1542 list_add(&__cic->list, &cic->list);
1543 list_add(&__cic->queue_list, &cfqd->cic_list); 1543 list_add(&__cic->queue_list, &cfqd->cic_list);
1544 read_unlock(&cfq_exit_lock); 1544 read_unlock(&cfq_exit_lock);
1545 cic = __cic; 1545 cic = __cic;
1546 } 1546 }
1547 1547
1548 out: 1548 out:
1549 return cic; 1549 return cic;
1550 err: 1550 err:
1551 put_io_context(ioc); 1551 put_io_context(ioc);
1552 return NULL; 1552 return NULL;
1553 } 1553 }
1554 1554
1555 static void 1555 static void
1556 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) 1556 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
1557 { 1557 {
1558 unsigned long elapsed, ttime; 1558 unsigned long elapsed, ttime;
1559 1559
1560 /* 1560 /*
1561 * if this context already has stuff queued, thinktime is from 1561 * if this context already has stuff queued, thinktime is from
1562 * last queue not last end 1562 * last queue not last end
1563 */ 1563 */
1564 #if 0 1564 #if 0
1565 if (time_after(cic->last_end_request, cic->last_queue)) 1565 if (time_after(cic->last_end_request, cic->last_queue))
1566 elapsed = jiffies - cic->last_end_request; 1566 elapsed = jiffies - cic->last_end_request;
1567 else 1567 else
1568 elapsed = jiffies - cic->last_queue; 1568 elapsed = jiffies - cic->last_queue;
1569 #else 1569 #else
1570 elapsed = jiffies - cic->last_end_request; 1570 elapsed = jiffies - cic->last_end_request;
1571 #endif 1571 #endif
1572 1572
1573 ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); 1573 ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
1574 1574
1575 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; 1575 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
1576 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; 1576 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
1577 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; 1577 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
1578 } 1578 }
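The three assignments above form a fixed-point exponentially weighted average: both the sample count and the total decay by 7/8 on each update, 256 is the fixed-point scale, and ttime_mean therefore tracks the recent mean think time in jiffies, converging as ttime_samples approaches 256. A small hedged user-space sketch of the same arithmetic with made-up observations:

/* Hedged sketch of the 7/8-decay fixed-point average above; plain C,
 * only the arithmetic is taken from the code, the data is invented. */
#include <stdio.h>

struct sketch_ttime {
	unsigned long samples;	/* scaled by 256 */
	unsigned long total;	/* scaled by 256 */
	unsigned long mean;
};

static void sketch_update(struct sketch_ttime *t, unsigned long ttime)
{
	t->samples = (7 * t->samples + 256) / 8;
	t->total   = (7 * t->total + 256 * ttime) / 8;
	t->mean    = (t->total + 128) / t->samples;
}

int main(void)
{
	struct sketch_ttime t = { 0, 0, 0 };
	unsigned long obs[] = { 4, 4, 12, 2, 2 };
	unsigned int i;

	for (i = 0; i < sizeof(obs) / sizeof(obs[0]); i++) {
		sketch_update(&t, obs[i]);
		printf("after sample %u: mean=%lu\n", i + 1, t.mean);
	}
	return 0;
}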
1579 1579
1580 #define sample_valid(samples) ((samples) > 80) 1580 #define sample_valid(samples) ((samples) > 80)
1581 1581
1582 /* 1582 /*
1583 * Disable idle window if the process thinks too long or seeks so much that 1583 * Disable idle window if the process thinks too long or seeks so much that
1584 * it doesn't matter 1584 * it doesn't matter
1585 */ 1585 */
1586 static void 1586 static void
1587 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1587 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1588 struct cfq_io_context *cic) 1588 struct cfq_io_context *cic)
1589 { 1589 {
1590 int enable_idle = cfq_cfqq_idle_window(cfqq); 1590 int enable_idle = cfq_cfqq_idle_window(cfqq);
1591 1591
1592 if (!cic->ioc->task || !cfqd->cfq_slice_idle) 1592 if (!cic->ioc->task || !cfqd->cfq_slice_idle)
1593 enable_idle = 0; 1593 enable_idle = 0;
1594 else if (sample_valid(cic->ttime_samples)) { 1594 else if (sample_valid(cic->ttime_samples)) {
1595 if (cic->ttime_mean > cfqd->cfq_slice_idle) 1595 if (cic->ttime_mean > cfqd->cfq_slice_idle)
1596 enable_idle = 0; 1596 enable_idle = 0;
1597 else 1597 else
1598 enable_idle = 1; 1598 enable_idle = 1;
1599 } 1599 }
1600 1600
1601 if (enable_idle) 1601 if (enable_idle)
1602 cfq_mark_cfqq_idle_window(cfqq); 1602 cfq_mark_cfqq_idle_window(cfqq);
1603 else 1603 else
1604 cfq_clear_cfqq_idle_window(cfqq); 1604 cfq_clear_cfqq_idle_window(cfqq);
1605 } 1605 }
1606 1606
1607 1607
1608 /* 1608 /*
1609 * Check if new_cfqq should preempt the currently active queue. Return 0 for 1609 * Check if new_cfqq should preempt the currently active queue. Return 0 for
1610 * no or if we aren't sure, a 1 will cause a preempt. 1610 * no or if we aren't sure, a 1 will cause a preempt.
1611 */ 1611 */
1612 static int 1612 static int
1613 cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, 1613 cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1614 struct cfq_rq *crq) 1614 struct cfq_rq *crq)
1615 { 1615 {
1616 struct cfq_queue *cfqq = cfqd->active_queue; 1616 struct cfq_queue *cfqq = cfqd->active_queue;
1617 1617
1618 if (cfq_class_idle(new_cfqq)) 1618 if (cfq_class_idle(new_cfqq))
1619 return 0; 1619 return 0;
1620 1620
1621 if (!cfqq) 1621 if (!cfqq)
1622 return 1; 1622 return 1;
1623 1623
1624 if (cfq_class_idle(cfqq)) 1624 if (cfq_class_idle(cfqq))
1625 return 1; 1625 return 1;
1626 if (!cfq_cfqq_wait_request(new_cfqq)) 1626 if (!cfq_cfqq_wait_request(new_cfqq))
1627 return 0; 1627 return 0;
1628 /* 1628 /*
1629 * if it doesn't have slice left, forget it 1629 * if it doesn't have slice left, forget it
1630 */ 1630 */
1631 if (new_cfqq->slice_left < cfqd->cfq_slice_idle) 1631 if (new_cfqq->slice_left < cfqd->cfq_slice_idle)
1632 return 0; 1632 return 0;
1633 if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq)) 1633 if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq))
1634 return 1; 1634 return 1;
1635 1635
1636 return 0; 1636 return 0;
1637 } 1637 }
1638 1638
1639 /* 1639 /*
1640 * cfqq preempts the active queue. if we allowed preempt with no slice left, 1640 * cfqq preempts the active queue. if we allowed preempt with no slice left,
1641 * let it have half of its nominal slice. 1641 * let it have half of its nominal slice.
1642 */ 1642 */
1643 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1643 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1644 { 1644 {
1645 struct cfq_queue *__cfqq, *next; 1645 struct cfq_queue *__cfqq, *next;
1646 1646
1647 list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list) 1647 list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list)
1648 cfq_resort_rr_list(__cfqq, 1); 1648 cfq_resort_rr_list(__cfqq, 1);
1649 1649
1650 if (!cfqq->slice_left) 1650 if (!cfqq->slice_left)
1651 cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2; 1651 cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2;
1652 1652
1653 cfqq->slice_end = cfqq->slice_left + jiffies; 1653 cfqq->slice_end = cfqq->slice_left + jiffies;
1654 __cfq_slice_expired(cfqd, cfqq, 1); 1654 __cfq_slice_expired(cfqd, cfqq, 1);
1655 __cfq_set_active_queue(cfqd, cfqq); 1655 __cfq_set_active_queue(cfqd, cfqq);
1656 } 1656 }
1657 1657
1658 /* 1658 /*
1659 * should really be a ll_rw_blk.c helper 1659 * should really be a ll_rw_blk.c helper
1660 */ 1660 */
1661 static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1661 static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1662 { 1662 {
1663 request_queue_t *q = cfqd->queue; 1663 request_queue_t *q = cfqd->queue;
1664 1664
1665 if (!blk_queue_plugged(q)) 1665 if (!blk_queue_plugged(q))
1666 q->request_fn(q); 1666 q->request_fn(q);
1667 else 1667 else
1668 __generic_unplug_device(q); 1668 __generic_unplug_device(q);
1669 } 1669 }
1670 1670
1671 /* 1671 /*
1672 * Called when a new fs request (crq) is added (to cfqq). Check if there's 1672 * Called when a new fs request (crq) is added (to cfqq). Check if there's
1673 * something we should do about it 1673 * something we should do about it
1674 */ 1674 */
1675 static void 1675 static void
1676 cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1676 cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1677 struct cfq_rq *crq) 1677 struct cfq_rq *crq)
1678 { 1678 {
1679 struct cfq_io_context *cic; 1679 struct cfq_io_context *cic;
1680 1680
1681 cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); 1681 cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
1682 1682
1683 /* 1683 /*
1684 * we never wait for an async request and we don't allow preemption 1684 * we never wait for an async request and we don't allow preemption
1685 * of an async request. so just return early 1685 * of an async request. so just return early
1686 */ 1686 */
1687 if (!cfq_crq_is_sync(crq)) 1687 if (!cfq_crq_is_sync(crq))
1688 return; 1688 return;
1689 1689
1690 cic = crq->io_context; 1690 cic = crq->io_context;
1691 1691
1692 cfq_update_io_thinktime(cfqd, cic); 1692 cfq_update_io_thinktime(cfqd, cic);
1693 cfq_update_idle_window(cfqd, cfqq, cic); 1693 cfq_update_idle_window(cfqd, cfqq, cic);
1694 1694
1695 cic->last_queue = jiffies; 1695 cic->last_queue = jiffies;
1696 1696
1697 if (cfqq == cfqd->active_queue) { 1697 if (cfqq == cfqd->active_queue) {
1698 /* 1698 /*
1699 * if we are waiting for a request for this queue, let it rip 1699 * if we are waiting for a request for this queue, let it rip
1700 * immediately and flag that we must not expire this queue 1700 * immediately and flag that we must not expire this queue
1701 * just now 1701 * just now
1702 */ 1702 */
1703 if (cfq_cfqq_wait_request(cfqq)) { 1703 if (cfq_cfqq_wait_request(cfqq)) {
1704 cfq_mark_cfqq_must_dispatch(cfqq); 1704 cfq_mark_cfqq_must_dispatch(cfqq);
1705 del_timer(&cfqd->idle_slice_timer); 1705 del_timer(&cfqd->idle_slice_timer);
1706 cfq_start_queueing(cfqd, cfqq); 1706 cfq_start_queueing(cfqd, cfqq);
1707 } 1707 }
1708 } else if (cfq_should_preempt(cfqd, cfqq, crq)) { 1708 } else if (cfq_should_preempt(cfqd, cfqq, crq)) {
1709 /* 1709 /*
1710 * not the active queue - expire current slice if it is 1710 * not the active queue - expire current slice if it is
1711 * idle and has expired its mean thinktime or this new queue 1711 * idle and has expired its mean thinktime or this new queue
1712 * has some old slice time left and is of higher priority 1712 * has some old slice time left and is of higher priority
1713 */ 1713 */
1714 cfq_preempt_queue(cfqd, cfqq); 1714 cfq_preempt_queue(cfqd, cfqq);
1715 cfq_mark_cfqq_must_dispatch(cfqq); 1715 cfq_mark_cfqq_must_dispatch(cfqq);
1716 cfq_start_queueing(cfqd, cfqq); 1716 cfq_start_queueing(cfqd, cfqq);
1717 } 1717 }
1718 } 1718 }
1719 1719
1720 static void cfq_insert_request(request_queue_t *q, struct request *rq) 1720 static void cfq_insert_request(request_queue_t *q, struct request *rq)
1721 { 1721 {
1722 struct cfq_data *cfqd = q->elevator->elevator_data; 1722 struct cfq_data *cfqd = q->elevator->elevator_data;
1723 struct cfq_rq *crq = RQ_DATA(rq); 1723 struct cfq_rq *crq = RQ_DATA(rq);
1724 struct cfq_queue *cfqq = crq->cfq_queue; 1724 struct cfq_queue *cfqq = crq->cfq_queue;
1725 1725
1726 cfq_init_prio_data(cfqq); 1726 cfq_init_prio_data(cfqq);
1727 1727
1728 cfq_add_crq_rb(crq); 1728 cfq_add_crq_rb(crq);
1729 1729
1730 list_add_tail(&rq->queuelist, &cfqq->fifo); 1730 list_add_tail(&rq->queuelist, &cfqq->fifo);
1731 1731
1732 if (rq_mergeable(rq)) 1732 if (rq_mergeable(rq))
1733 cfq_add_crq_hash(cfqd, crq); 1733 cfq_add_crq_hash(cfqd, crq);
1734 1734
1735 cfq_crq_enqueued(cfqd, cfqq, crq); 1735 cfq_crq_enqueued(cfqd, cfqq, crq);
1736 } 1736 }
1737 1737
1738 static void cfq_completed_request(request_queue_t *q, struct request *rq) 1738 static void cfq_completed_request(request_queue_t *q, struct request *rq)
1739 { 1739 {
1740 struct cfq_rq *crq = RQ_DATA(rq); 1740 struct cfq_rq *crq = RQ_DATA(rq);
1741 struct cfq_queue *cfqq = crq->cfq_queue; 1741 struct cfq_queue *cfqq = crq->cfq_queue;
1742 struct cfq_data *cfqd = cfqq->cfqd; 1742 struct cfq_data *cfqd = cfqq->cfqd;
1743 const int sync = cfq_crq_is_sync(crq); 1743 const int sync = cfq_crq_is_sync(crq);
1744 unsigned long now; 1744 unsigned long now;
1745 1745
1746 now = jiffies; 1746 now = jiffies;
1747 1747
1748 WARN_ON(!cfqd->rq_in_driver); 1748 WARN_ON(!cfqd->rq_in_driver);
1749 WARN_ON(!cfqq->on_dispatch[sync]); 1749 WARN_ON(!cfqq->on_dispatch[sync]);
1750 cfqd->rq_in_driver--; 1750 cfqd->rq_in_driver--;
1751 cfqq->on_dispatch[sync]--; 1751 cfqq->on_dispatch[sync]--;
1752 1752
1753 if (!cfq_class_idle(cfqq)) 1753 if (!cfq_class_idle(cfqq))
1754 cfqd->last_end_request = now; 1754 cfqd->last_end_request = now;
1755 1755
1756 if (!cfq_cfqq_dispatched(cfqq)) { 1756 if (!cfq_cfqq_dispatched(cfqq)) {
1757 if (cfq_cfqq_on_rr(cfqq)) { 1757 if (cfq_cfqq_on_rr(cfqq)) {
1758 cfqq->service_last = now; 1758 cfqq->service_last = now;
1759 cfq_resort_rr_list(cfqq, 0); 1759 cfq_resort_rr_list(cfqq, 0);
1760 } 1760 }
1761 cfq_schedule_dispatch(cfqd); 1761 cfq_schedule_dispatch(cfqd);
1762 } 1762 }
1763 1763
1764 if (cfq_crq_is_sync(crq)) 1764 if (cfq_crq_is_sync(crq))
1765 crq->io_context->last_end_request = now; 1765 crq->io_context->last_end_request = now;
1766 } 1766 }
1767 1767
1768 static struct request * 1768 static struct request *
1769 cfq_former_request(request_queue_t *q, struct request *rq) 1769 cfq_former_request(request_queue_t *q, struct request *rq)
1770 { 1770 {
1771 struct cfq_rq *crq = RQ_DATA(rq); 1771 struct cfq_rq *crq = RQ_DATA(rq);
1772 struct rb_node *rbprev = rb_prev(&crq->rb_node); 1772 struct rb_node *rbprev = rb_prev(&crq->rb_node);
1773 1773
1774 if (rbprev) 1774 if (rbprev)
1775 return rb_entry_crq(rbprev)->request; 1775 return rb_entry_crq(rbprev)->request;
1776 1776
1777 return NULL; 1777 return NULL;
1778 } 1778 }
1779 1779
1780 static struct request * 1780 static struct request *
1781 cfq_latter_request(request_queue_t *q, struct request *rq) 1781 cfq_latter_request(request_queue_t *q, struct request *rq)
1782 { 1782 {
1783 struct cfq_rq *crq = RQ_DATA(rq); 1783 struct cfq_rq *crq = RQ_DATA(rq);
1784 struct rb_node *rbnext = rb_next(&crq->rb_node); 1784 struct rb_node *rbnext = rb_next(&crq->rb_node);
1785 1785
1786 if (rbnext) 1786 if (rbnext)
1787 return rb_entry_crq(rbnext)->request; 1787 return rb_entry_crq(rbnext)->request;
1788 1788
1789 return NULL; 1789 return NULL;
1790 } 1790 }
1791 1791
1792 /* 1792 /*
1793 * we temporarily boost lower priority queues if they are holding fs exclusive 1793 * we temporarily boost lower priority queues if they are holding fs exclusive
1794 * resources. they are boosted to normal prio (CLASS_BE/4) 1794 * resources. they are boosted to normal prio (CLASS_BE/4)
1795 */ 1795 */
1796 static void cfq_prio_boost(struct cfq_queue *cfqq) 1796 static void cfq_prio_boost(struct cfq_queue *cfqq)
1797 { 1797 {
1798 const int ioprio_class = cfqq->ioprio_class; 1798 const int ioprio_class = cfqq->ioprio_class;
1799 const int ioprio = cfqq->ioprio; 1799 const int ioprio = cfqq->ioprio;
1800 1800
1801 if (has_fs_excl()) { 1801 if (has_fs_excl()) {
1802 /* 1802 /*
1803 * boost idle prio on transactions that would lock out other 1803 * boost idle prio on transactions that would lock out other
1804 * users of the filesystem 1804 * users of the filesystem
1805 */ 1805 */
1806 if (cfq_class_idle(cfqq)) 1806 if (cfq_class_idle(cfqq))
1807 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1807 cfqq->ioprio_class = IOPRIO_CLASS_BE;
1808 if (cfqq->ioprio > IOPRIO_NORM) 1808 if (cfqq->ioprio > IOPRIO_NORM)
1809 cfqq->ioprio = IOPRIO_NORM; 1809 cfqq->ioprio = IOPRIO_NORM;
1810 } else { 1810 } else {
1811 /* 1811 /*
1812 * check if we need to unboost the queue 1812 * check if we need to unboost the queue
1813 */ 1813 */
1814 if (cfqq->ioprio_class != cfqq->org_ioprio_class) 1814 if (cfqq->ioprio_class != cfqq->org_ioprio_class)
1815 cfqq->ioprio_class = cfqq->org_ioprio_class; 1815 cfqq->ioprio_class = cfqq->org_ioprio_class;
1816 if (cfqq->ioprio != cfqq->org_ioprio) 1816 if (cfqq->ioprio != cfqq->org_ioprio)
1817 cfqq->ioprio = cfqq->org_ioprio; 1817 cfqq->ioprio = cfqq->org_ioprio;
1818 } 1818 }
1819 1819
1820 /* 1820 /*
1821 * refile between round-robin lists if we moved the priority class 1821 * refile between round-robin lists if we moved the priority class
1822 */ 1822 */
1823 if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) && 1823 if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) &&
1824 cfq_cfqq_on_rr(cfqq)) 1824 cfq_cfqq_on_rr(cfqq))
1825 cfq_resort_rr_list(cfqq, 0); 1825 cfq_resort_rr_list(cfqq, 0);
1826 } 1826 }
1827 1827
1828 static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) 1828 static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
1829 { 1829 {
1830 if (rw == READ || process_sync(task)) 1830 if (rw == READ || process_sync(task))
1831 return task->pid; 1831 return task->pid;
1832 1832
1833 return CFQ_KEY_ASYNC; 1833 return CFQ_KEY_ASYNC;
1834 } 1834 }
1835 1835
1836 static inline int 1836 static inline int
1837 __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1837 __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1838 struct task_struct *task, int rw) 1838 struct task_struct *task, int rw)
1839 { 1839 {
1840 #if 1 1840 #if 1
1841 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && 1841 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
1842 !cfq_cfqq_must_alloc_slice(cfqq)) { 1842 !cfq_cfqq_must_alloc_slice(cfqq)) {
1843 cfq_mark_cfqq_must_alloc_slice(cfqq); 1843 cfq_mark_cfqq_must_alloc_slice(cfqq);
1844 return ELV_MQUEUE_MUST; 1844 return ELV_MQUEUE_MUST;
1845 } 1845 }
1846 1846
1847 return ELV_MQUEUE_MAY; 1847 return ELV_MQUEUE_MAY;
1848 #else 1848 #else
1849 if (!cfqq || task->flags & PF_MEMALLOC) 1849 if (!cfqq || task->flags & PF_MEMALLOC)
1850 return ELV_MQUEUE_MAY; 1850 return ELV_MQUEUE_MAY;
1851 if (!cfqq->allocated[rw] || cfq_cfqq_must_alloc(cfqq)) { 1851 if (!cfqq->allocated[rw] || cfq_cfqq_must_alloc(cfqq)) {
1852 if (cfq_cfqq_wait_request(cfqq)) 1852 if (cfq_cfqq_wait_request(cfqq))
1853 return ELV_MQUEUE_MUST; 1853 return ELV_MQUEUE_MUST;
1854 1854
1855 /* 1855 /*
1856 * only allow 1 ELV_MQUEUE_MUST per slice, otherwise we 1856 * only allow 1 ELV_MQUEUE_MUST per slice, otherwise we
1857 * can quickly flood the queue with writes from a single task 1857 * can quickly flood the queue with writes from a single task
1858 */ 1858 */
1859 if (rw == READ || !cfq_cfqq_must_alloc_slice(cfqq)) { 1859 if (rw == READ || !cfq_cfqq_must_alloc_slice(cfqq)) {
1860 cfq_mark_cfqq_must_alloc_slice(cfqq); 1860 cfq_mark_cfqq_must_alloc_slice(cfqq);
1861 return ELV_MQUEUE_MUST; 1861 return ELV_MQUEUE_MUST;
1862 } 1862 }
1863 1863
1864 return ELV_MQUEUE_MAY; 1864 return ELV_MQUEUE_MAY;
1865 } 1865 }
1866 if (cfq_class_idle(cfqq)) 1866 if (cfq_class_idle(cfqq))
1867 return ELV_MQUEUE_NO; 1867 return ELV_MQUEUE_NO;
1868 if (cfqq->allocated[rw] >= cfqd->max_queued) { 1868 if (cfqq->allocated[rw] >= cfqd->max_queued) {
1869 struct io_context *ioc = get_io_context(GFP_ATOMIC); 1869 struct io_context *ioc = get_io_context(GFP_ATOMIC);
1870 int ret = ELV_MQUEUE_NO; 1870 int ret = ELV_MQUEUE_NO;
1871 1871
1872 if (ioc && ioc->nr_batch_requests) 1872 if (ioc && ioc->nr_batch_requests)
1873 ret = ELV_MQUEUE_MAY; 1873 ret = ELV_MQUEUE_MAY;
1874 1874
1875 put_io_context(ioc); 1875 put_io_context(ioc);
1876 return ret; 1876 return ret;
1877 } 1877 }
1878 1878
1879 return ELV_MQUEUE_MAY; 1879 return ELV_MQUEUE_MAY;
1880 #endif 1880 #endif
1881 } 1881 }
1882 1882
1883 static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) 1883 static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
1884 { 1884 {
1885 struct cfq_data *cfqd = q->elevator->elevator_data; 1885 struct cfq_data *cfqd = q->elevator->elevator_data;
1886 struct task_struct *tsk = current; 1886 struct task_struct *tsk = current;
1887 struct cfq_queue *cfqq; 1887 struct cfq_queue *cfqq;
1888 1888
1889 /* 1889 /*
1890 * don't force setup of a queue from here, as a call to may_queue 1890 * don't force setup of a queue from here, as a call to may_queue
1891 * does not necessarily imply that a request actually will be queued. 1891 * does not necessarily imply that a request actually will be queued.
1892 * so just lookup a possibly existing queue, or return 'may queue' 1892 * so just lookup a possibly existing queue, or return 'may queue'
1893 * if that fails 1893 * if that fails
1894 */ 1894 */
1895 cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio); 1895 cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio);
1896 if (cfqq) { 1896 if (cfqq) {
1897 cfq_init_prio_data(cfqq); 1897 cfq_init_prio_data(cfqq);
1898 cfq_prio_boost(cfqq); 1898 cfq_prio_boost(cfqq);
1899 1899
1900 return __cfq_may_queue(cfqd, cfqq, tsk, rw); 1900 return __cfq_may_queue(cfqd, cfqq, tsk, rw);
1901 } 1901 }
1902 1902
1903 return ELV_MQUEUE_MAY; 1903 return ELV_MQUEUE_MAY;
1904 } 1904 }
1905 1905
1906 static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq) 1906 static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq)
1907 { 1907 {
1908 struct cfq_data *cfqd = q->elevator->elevator_data; 1908 struct cfq_data *cfqd = q->elevator->elevator_data;
1909 struct request_list *rl = &q->rq; 1909 struct request_list *rl = &q->rq;
1910 1910
1911 if (cfqq->allocated[READ] <= cfqd->max_queued || cfqd->rq_starved) { 1911 if (cfqq->allocated[READ] <= cfqd->max_queued || cfqd->rq_starved) {
1912 smp_mb(); 1912 smp_mb();
1913 if (waitqueue_active(&rl->wait[READ])) 1913 if (waitqueue_active(&rl->wait[READ]))
1914 wake_up(&rl->wait[READ]); 1914 wake_up(&rl->wait[READ]);
1915 } 1915 }
1916 1916
1917 if (cfqq->allocated[WRITE] <= cfqd->max_queued || cfqd->rq_starved) { 1917 if (cfqq->allocated[WRITE] <= cfqd->max_queued || cfqd->rq_starved) {
1918 smp_mb(); 1918 smp_mb();
1919 if (waitqueue_active(&rl->wait[WRITE])) 1919 if (waitqueue_active(&rl->wait[WRITE]))
1920 wake_up(&rl->wait[WRITE]); 1920 wake_up(&rl->wait[WRITE]);
1921 } 1921 }
1922 } 1922 }
1923 1923
1924 /* 1924 /*
1925 * queue lock held here 1925 * queue lock held here
1926 */ 1926 */
1927 static void cfq_put_request(request_queue_t *q, struct request *rq) 1927 static void cfq_put_request(request_queue_t *q, struct request *rq)
1928 { 1928 {
1929 struct cfq_data *cfqd = q->elevator->elevator_data; 1929 struct cfq_data *cfqd = q->elevator->elevator_data;
1930 struct cfq_rq *crq = RQ_DATA(rq); 1930 struct cfq_rq *crq = RQ_DATA(rq);
1931 1931
1932 if (crq) { 1932 if (crq) {
1933 struct cfq_queue *cfqq = crq->cfq_queue; 1933 struct cfq_queue *cfqq = crq->cfq_queue;
1934 const int rw = rq_data_dir(rq); 1934 const int rw = rq_data_dir(rq);
1935 1935
1936 BUG_ON(!cfqq->allocated[rw]); 1936 BUG_ON(!cfqq->allocated[rw]);
1937 cfqq->allocated[rw]--; 1937 cfqq->allocated[rw]--;
1938 1938
1939 put_io_context(crq->io_context->ioc); 1939 put_io_context(crq->io_context->ioc);
1940 1940
1941 mempool_free(crq, cfqd->crq_pool); 1941 mempool_free(crq, cfqd->crq_pool);
1942 rq->elevator_private = NULL; 1942 rq->elevator_private = NULL;
1943 1943
1944 cfq_check_waiters(q, cfqq); 1944 cfq_check_waiters(q, cfqq);
1945 cfq_put_queue(cfqq); 1945 cfq_put_queue(cfqq);
1946 } 1946 }
1947 } 1947 }
1948 1948
1949 /* 1949 /*
1950 * Allocate cfq data structures associated with this request. 1950 * Allocate cfq data structures associated with this request.
1951 */ 1951 */
1952 static int 1952 static int
1953 cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, 1953 cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
1954 gfp_t gfp_mask) 1954 gfp_t gfp_mask)
1955 { 1955 {
1956 struct cfq_data *cfqd = q->elevator->elevator_data; 1956 struct cfq_data *cfqd = q->elevator->elevator_data;
1957 struct task_struct *tsk = current; 1957 struct task_struct *tsk = current;
1958 struct cfq_io_context *cic; 1958 struct cfq_io_context *cic;
1959 const int rw = rq_data_dir(rq); 1959 const int rw = rq_data_dir(rq);
1960 pid_t key = cfq_queue_pid(tsk, rw); 1960 pid_t key = cfq_queue_pid(tsk, rw);
1961 struct cfq_queue *cfqq; 1961 struct cfq_queue *cfqq;
1962 struct cfq_rq *crq; 1962 struct cfq_rq *crq;
1963 unsigned long flags; 1963 unsigned long flags;
1964 int is_sync = key != CFQ_KEY_ASYNC; 1964 int is_sync = key != CFQ_KEY_ASYNC;
1965 1965
1966 might_sleep_if(gfp_mask & __GFP_WAIT); 1966 might_sleep_if(gfp_mask & __GFP_WAIT);
1967 1967
1968 cic = cfq_get_io_context(cfqd, key, gfp_mask); 1968 cic = cfq_get_io_context(cfqd, key, gfp_mask);
1969 1969
1970 spin_lock_irqsave(q->queue_lock, flags); 1970 spin_lock_irqsave(q->queue_lock, flags);
1971 1971
1972 if (!cic) 1972 if (!cic)
1973 goto queue_fail; 1973 goto queue_fail;
1974 1974
1975 if (!cic->cfqq[is_sync]) { 1975 if (!cic->cfqq[is_sync]) {
1976 cfqq = cfq_get_queue(cfqd, key, tsk, gfp_mask); 1976 cfqq = cfq_get_queue(cfqd, key, tsk, gfp_mask);
1977 if (!cfqq) 1977 if (!cfqq)
1978 goto queue_fail; 1978 goto queue_fail;
1979 1979
1980 cic->cfqq[is_sync] = cfqq; 1980 cic->cfqq[is_sync] = cfqq;
1981 } else 1981 } else
1982 cfqq = cic->cfqq[is_sync]; 1982 cfqq = cic->cfqq[is_sync];
1983 1983
1984 cfqq->allocated[rw]++; 1984 cfqq->allocated[rw]++;
1985 cfq_clear_cfqq_must_alloc(cfqq); 1985 cfq_clear_cfqq_must_alloc(cfqq);
1986 cfqd->rq_starved = 0; 1986 cfqd->rq_starved = 0;
1987 atomic_inc(&cfqq->ref); 1987 atomic_inc(&cfqq->ref);
1988 spin_unlock_irqrestore(q->queue_lock, flags); 1988 spin_unlock_irqrestore(q->queue_lock, flags);
1989 1989
1990 crq = mempool_alloc(cfqd->crq_pool, gfp_mask); 1990 crq = mempool_alloc(cfqd->crq_pool, gfp_mask);
1991 if (crq) { 1991 if (crq) {
1992 RB_CLEAR(&crq->rb_node); 1992 RB_CLEAR(&crq->rb_node);
1993 crq->rb_key = 0; 1993 crq->rb_key = 0;
1994 crq->request = rq; 1994 crq->request = rq;
1995 INIT_HLIST_NODE(&crq->hash); 1995 INIT_HLIST_NODE(&crq->hash);
1996 crq->cfq_queue = cfqq; 1996 crq->cfq_queue = cfqq;
1997 crq->io_context = cic; 1997 crq->io_context = cic;
1998 1998
1999 if (is_sync) 1999 if (is_sync)
2000 cfq_mark_crq_is_sync(crq); 2000 cfq_mark_crq_is_sync(crq);
2001 else 2001 else
2002 cfq_clear_crq_is_sync(crq); 2002 cfq_clear_crq_is_sync(crq);
2003 2003
2004 rq->elevator_private = crq; 2004 rq->elevator_private = crq;
2005 return 0; 2005 return 0;
2006 } 2006 }
2007 2007
2008 spin_lock_irqsave(q->queue_lock, flags); 2008 spin_lock_irqsave(q->queue_lock, flags);
2009 cfqq->allocated[rw]--; 2009 cfqq->allocated[rw]--;
2010 if (!(cfqq->allocated[0] + cfqq->allocated[1])) 2010 if (!(cfqq->allocated[0] + cfqq->allocated[1]))
2011 cfq_mark_cfqq_must_alloc(cfqq); 2011 cfq_mark_cfqq_must_alloc(cfqq);
2012 cfq_put_queue(cfqq); 2012 cfq_put_queue(cfqq);
2013 queue_fail: 2013 queue_fail:
2014 if (cic) 2014 if (cic)
2015 put_io_context(cic->ioc); 2015 put_io_context(cic->ioc);
2016 /* 2016 /*
2017 * mark us rq allocation starved. we need to kickstart the process 2017 * mark us rq allocation starved. we need to kickstart the process
2018 * ourselves if there are no pending requests that can do it for us. 2018 * ourselves if there are no pending requests that can do it for us.
2019 * that would be an extremely rare OOM situation 2019 * that would be an extremely rare OOM situation
2020 */ 2020 */
2021 cfqd->rq_starved = 1; 2021 cfqd->rq_starved = 1;
2022 cfq_schedule_dispatch(cfqd); 2022 cfq_schedule_dispatch(cfqd);
2023 spin_unlock_irqrestore(q->queue_lock, flags); 2023 spin_unlock_irqrestore(q->queue_lock, flags);
2024 return 1; 2024 return 1;
2025 } 2025 }
2026 2026
2027 static void cfq_kick_queue(void *data) 2027 static void cfq_kick_queue(void *data)
2028 { 2028 {
2029 request_queue_t *q = data; 2029 request_queue_t *q = data;
2030 struct cfq_data *cfqd = q->elevator->elevator_data; 2030 struct cfq_data *cfqd = q->elevator->elevator_data;
2031 unsigned long flags; 2031 unsigned long flags;
2032 2032
2033 spin_lock_irqsave(q->queue_lock, flags); 2033 spin_lock_irqsave(q->queue_lock, flags);
2034 2034
2035 if (cfqd->rq_starved) { 2035 if (cfqd->rq_starved) {
2036 struct request_list *rl = &q->rq; 2036 struct request_list *rl = &q->rq;
2037 2037
2038 /* 2038 /*
2039 * we aren't guaranteed to get a request after this, but we 2039 * we aren't guaranteed to get a request after this, but we
2040 * have to be opportunistic 2040 * have to be opportunistic
2041 */ 2041 */
2042 smp_mb(); 2042 smp_mb();
2043 if (waitqueue_active(&rl->wait[READ])) 2043 if (waitqueue_active(&rl->wait[READ]))
2044 wake_up(&rl->wait[READ]); 2044 wake_up(&rl->wait[READ]);
2045 if (waitqueue_active(&rl->wait[WRITE])) 2045 if (waitqueue_active(&rl->wait[WRITE]))
2046 wake_up(&rl->wait[WRITE]); 2046 wake_up(&rl->wait[WRITE]);
2047 } 2047 }
2048 2048
2049 blk_remove_plug(q); 2049 blk_remove_plug(q);
2050 q->request_fn(q); 2050 q->request_fn(q);
2051 spin_unlock_irqrestore(q->queue_lock, flags); 2051 spin_unlock_irqrestore(q->queue_lock, flags);
2052 } 2052 }
2053 2053
2054 /* 2054 /*
2055 * Timer running if the active_queue is currently idling inside its time slice 2055 * Timer running if the active_queue is currently idling inside its time slice
2056 */ 2056 */
2057 static void cfq_idle_slice_timer(unsigned long data) 2057 static void cfq_idle_slice_timer(unsigned long data)
2058 { 2058 {
2059 struct cfq_data *cfqd = (struct cfq_data *) data; 2059 struct cfq_data *cfqd = (struct cfq_data *) data;
2060 struct cfq_queue *cfqq; 2060 struct cfq_queue *cfqq;
2061 unsigned long flags; 2061 unsigned long flags;
2062 2062
2063 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 2063 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
2064 2064
2065 if ((cfqq = cfqd->active_queue) != NULL) { 2065 if ((cfqq = cfqd->active_queue) != NULL) {
2066 unsigned long now = jiffies; 2066 unsigned long now = jiffies;
2067 2067
2068 /* 2068 /*
2069 * expired 2069 * expired
2070 */ 2070 */
2071 if (time_after(now, cfqq->slice_end)) 2071 if (time_after(now, cfqq->slice_end))
2072 goto expire; 2072 goto expire;
2073 2073
2074 /* 2074 /*
2075 * only expire and reinvoke request handler, if there are 2075 * only expire and reinvoke request handler, if there are
2076 * other queues with pending requests 2076 * other queues with pending requests
2077 */ 2077 */
2078 if (!cfqd->busy_queues) { 2078 if (!cfqd->busy_queues) {
2079 cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end); 2079 cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end);
2080 add_timer(&cfqd->idle_slice_timer); 2080 add_timer(&cfqd->idle_slice_timer);
2081 goto out_cont; 2081 goto out_cont;
2082 } 2082 }
2083 2083
2084 /* 2084 /*
2085 * not expired and it has a request pending, let it dispatch 2085 * not expired and it has a request pending, let it dispatch
2086 */ 2086 */
2087 if (!RB_EMPTY(&cfqq->sort_list)) { 2087 if (!RB_EMPTY(&cfqq->sort_list)) {
2088 cfq_mark_cfqq_must_dispatch(cfqq); 2088 cfq_mark_cfqq_must_dispatch(cfqq);
2089 goto out_kick; 2089 goto out_kick;
2090 } 2090 }
2091 } 2091 }
2092 expire: 2092 expire:
2093 cfq_slice_expired(cfqd, 0); 2093 cfq_slice_expired(cfqd, 0);
2094 out_kick: 2094 out_kick:
2095 cfq_schedule_dispatch(cfqd); 2095 cfq_schedule_dispatch(cfqd);
2096 out_cont: 2096 out_cont:
2097 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 2097 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
2098 } 2098 }
2099 2099
2100 /* 2100 /*
2101 * Timer running if an idle class queue is waiting for service 2101 * Timer running if an idle class queue is waiting for service
2102 */ 2102 */
2103 static void cfq_idle_class_timer(unsigned long data) 2103 static void cfq_idle_class_timer(unsigned long data)
2104 { 2104 {
2105 struct cfq_data *cfqd = (struct cfq_data *) data; 2105 struct cfq_data *cfqd = (struct cfq_data *) data;
2106 unsigned long flags, end; 2106 unsigned long flags, end;
2107 2107
2108 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 2108 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
2109 2109
2110 /* 2110 /*
2111 * race with a non-idle queue, reset timer 2111 * race with a non-idle queue, reset timer
2112 */ 2112 */
2113 end = cfqd->last_end_request + CFQ_IDLE_GRACE; 2113 end = cfqd->last_end_request + CFQ_IDLE_GRACE;
2114 if (!time_after_eq(jiffies, end)) { 2114 if (!time_after_eq(jiffies, end)) {
2115 cfqd->idle_class_timer.expires = end; 2115 cfqd->idle_class_timer.expires = end;
2116 add_timer(&cfqd->idle_class_timer); 2116 add_timer(&cfqd->idle_class_timer);
2117 } else 2117 } else
2118 cfq_schedule_dispatch(cfqd); 2118 cfq_schedule_dispatch(cfqd);
2119 2119
2120 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 2120 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
2121 } 2121 }
2122 2122
2123 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) 2123 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
2124 { 2124 {
2125 del_timer_sync(&cfqd->idle_slice_timer); 2125 del_timer_sync(&cfqd->idle_slice_timer);
2126 del_timer_sync(&cfqd->idle_class_timer); 2126 del_timer_sync(&cfqd->idle_class_timer);
2127 blk_sync_queue(cfqd->queue); 2127 blk_sync_queue(cfqd->queue);
2128 } 2128 }
2129 2129
2130 static void cfq_exit_queue(elevator_t *e) 2130 static void cfq_exit_queue(elevator_t *e)
2131 { 2131 {
2132 struct cfq_data *cfqd = e->elevator_data; 2132 struct cfq_data *cfqd = e->elevator_data;
2133 request_queue_t *q = cfqd->queue; 2133 request_queue_t *q = cfqd->queue;
2134 2134
2135 cfq_shutdown_timer_wq(cfqd); 2135 cfq_shutdown_timer_wq(cfqd);
2136 write_lock(&cfq_exit_lock); 2136 write_lock(&cfq_exit_lock);
2137 spin_lock_irq(q->queue_lock); 2137 spin_lock_irq(q->queue_lock);
2138 if (cfqd->active_queue) 2138 if (cfqd->active_queue)
2139 __cfq_slice_expired(cfqd, cfqd->active_queue, 0); 2139 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
2140 while(!list_empty(&cfqd->cic_list)) { 2140 while(!list_empty(&cfqd->cic_list)) {
2141 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, 2141 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
2142 struct cfq_io_context, 2142 struct cfq_io_context,
2143 queue_list); 2143 queue_list);
2144 if (cic->cfqq[ASYNC]) { 2144 if (cic->cfqq[ASYNC]) {
2145 cfq_put_queue(cic->cfqq[ASYNC]); 2145 cfq_put_queue(cic->cfqq[ASYNC]);
2146 cic->cfqq[ASYNC] = NULL; 2146 cic->cfqq[ASYNC] = NULL;
2147 } 2147 }
2148 if (cic->cfqq[SYNC]) { 2148 if (cic->cfqq[SYNC]) {
2149 cfq_put_queue(cic->cfqq[SYNC]); 2149 cfq_put_queue(cic->cfqq[SYNC]);
2150 cic->cfqq[SYNC] = NULL; 2150 cic->cfqq[SYNC] = NULL;
2151 } 2151 }
2152 cic->key = NULL; 2152 cic->key = NULL;
2153 list_del_init(&cic->queue_list); 2153 list_del_init(&cic->queue_list);
2154 } 2154 }
2155 spin_unlock_irq(q->queue_lock); 2155 spin_unlock_irq(q->queue_lock);
2156 write_unlock(&cfq_exit_lock); 2156 write_unlock(&cfq_exit_lock);
2157 2157
2158 cfq_shutdown_timer_wq(cfqd); 2158 cfq_shutdown_timer_wq(cfqd);
2159 2159
2160 mempool_destroy(cfqd->crq_pool); 2160 mempool_destroy(cfqd->crq_pool);
2161 kfree(cfqd->crq_hash); 2161 kfree(cfqd->crq_hash);
2162 kfree(cfqd->cfq_hash); 2162 kfree(cfqd->cfq_hash);
2163 kfree(cfqd); 2163 kfree(cfqd);
2164 } 2164 }
2165 2165
2166 static int cfq_init_queue(request_queue_t *q, elevator_t *e) 2166 static int cfq_init_queue(request_queue_t *q, elevator_t *e)
2167 { 2167 {
2168 struct cfq_data *cfqd; 2168 struct cfq_data *cfqd;
2169 int i; 2169 int i;
2170 2170
2171 cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); 2171 cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
2172 if (!cfqd) 2172 if (!cfqd)
2173 return -ENOMEM; 2173 return -ENOMEM;
2174 2174
2175 memset(cfqd, 0, sizeof(*cfqd)); 2175 memset(cfqd, 0, sizeof(*cfqd));
2176 2176
2177 for (i = 0; i < CFQ_PRIO_LISTS; i++) 2177 for (i = 0; i < CFQ_PRIO_LISTS; i++)
2178 INIT_LIST_HEAD(&cfqd->rr_list[i]); 2178 INIT_LIST_HEAD(&cfqd->rr_list[i]);
2179 2179
2180 INIT_LIST_HEAD(&cfqd->busy_rr); 2180 INIT_LIST_HEAD(&cfqd->busy_rr);
2181 INIT_LIST_HEAD(&cfqd->cur_rr); 2181 INIT_LIST_HEAD(&cfqd->cur_rr);
2182 INIT_LIST_HEAD(&cfqd->idle_rr); 2182 INIT_LIST_HEAD(&cfqd->idle_rr);
2183 INIT_LIST_HEAD(&cfqd->empty_list); 2183 INIT_LIST_HEAD(&cfqd->empty_list);
2184 INIT_LIST_HEAD(&cfqd->cic_list); 2184 INIT_LIST_HEAD(&cfqd->cic_list);
2185 2185
2186 cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); 2186 cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
2187 if (!cfqd->crq_hash) 2187 if (!cfqd->crq_hash)
2188 goto out_crqhash; 2188 goto out_crqhash;
2189 2189
2190 cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL); 2190 cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
2191 if (!cfqd->cfq_hash) 2191 if (!cfqd->cfq_hash)
2192 goto out_cfqhash; 2192 goto out_cfqhash;
2193 2193
2194 cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool); 2194 cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool);
2195 if (!cfqd->crq_pool) 2195 if (!cfqd->crq_pool)
2196 goto out_crqpool; 2196 goto out_crqpool;
2197 2197
2198 for (i = 0; i < CFQ_MHASH_ENTRIES; i++) 2198 for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
2199 INIT_HLIST_HEAD(&cfqd->crq_hash[i]); 2199 INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
2200 for (i = 0; i < CFQ_QHASH_ENTRIES; i++) 2200 for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
2201 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); 2201 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
2202 2202
2203 e->elevator_data = cfqd; 2203 e->elevator_data = cfqd;
2204 2204
2205 cfqd->queue = q; 2205 cfqd->queue = q;
2206 2206
2207 cfqd->max_queued = q->nr_requests / 4; 2207 cfqd->max_queued = q->nr_requests / 4;
2208 q->nr_batching = cfq_queued; 2208 q->nr_batching = cfq_queued;
2209 2209
2210 init_timer(&cfqd->idle_slice_timer); 2210 init_timer(&cfqd->idle_slice_timer);
2211 cfqd->idle_slice_timer.function = cfq_idle_slice_timer; 2211 cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
2212 cfqd->idle_slice_timer.data = (unsigned long) cfqd; 2212 cfqd->idle_slice_timer.data = (unsigned long) cfqd;
2213 2213
2214 init_timer(&cfqd->idle_class_timer); 2214 init_timer(&cfqd->idle_class_timer);
2215 cfqd->idle_class_timer.function = cfq_idle_class_timer; 2215 cfqd->idle_class_timer.function = cfq_idle_class_timer;
2216 cfqd->idle_class_timer.data = (unsigned long) cfqd; 2216 cfqd->idle_class_timer.data = (unsigned long) cfqd;
2217 2217
2218 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q); 2218 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);
2219 2219
2220 cfqd->cfq_queued = cfq_queued; 2220 cfqd->cfq_queued = cfq_queued;
2221 cfqd->cfq_quantum = cfq_quantum; 2221 cfqd->cfq_quantum = cfq_quantum;
2222 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; 2222 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
2223 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; 2223 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
2224 cfqd->cfq_back_max = cfq_back_max; 2224 cfqd->cfq_back_max = cfq_back_max;
2225 cfqd->cfq_back_penalty = cfq_back_penalty; 2225 cfqd->cfq_back_penalty = cfq_back_penalty;
2226 cfqd->cfq_slice[0] = cfq_slice_async; 2226 cfqd->cfq_slice[0] = cfq_slice_async;
2227 cfqd->cfq_slice[1] = cfq_slice_sync; 2227 cfqd->cfq_slice[1] = cfq_slice_sync;
2228 cfqd->cfq_slice_async_rq = cfq_slice_async_rq; 2228 cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
2229 cfqd->cfq_slice_idle = cfq_slice_idle; 2229 cfqd->cfq_slice_idle = cfq_slice_idle;
2230 cfqd->cfq_max_depth = cfq_max_depth; 2230 cfqd->cfq_max_depth = cfq_max_depth;
2231 2231
2232 return 0; 2232 return 0;
2233 out_crqpool: 2233 out_crqpool:
2234 kfree(cfqd->cfq_hash); 2234 kfree(cfqd->cfq_hash);
2235 out_cfqhash: 2235 out_cfqhash:
2236 kfree(cfqd->crq_hash); 2236 kfree(cfqd->crq_hash);
2237 out_crqhash: 2237 out_crqhash:
2238 kfree(cfqd); 2238 kfree(cfqd);
2239 return -ENOMEM; 2239 return -ENOMEM;
2240 } 2240 }
2241 2241
2242 static void cfq_slab_kill(void) 2242 static void cfq_slab_kill(void)
2243 { 2243 {
2244 if (crq_pool) 2244 if (crq_pool)
2245 kmem_cache_destroy(crq_pool); 2245 kmem_cache_destroy(crq_pool);
2246 if (cfq_pool) 2246 if (cfq_pool)
2247 kmem_cache_destroy(cfq_pool); 2247 kmem_cache_destroy(cfq_pool);
2248 if (cfq_ioc_pool) 2248 if (cfq_ioc_pool)
2249 kmem_cache_destroy(cfq_ioc_pool); 2249 kmem_cache_destroy(cfq_ioc_pool);
2250 } 2250 }
2251 2251
2252 static int __init cfq_slab_setup(void) 2252 static int __init cfq_slab_setup(void)
2253 { 2253 {
2254 crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0, 2254 crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
2255 NULL, NULL); 2255 NULL, NULL);
2256 if (!crq_pool) 2256 if (!crq_pool)
2257 goto fail; 2257 goto fail;
2258 2258
2259 cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0, 2259 cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
2260 NULL, NULL); 2260 NULL, NULL);
2261 if (!cfq_pool) 2261 if (!cfq_pool)
2262 goto fail; 2262 goto fail;
2263 2263
2264 cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool", 2264 cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool",
2265 sizeof(struct cfq_io_context), 0, 0, NULL, NULL); 2265 sizeof(struct cfq_io_context), 0, 0, NULL, NULL);
2266 if (!cfq_ioc_pool) 2266 if (!cfq_ioc_pool)
2267 goto fail; 2267 goto fail;
2268 2268
2269 return 0; 2269 return 0;
2270 fail: 2270 fail:
2271 cfq_slab_kill(); 2271 cfq_slab_kill();
2272 return -ENOMEM; 2272 return -ENOMEM;
2273 } 2273 }
2274 2274
2275 /* 2275 /*
2276 * sysfs parts below --> 2276 * sysfs parts below -->
2277 */ 2277 */
2278 2278
2279 static ssize_t 2279 static ssize_t
2280 cfq_var_show(unsigned int var, char *page) 2280 cfq_var_show(unsigned int var, char *page)
2281 { 2281 {
2282 return sprintf(page, "%d\n", var); 2282 return sprintf(page, "%d\n", var);
2283 } 2283 }
2284 2284
2285 static ssize_t 2285 static ssize_t
2286 cfq_var_store(unsigned int *var, const char *page, size_t count) 2286 cfq_var_store(unsigned int *var, const char *page, size_t count)
2287 { 2287 {
2288 char *p = (char *) page; 2288 char *p = (char *) page;
2289 2289
2290 *var = simple_strtoul(p, &p, 10); 2290 *var = simple_strtoul(p, &p, 10);
2291 return count; 2291 return count;
2292 } 2292 }
2293 2293
2294 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ 2294 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
2295 static ssize_t __FUNC(elevator_t *e, char *page) \ 2295 static ssize_t __FUNC(elevator_t *e, char *page) \
2296 { \ 2296 { \
2297 struct cfq_data *cfqd = e->elevator_data; \ 2297 struct cfq_data *cfqd = e->elevator_data; \
2298 unsigned int __data = __VAR; \ 2298 unsigned int __data = __VAR; \
2299 if (__CONV) \ 2299 if (__CONV) \
2300 __data = jiffies_to_msecs(__data); \ 2300 __data = jiffies_to_msecs(__data); \
2301 return cfq_var_show(__data, (page)); \ 2301 return cfq_var_show(__data, (page)); \
2302 } 2302 }
2303 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); 2303 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
2304 SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0); 2304 SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0);
2305 SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); 2305 SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
2306 SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); 2306 SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
2307 SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0); 2307 SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
2308 SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0); 2308 SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0);
2309 SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); 2309 SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
2310 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); 2310 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
2311 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); 2311 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
2312 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); 2312 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
2313 SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0); 2313 SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0);
2314 #undef SHOW_FUNCTION 2314 #undef SHOW_FUNCTION
2315 2315
2316 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ 2316 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
2317 static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ 2317 static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
2318 { \ 2318 { \
2319 struct cfq_data *cfqd = e->elevator_data; \ 2319 struct cfq_data *cfqd = e->elevator_data; \
2320 unsigned int __data; \ 2320 unsigned int __data; \
2321 int ret = cfq_var_store(&__data, (page), count); \ 2321 int ret = cfq_var_store(&__data, (page), count); \
2322 if (__data < (MIN)) \ 2322 if (__data < (MIN)) \
2323 __data = (MIN); \ 2323 __data = (MIN); \
2324 else if (__data > (MAX)) \ 2324 else if (__data > (MAX)) \
2325 __data = (MAX); \ 2325 __data = (MAX); \
2326 if (__CONV) \ 2326 if (__CONV) \
2327 *(__PTR) = msecs_to_jiffies(__data); \ 2327 *(__PTR) = msecs_to_jiffies(__data); \
2328 else \ 2328 else \
2329 *(__PTR) = __data; \ 2329 *(__PTR) = __data; \
2330 return ret; \ 2330 return ret; \
2331 } 2331 }
2332 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); 2332 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
2333 STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0); 2333 STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0);
2334 STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1); 2334 STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1);
2335 STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1); 2335 STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1);
2336 STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); 2336 STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
2337 STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0); 2337 STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0);
2338 STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); 2338 STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
2339 STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); 2339 STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
2340 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); 2340 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
2341 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); 2341 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0);
2342 STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0); 2342 STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0);
2343 #undef STORE_FUNCTION 2343 #undef STORE_FUNCTION
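The SHOW_FUNCTION/STORE_FUNCTION macros above stamp out one sysfs accessor per tunable. As a reference point (illustration only: comments added, whitespace reflowed, not part of the patch), STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0) expands to roughly:

static ssize_t cfq_quantum_store(elevator_t *e, const char *page, size_t count)
{
	struct cfq_data *cfqd = e->elevator_data;
	unsigned int __data;
	int ret = cfq_var_store(&__data, (page), count);

	/* clamp to the [1, UINT_MAX] range passed to the macro */
	if (__data < (1))
		__data = (1);
	else if (__data > (UINT_MAX))
		__data = (UINT_MAX);
	/* __CONV is 0 for cfq_quantum, so the msecs_to_jiffies() branch is dead */
	if (0)
		*(&cfqd->cfq_quantum) = msecs_to_jiffies(__data);
	else
		*(&cfqd->cfq_quantum) = __data;
	return ret;
}

The *_show helpers are the mirror image, converting jiffies to milliseconds on the way out when __CONV is 1.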
2344 2344
2345 static struct elv_fs_entry cfq_quantum_entry = { 2345 #define CFQ_ATTR(name) \
2346 .attr = {.name = "quantum", .mode = S_IRUGO | S_IWUSR }, 2346 __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
2347 .show = cfq_quantum_show,
2348 .store = cfq_quantum_store,
2349 };
2350 static struct elv_fs_entry cfq_queued_entry = {
2351 .attr = {.name = "queued", .mode = S_IRUGO | S_IWUSR },
2352 .show = cfq_queued_show,
2353 .store = cfq_queued_store,
2354 };
2355 static struct elv_fs_entry cfq_fifo_expire_sync_entry = {
2356 .attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR },
2357 .show = cfq_fifo_expire_sync_show,
2358 .store = cfq_fifo_expire_sync_store,
2359 };
2360 static struct elv_fs_entry cfq_fifo_expire_async_entry = {
2361 .attr = {.name = "fifo_expire_async", .mode = S_IRUGO | S_IWUSR },
2362 .show = cfq_fifo_expire_async_show,
2363 .store = cfq_fifo_expire_async_store,
2364 };
2365 static struct elv_fs_entry cfq_back_max_entry = {
2366 .attr = {.name = "back_seek_max", .mode = S_IRUGO | S_IWUSR },
2367 .show = cfq_back_max_show,
2368 .store = cfq_back_max_store,
2369 };
2370 static struct elv_fs_entry cfq_back_penalty_entry = {
2371 .attr = {.name = "back_seek_penalty", .mode = S_IRUGO | S_IWUSR },
2372 .show = cfq_back_penalty_show,
2373 .store = cfq_back_penalty_store,
2374 };
2375 static struct elv_fs_entry cfq_slice_sync_entry = {
2376 .attr = {.name = "slice_sync", .mode = S_IRUGO | S_IWUSR },
2377 .show = cfq_slice_sync_show,
2378 .store = cfq_slice_sync_store,
2379 };
2380 static struct elv_fs_entry cfq_slice_async_entry = {
2381 .attr = {.name = "slice_async", .mode = S_IRUGO | S_IWUSR },
2382 .show = cfq_slice_async_show,
2383 .store = cfq_slice_async_store,
2384 };
2385 static struct elv_fs_entry cfq_slice_async_rq_entry = {
2386 .attr = {.name = "slice_async_rq", .mode = S_IRUGO | S_IWUSR },
2387 .show = cfq_slice_async_rq_show,
2388 .store = cfq_slice_async_rq_store,
2389 };
2390 static struct elv_fs_entry cfq_slice_idle_entry = {
2391 .attr = {.name = "slice_idle", .mode = S_IRUGO | S_IWUSR },
2392 .show = cfq_slice_idle_show,
2393 .store = cfq_slice_idle_store,
2394 };
2395 static struct elv_fs_entry cfq_max_depth_entry = {
2396 .attr = {.name = "max_depth", .mode = S_IRUGO | S_IWUSR },
2397 .show = cfq_max_depth_show,
2398 .store = cfq_max_depth_store,
2399 };
2400 2347
2401 static struct attribute *cfq_attrs[] = { 2348 static struct elv_fs_entry cfq_attrs[] = {
2402 &cfq_quantum_entry.attr, 2349 CFQ_ATTR(quantum),
2403 &cfq_queued_entry.attr, 2350 CFQ_ATTR(queued),
2404 &cfq_fifo_expire_sync_entry.attr, 2351 CFQ_ATTR(fifo_expire_sync),
2405 &cfq_fifo_expire_async_entry.attr, 2352 CFQ_ATTR(fifo_expire_async),
2406 &cfq_back_max_entry.attr, 2353 CFQ_ATTR(back_seek_max),
2407 &cfq_back_penalty_entry.attr, 2354 CFQ_ATTR(back_seek_penalty),
2408 &cfq_slice_sync_entry.attr, 2355 CFQ_ATTR(slice_sync),
2409 &cfq_slice_async_entry.attr, 2356 CFQ_ATTR(slice_async),
2410 &cfq_slice_async_rq_entry.attr, 2357 CFQ_ATTR(slice_async_rq),
2411 &cfq_slice_idle_entry.attr, 2358 CFQ_ATTR(slice_idle),
2412 &cfq_max_depth_entry.attr, 2359 CFQ_ATTR(max_depth),
2413 NULL, 2360 __ATTR_NULL
2414 }; 2361 };
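The eleven hand-written elv_fs_entry definitions on the left collapse into a single CFQ_ATTR() table on the right, and the back_seek_max/back_seek_penalty show/store helpers are renamed so the name##_show / name##_store pasting matches the sysfs file names that used to be spelled out by hand. Assuming the standard __ATTR() helper from <linux/sysfs.h>, CFQ_ATTR(quantum) yields an initializer equivalent to the removed cfq_quantum_entry (the _equiv identifier below is illustrative, not in the patch):

/* Roughly what CFQ_ATTR(quantum) boils down to via __ATTR(): the same fields
 * the removed cfq_quantum_entry listed explicitly.
 */
static struct elv_fs_entry cfq_quantum_entry_equiv = {
	.attr	= { .name = "quantum", .mode = S_IRUGO | S_IWUSR },
	.show	= cfq_quantum_show,
	.store	= cfq_quantum_store,
};

The array now ends in __ATTR_NULL, a sentinel entry with a NULL name, in place of the bare NULL pointer that terminated the old struct attribute array.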
2415 2362
2416 static struct elevator_type iosched_cfq = { 2363 static struct elevator_type iosched_cfq = {
2417 .ops = { 2364 .ops = {
2418 .elevator_merge_fn = cfq_merge, 2365 .elevator_merge_fn = cfq_merge,
2419 .elevator_merged_fn = cfq_merged_request, 2366 .elevator_merged_fn = cfq_merged_request,
2420 .elevator_merge_req_fn = cfq_merged_requests, 2367 .elevator_merge_req_fn = cfq_merged_requests,
2421 .elevator_dispatch_fn = cfq_dispatch_requests, 2368 .elevator_dispatch_fn = cfq_dispatch_requests,
2422 .elevator_add_req_fn = cfq_insert_request, 2369 .elevator_add_req_fn = cfq_insert_request,
2423 .elevator_activate_req_fn = cfq_activate_request, 2370 .elevator_activate_req_fn = cfq_activate_request,
2424 .elevator_deactivate_req_fn = cfq_deactivate_request, 2371 .elevator_deactivate_req_fn = cfq_deactivate_request,
2425 .elevator_queue_empty_fn = cfq_queue_empty, 2372 .elevator_queue_empty_fn = cfq_queue_empty,
2426 .elevator_completed_req_fn = cfq_completed_request, 2373 .elevator_completed_req_fn = cfq_completed_request,
2427 .elevator_former_req_fn = cfq_former_request, 2374 .elevator_former_req_fn = cfq_former_request,
2428 .elevator_latter_req_fn = cfq_latter_request, 2375 .elevator_latter_req_fn = cfq_latter_request,
2429 .elevator_set_req_fn = cfq_set_request, 2376 .elevator_set_req_fn = cfq_set_request,
2430 .elevator_put_req_fn = cfq_put_request, 2377 .elevator_put_req_fn = cfq_put_request,
2431 .elevator_may_queue_fn = cfq_may_queue, 2378 .elevator_may_queue_fn = cfq_may_queue,
2432 .elevator_init_fn = cfq_init_queue, 2379 .elevator_init_fn = cfq_init_queue,
2433 .elevator_exit_fn = cfq_exit_queue, 2380 .elevator_exit_fn = cfq_exit_queue,
2434 .trim = cfq_trim, 2381 .trim = cfq_trim,
2435 }, 2382 },
2436 .elevator_attrs = cfq_attrs, 2383 .elevator_attrs = cfq_attrs,
2437 .elevator_name = "cfq", 2384 .elevator_name = "cfq",
2438 .elevator_owner = THIS_MODULE, 2385 .elevator_owner = THIS_MODULE,
2439 }; 2386 };
2440 2387
2441 static int __init cfq_init(void) 2388 static int __init cfq_init(void)
2442 { 2389 {
2443 int ret; 2390 int ret;
2444 2391
2445 /* 2392 /*
2446 * could be 0 on HZ < 1000 setups 2393 * could be 0 on HZ < 1000 setups
2447 */ 2394 */
2448 if (!cfq_slice_async) 2395 if (!cfq_slice_async)
2449 cfq_slice_async = 1; 2396 cfq_slice_async = 1;
2450 if (!cfq_slice_idle) 2397 if (!cfq_slice_idle)
2451 cfq_slice_idle = 1; 2398 cfq_slice_idle = 1;
2452 2399
2453 if (cfq_slab_setup()) 2400 if (cfq_slab_setup())
2454 return -ENOMEM; 2401 return -ENOMEM;
2455 2402
2456 ret = elv_register(&iosched_cfq); 2403 ret = elv_register(&iosched_cfq);
2457 if (ret) 2404 if (ret)
2458 cfq_slab_kill(); 2405 cfq_slab_kill();
2459 2406
2460 return ret; 2407 return ret;
2461 } 2408 }
2462 2409
2463 static void __exit cfq_exit(void) 2410 static void __exit cfq_exit(void)
2464 { 2411 {
2465 DECLARE_COMPLETION(all_gone); 2412 DECLARE_COMPLETION(all_gone);
2466 elv_unregister(&iosched_cfq); 2413 elv_unregister(&iosched_cfq);
2467 ioc_gone = &all_gone; 2414 ioc_gone = &all_gone;
2468 barrier(); 2415 barrier();
2469 if (atomic_read(&ioc_count)) 2416 if (atomic_read(&ioc_count))
2470 complete(ioc_gone); 2417 complete(ioc_gone);
2471 synchronize_rcu(); 2418 synchronize_rcu();
2472 cfq_slab_kill(); 2419 cfq_slab_kill();
2473 } 2420 }
2474 2421
2475 module_init(cfq_init); 2422 module_init(cfq_init);
2476 module_exit(cfq_exit); 2423 module_exit(cfq_exit);
2477 2424
2478 MODULE_AUTHOR("Jens Axboe"); 2425 MODULE_AUTHOR("Jens Axboe");
2479 MODULE_LICENSE("GPL"); 2426 MODULE_LICENSE("GPL");
2480 MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler"); 2427 MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");
2481 2428
block/deadline-iosched.c
1 /* 1 /*
2 * Deadline i/o scheduler. 2 * Deadline i/o scheduler.
3 * 3 *
4 * Copyright (C) 2002 Jens Axboe <axboe@suse.de> 4 * Copyright (C) 2002 Jens Axboe <axboe@suse.de>
5 */ 5 */
6 #include <linux/kernel.h> 6 #include <linux/kernel.h>
7 #include <linux/fs.h> 7 #include <linux/fs.h>
8 #include <linux/blkdev.h> 8 #include <linux/blkdev.h>
9 #include <linux/elevator.h> 9 #include <linux/elevator.h>
10 #include <linux/bio.h> 10 #include <linux/bio.h>
11 #include <linux/config.h> 11 #include <linux/config.h>
12 #include <linux/module.h> 12 #include <linux/module.h>
13 #include <linux/slab.h> 13 #include <linux/slab.h>
14 #include <linux/init.h> 14 #include <linux/init.h>
15 #include <linux/compiler.h> 15 #include <linux/compiler.h>
16 #include <linux/hash.h> 16 #include <linux/hash.h>
17 #include <linux/rbtree.h> 17 #include <linux/rbtree.h>
18 18
19 /* 19 /*
20 * See Documentation/block/deadline-iosched.txt 20 * See Documentation/block/deadline-iosched.txt
21 */ 21 */
22 static const int read_expire = HZ / 2; /* max time before a read is submitted. */ 22 static const int read_expire = HZ / 2; /* max time before a read is submitted. */
23 static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ 23 static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
24 static const int writes_starved = 2; /* max times reads can starve a write */ 24 static const int writes_starved = 2; /* max times reads can starve a write */
25 static const int fifo_batch = 16; /* # of sequential requests treated as one 25 static const int fifo_batch = 16; /* # of sequential requests treated as one
26 by the above parameters. For throughput. */ 26 by the above parameters. For throughput. */
27 27
28 static const int deadline_hash_shift = 5; 28 static const int deadline_hash_shift = 5;
29 #define DL_HASH_BLOCK(sec) ((sec) >> 3) 29 #define DL_HASH_BLOCK(sec) ((sec) >> 3)
30 #define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift)) 30 #define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
31 #define DL_HASH_ENTRIES (1 << deadline_hash_shift) 31 #define DL_HASH_ENTRIES (1 << deadline_hash_shift)
32 #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) 32 #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
33 #define list_entry_hash(ptr) list_entry((ptr), struct deadline_rq, hash) 33 #define list_entry_hash(ptr) list_entry((ptr), struct deadline_rq, hash)
34 #define ON_HASH(drq) (drq)->on_hash 34 #define ON_HASH(drq) (drq)->on_hash
35 35
36 struct deadline_data { 36 struct deadline_data {
37 /* 37 /*
38 * run time data 38 * run time data
39 */ 39 */
40 40
41 /* 41 /*
42 * requests (deadline_rq s) are present on both sort_list and fifo_list 42 * requests (deadline_rq s) are present on both sort_list and fifo_list
43 */ 43 */
44 struct rb_root sort_list[2]; 44 struct rb_root sort_list[2];
45 struct list_head fifo_list[2]; 45 struct list_head fifo_list[2];
46 46
47 /* 47 /*
48 * next in sort order. read, write or both are NULL 48 * next in sort order. read, write or both are NULL
49 */ 49 */
50 struct deadline_rq *next_drq[2]; 50 struct deadline_rq *next_drq[2];
51 struct list_head *hash; /* request hash */ 51 struct list_head *hash; /* request hash */
52 unsigned int batching; /* number of sequential requests made */ 52 unsigned int batching; /* number of sequential requests made */
53 sector_t last_sector; /* head position */ 53 sector_t last_sector; /* head position */
54 unsigned int starved; /* times reads have starved writes */ 54 unsigned int starved; /* times reads have starved writes */
55 55
56 /* 56 /*
57 * settings that change how the i/o scheduler behaves 57 * settings that change how the i/o scheduler behaves
58 */ 58 */
59 int fifo_expire[2]; 59 int fifo_expire[2];
60 int fifo_batch; 60 int fifo_batch;
61 int writes_starved; 61 int writes_starved;
62 int front_merges; 62 int front_merges;
63 63
64 mempool_t *drq_pool; 64 mempool_t *drq_pool;
65 }; 65 };
66 66
67 /* 67 /*
68 * pre-request data. 68 * pre-request data.
69 */ 69 */
70 struct deadline_rq { 70 struct deadline_rq {
71 /* 71 /*
72 * rbtree index, key is the starting offset 72 * rbtree index, key is the starting offset
73 */ 73 */
74 struct rb_node rb_node; 74 struct rb_node rb_node;
75 sector_t rb_key; 75 sector_t rb_key;
76 76
77 struct request *request; 77 struct request *request;
78 78
79 /* 79 /*
80 * request hash, key is the ending offset (for back merge lookup) 80 * request hash, key is the ending offset (for back merge lookup)
81 */ 81 */
82 struct list_head hash; 82 struct list_head hash;
83 char on_hash; 83 char on_hash;
84 84
85 /* 85 /*
86 * expire fifo 86 * expire fifo
87 */ 87 */
88 struct list_head fifo; 88 struct list_head fifo;
89 unsigned long expires; 89 unsigned long expires;
90 }; 90 };
91 91
92 static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq); 92 static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq);
93 93
94 static kmem_cache_t *drq_pool; 94 static kmem_cache_t *drq_pool;
95 95
96 #define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private) 96 #define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private)
97 97
98 /* 98 /*
99 * the back merge hash support functions 99 * the back merge hash support functions
100 */ 100 */
101 static inline void __deadline_del_drq_hash(struct deadline_rq *drq) 101 static inline void __deadline_del_drq_hash(struct deadline_rq *drq)
102 { 102 {
103 drq->on_hash = 0; 103 drq->on_hash = 0;
104 list_del_init(&drq->hash); 104 list_del_init(&drq->hash);
105 } 105 }
106 106
107 static inline void deadline_del_drq_hash(struct deadline_rq *drq) 107 static inline void deadline_del_drq_hash(struct deadline_rq *drq)
108 { 108 {
109 if (ON_HASH(drq)) 109 if (ON_HASH(drq))
110 __deadline_del_drq_hash(drq); 110 __deadline_del_drq_hash(drq);
111 } 111 }
112 112
113 static inline void 113 static inline void
114 deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) 114 deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
115 { 115 {
116 struct request *rq = drq->request; 116 struct request *rq = drq->request;
117 117
118 BUG_ON(ON_HASH(drq)); 118 BUG_ON(ON_HASH(drq));
119 119
120 drq->on_hash = 1; 120 drq->on_hash = 1;
121 list_add(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]); 121 list_add(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]);
122 } 122 }
123 123
124 /* 124 /*
125 * move hot entry to front of chain 125 * move hot entry to front of chain
126 */ 126 */
127 static inline void 127 static inline void
128 deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) 128 deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
129 { 129 {
130 struct request *rq = drq->request; 130 struct request *rq = drq->request;
131 struct list_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))]; 131 struct list_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))];
132 132
133 if (ON_HASH(drq) && drq->hash.prev != head) { 133 if (ON_HASH(drq) && drq->hash.prev != head) {
134 list_del(&drq->hash); 134 list_del(&drq->hash);
135 list_add(&drq->hash, head); 135 list_add(&drq->hash, head);
136 } 136 }
137 } 137 }
138 138
139 static struct request * 139 static struct request *
140 deadline_find_drq_hash(struct deadline_data *dd, sector_t offset) 140 deadline_find_drq_hash(struct deadline_data *dd, sector_t offset)
141 { 141 {
142 struct list_head *hash_list = &dd->hash[DL_HASH_FN(offset)]; 142 struct list_head *hash_list = &dd->hash[DL_HASH_FN(offset)];
143 struct list_head *entry, *next = hash_list->next; 143 struct list_head *entry, *next = hash_list->next;
144 144
145 while ((entry = next) != hash_list) { 145 while ((entry = next) != hash_list) {
146 struct deadline_rq *drq = list_entry_hash(entry); 146 struct deadline_rq *drq = list_entry_hash(entry);
147 struct request *__rq = drq->request; 147 struct request *__rq = drq->request;
148 148
149 next = entry->next; 149 next = entry->next;
150 150
151 BUG_ON(!ON_HASH(drq)); 151 BUG_ON(!ON_HASH(drq));
152 152
153 if (!rq_mergeable(__rq)) { 153 if (!rq_mergeable(__rq)) {
154 __deadline_del_drq_hash(drq); 154 __deadline_del_drq_hash(drq);
155 continue; 155 continue;
156 } 156 }
157 157
158 if (rq_hash_key(__rq) == offset) 158 if (rq_hash_key(__rq) == offset)
159 return __rq; 159 return __rq;
160 } 160 }
161 161
162 return NULL; 162 return NULL;
163 } 163 }
164 164
165 /* 165 /*
166 * rb tree support functions 166 * rb tree support functions
167 */ 167 */
168 #define RB_NONE (2) 168 #define RB_NONE (2)
169 #define RB_EMPTY(root) ((root)->rb_node == NULL) 169 #define RB_EMPTY(root) ((root)->rb_node == NULL)
170 #define ON_RB(node) ((node)->rb_color != RB_NONE) 170 #define ON_RB(node) ((node)->rb_color != RB_NONE)
171 #define RB_CLEAR(node) ((node)->rb_color = RB_NONE) 171 #define RB_CLEAR(node) ((node)->rb_color = RB_NONE)
172 #define rb_entry_drq(node) rb_entry((node), struct deadline_rq, rb_node) 172 #define rb_entry_drq(node) rb_entry((node), struct deadline_rq, rb_node)
173 #define DRQ_RB_ROOT(dd, drq) (&(dd)->sort_list[rq_data_dir((drq)->request)]) 173 #define DRQ_RB_ROOT(dd, drq) (&(dd)->sort_list[rq_data_dir((drq)->request)])
174 #define rq_rb_key(rq) (rq)->sector 174 #define rq_rb_key(rq) (rq)->sector
175 175
176 static struct deadline_rq * 176 static struct deadline_rq *
177 __deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) 177 __deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
178 { 178 {
179 struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node; 179 struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node;
180 struct rb_node *parent = NULL; 180 struct rb_node *parent = NULL;
181 struct deadline_rq *__drq; 181 struct deadline_rq *__drq;
182 182
183 while (*p) { 183 while (*p) {
184 parent = *p; 184 parent = *p;
185 __drq = rb_entry_drq(parent); 185 __drq = rb_entry_drq(parent);
186 186
187 if (drq->rb_key < __drq->rb_key) 187 if (drq->rb_key < __drq->rb_key)
188 p = &(*p)->rb_left; 188 p = &(*p)->rb_left;
189 else if (drq->rb_key > __drq->rb_key) 189 else if (drq->rb_key > __drq->rb_key)
190 p = &(*p)->rb_right; 190 p = &(*p)->rb_right;
191 else 191 else
192 return __drq; 192 return __drq;
193 } 193 }
194 194
195 rb_link_node(&drq->rb_node, parent, p); 195 rb_link_node(&drq->rb_node, parent, p);
196 return NULL; 196 return NULL;
197 } 197 }
198 198
199 static void 199 static void
200 deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) 200 deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
201 { 201 {
202 struct deadline_rq *__alias; 202 struct deadline_rq *__alias;
203 203
204 drq->rb_key = rq_rb_key(drq->request); 204 drq->rb_key = rq_rb_key(drq->request);
205 205
206 retry: 206 retry:
207 __alias = __deadline_add_drq_rb(dd, drq); 207 __alias = __deadline_add_drq_rb(dd, drq);
208 if (!__alias) { 208 if (!__alias) {
209 rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); 209 rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
210 return; 210 return;
211 } 211 }
212 212
213 deadline_move_request(dd, __alias); 213 deadline_move_request(dd, __alias);
214 goto retry; 214 goto retry;
215 } 215 }
216 216
217 static inline void 217 static inline void
218 deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) 218 deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
219 { 219 {
220 const int data_dir = rq_data_dir(drq->request); 220 const int data_dir = rq_data_dir(drq->request);
221 221
222 if (dd->next_drq[data_dir] == drq) { 222 if (dd->next_drq[data_dir] == drq) {
223 struct rb_node *rbnext = rb_next(&drq->rb_node); 223 struct rb_node *rbnext = rb_next(&drq->rb_node);
224 224
225 dd->next_drq[data_dir] = NULL; 225 dd->next_drq[data_dir] = NULL;
226 if (rbnext) 226 if (rbnext)
227 dd->next_drq[data_dir] = rb_entry_drq(rbnext); 227 dd->next_drq[data_dir] = rb_entry_drq(rbnext);
228 } 228 }
229 229
230 BUG_ON(!ON_RB(&drq->rb_node)); 230 BUG_ON(!ON_RB(&drq->rb_node));
231 rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); 231 rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
232 RB_CLEAR(&drq->rb_node); 232 RB_CLEAR(&drq->rb_node);
233 } 233 }
234 234
235 static struct request * 235 static struct request *
236 deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir) 236 deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir)
237 { 237 {
238 struct rb_node *n = dd->sort_list[data_dir].rb_node; 238 struct rb_node *n = dd->sort_list[data_dir].rb_node;
239 struct deadline_rq *drq; 239 struct deadline_rq *drq;
240 240
241 while (n) { 241 while (n) {
242 drq = rb_entry_drq(n); 242 drq = rb_entry_drq(n);
243 243
244 if (sector < drq->rb_key) 244 if (sector < drq->rb_key)
245 n = n->rb_left; 245 n = n->rb_left;
246 else if (sector > drq->rb_key) 246 else if (sector > drq->rb_key)
247 n = n->rb_right; 247 n = n->rb_right;
248 else 248 else
249 return drq->request; 249 return drq->request;
250 } 250 }
251 251
252 return NULL; 252 return NULL;
253 } 253 }
254 254
255 /* 255 /*
256 * deadline_find_first_drq finds the first (lowest sector numbered) request 256 * deadline_find_first_drq finds the first (lowest sector numbered) request
257 * for the specified data_dir. Used to sweep back to the start of the disk 257 * for the specified data_dir. Used to sweep back to the start of the disk
258 * (1-way elevator) after we process the last (highest sector) request. 258 * (1-way elevator) after we process the last (highest sector) request.
259 */ 259 */
260 static struct deadline_rq * 260 static struct deadline_rq *
261 deadline_find_first_drq(struct deadline_data *dd, int data_dir) 261 deadline_find_first_drq(struct deadline_data *dd, int data_dir)
262 { 262 {
263 struct rb_node *n = dd->sort_list[data_dir].rb_node; 263 struct rb_node *n = dd->sort_list[data_dir].rb_node;
264 264
265 for (;;) { 265 for (;;) {
266 if (n->rb_left == NULL) 266 if (n->rb_left == NULL)
267 return rb_entry_drq(n); 267 return rb_entry_drq(n);
268 268
269 n = n->rb_left; 269 n = n->rb_left;
270 } 270 }
271 } 271 }
272 272
273 /* 273 /*
274 * add drq to rbtree and fifo 274 * add drq to rbtree and fifo
275 */ 275 */
276 static void 276 static void
277 deadline_add_request(struct request_queue *q, struct request *rq) 277 deadline_add_request(struct request_queue *q, struct request *rq)
278 { 278 {
279 struct deadline_data *dd = q->elevator->elevator_data; 279 struct deadline_data *dd = q->elevator->elevator_data;
280 struct deadline_rq *drq = RQ_DATA(rq); 280 struct deadline_rq *drq = RQ_DATA(rq);
281 281
282 const int data_dir = rq_data_dir(drq->request); 282 const int data_dir = rq_data_dir(drq->request);
283 283
284 deadline_add_drq_rb(dd, drq); 284 deadline_add_drq_rb(dd, drq);
285 /* 285 /*
286 * set expire time (only used for reads) and add to fifo list 286 * set expire time (only used for reads) and add to fifo list
287 */ 287 */
288 drq->expires = jiffies + dd->fifo_expire[data_dir]; 288 drq->expires = jiffies + dd->fifo_expire[data_dir];
289 list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]); 289 list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
290 290
291 if (rq_mergeable(rq)) 291 if (rq_mergeable(rq))
292 deadline_add_drq_hash(dd, drq); 292 deadline_add_drq_hash(dd, drq);
293 } 293 }
294 294
295 /* 295 /*
296 * remove rq from rbtree, fifo, and hash 296 * remove rq from rbtree, fifo, and hash
297 */ 297 */
298 static void deadline_remove_request(request_queue_t *q, struct request *rq) 298 static void deadline_remove_request(request_queue_t *q, struct request *rq)
299 { 299 {
300 struct deadline_rq *drq = RQ_DATA(rq); 300 struct deadline_rq *drq = RQ_DATA(rq);
301 struct deadline_data *dd = q->elevator->elevator_data; 301 struct deadline_data *dd = q->elevator->elevator_data;
302 302
303 list_del_init(&drq->fifo); 303 list_del_init(&drq->fifo);
304 deadline_del_drq_rb(dd, drq); 304 deadline_del_drq_rb(dd, drq);
305 deadline_del_drq_hash(drq); 305 deadline_del_drq_hash(drq);
306 } 306 }
307 307
308 static int 308 static int
309 deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) 309 deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
310 { 310 {
311 struct deadline_data *dd = q->elevator->elevator_data; 311 struct deadline_data *dd = q->elevator->elevator_data;
312 struct request *__rq; 312 struct request *__rq;
313 int ret; 313 int ret;
314 314
315 /* 315 /*
316 * see if the merge hash can satisfy a back merge 316 * see if the merge hash can satisfy a back merge
317 */ 317 */
318 __rq = deadline_find_drq_hash(dd, bio->bi_sector); 318 __rq = deadline_find_drq_hash(dd, bio->bi_sector);
319 if (__rq) { 319 if (__rq) {
320 BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); 320 BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
321 321
322 if (elv_rq_merge_ok(__rq, bio)) { 322 if (elv_rq_merge_ok(__rq, bio)) {
323 ret = ELEVATOR_BACK_MERGE; 323 ret = ELEVATOR_BACK_MERGE;
324 goto out; 324 goto out;
325 } 325 }
326 } 326 }
327 327
328 /* 328 /*
329 * check for front merge 329 * check for front merge
330 */ 330 */
331 if (dd->front_merges) { 331 if (dd->front_merges) {
332 sector_t rb_key = bio->bi_sector + bio_sectors(bio); 332 sector_t rb_key = bio->bi_sector + bio_sectors(bio);
333 333
334 __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio)); 334 __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio));
335 if (__rq) { 335 if (__rq) {
336 BUG_ON(rb_key != rq_rb_key(__rq)); 336 BUG_ON(rb_key != rq_rb_key(__rq));
337 337
338 if (elv_rq_merge_ok(__rq, bio)) { 338 if (elv_rq_merge_ok(__rq, bio)) {
339 ret = ELEVATOR_FRONT_MERGE; 339 ret = ELEVATOR_FRONT_MERGE;
340 goto out; 340 goto out;
341 } 341 }
342 } 342 }
343 } 343 }
344 344
345 return ELEVATOR_NO_MERGE; 345 return ELEVATOR_NO_MERGE;
346 out: 346 out:
347 if (ret) 347 if (ret)
348 deadline_hot_drq_hash(dd, RQ_DATA(__rq)); 348 deadline_hot_drq_hash(dd, RQ_DATA(__rq));
349 *req = __rq; 349 *req = __rq;
350 return ret; 350 return ret;
351 } 351 }
352 352
353 static void deadline_merged_request(request_queue_t *q, struct request *req) 353 static void deadline_merged_request(request_queue_t *q, struct request *req)
354 { 354 {
355 struct deadline_data *dd = q->elevator->elevator_data; 355 struct deadline_data *dd = q->elevator->elevator_data;
356 struct deadline_rq *drq = RQ_DATA(req); 356 struct deadline_rq *drq = RQ_DATA(req);
357 357
358 /* 358 /*
359 * hash always needs to be repositioned, key is end sector 359 * hash always needs to be repositioned, key is end sector
360 */ 360 */
361 deadline_del_drq_hash(drq); 361 deadline_del_drq_hash(drq);
362 deadline_add_drq_hash(dd, drq); 362 deadline_add_drq_hash(dd, drq);
363 363
364 /* 364 /*
365 * if the merge was a front merge, we need to reposition request 365 * if the merge was a front merge, we need to reposition request
366 */ 366 */
367 if (rq_rb_key(req) != drq->rb_key) { 367 if (rq_rb_key(req) != drq->rb_key) {
368 deadline_del_drq_rb(dd, drq); 368 deadline_del_drq_rb(dd, drq);
369 deadline_add_drq_rb(dd, drq); 369 deadline_add_drq_rb(dd, drq);
370 } 370 }
371 } 371 }
372 372
373 static void 373 static void
374 deadline_merged_requests(request_queue_t *q, struct request *req, 374 deadline_merged_requests(request_queue_t *q, struct request *req,
375 struct request *next) 375 struct request *next)
376 { 376 {
377 struct deadline_data *dd = q->elevator->elevator_data; 377 struct deadline_data *dd = q->elevator->elevator_data;
378 struct deadline_rq *drq = RQ_DATA(req); 378 struct deadline_rq *drq = RQ_DATA(req);
379 struct deadline_rq *dnext = RQ_DATA(next); 379 struct deadline_rq *dnext = RQ_DATA(next);
380 380
381 BUG_ON(!drq); 381 BUG_ON(!drq);
382 BUG_ON(!dnext); 382 BUG_ON(!dnext);
383 383
384 /* 384 /*
385 * reposition drq (this is the merged request) in hash, and in rbtree 385 * reposition drq (this is the merged request) in hash, and in rbtree
386 * in case of a front merge 386 * in case of a front merge
387 */ 387 */
388 deadline_del_drq_hash(drq); 388 deadline_del_drq_hash(drq);
389 deadline_add_drq_hash(dd, drq); 389 deadline_add_drq_hash(dd, drq);
390 390
391 if (rq_rb_key(req) != drq->rb_key) { 391 if (rq_rb_key(req) != drq->rb_key) {
392 deadline_del_drq_rb(dd, drq); 392 deadline_del_drq_rb(dd, drq);
393 deadline_add_drq_rb(dd, drq); 393 deadline_add_drq_rb(dd, drq);
394 } 394 }
395 395
396 /* 396 /*
397 * if dnext expires before drq, assign its expire time to drq 397 * if dnext expires before drq, assign its expire time to drq
398 * and move into dnext position (dnext will be deleted) in fifo 398 * and move into dnext position (dnext will be deleted) in fifo
399 */ 399 */
400 if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) { 400 if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) {
401 if (time_before(dnext->expires, drq->expires)) { 401 if (time_before(dnext->expires, drq->expires)) {
402 list_move(&drq->fifo, &dnext->fifo); 402 list_move(&drq->fifo, &dnext->fifo);
403 drq->expires = dnext->expires; 403 drq->expires = dnext->expires;
404 } 404 }
405 } 405 }
406 406
407 /* 407 /*
408 * kill knowledge of next, this one is a goner 408 * kill knowledge of next, this one is a goner
409 */ 409 */
410 deadline_remove_request(q, next); 410 deadline_remove_request(q, next);
411 } 411 }
412 412
413 /* 413 /*
414 * move request from sort list to dispatch queue. 414 * move request from sort list to dispatch queue.
415 */ 415 */
416 static inline void 416 static inline void
417 deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq) 417 deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq)
418 { 418 {
419 request_queue_t *q = drq->request->q; 419 request_queue_t *q = drq->request->q;
420 420
421 deadline_remove_request(q, drq->request); 421 deadline_remove_request(q, drq->request);
422 elv_dispatch_add_tail(q, drq->request); 422 elv_dispatch_add_tail(q, drq->request);
423 } 423 }
424 424
425 /* 425 /*
426 * move an entry to dispatch queue 426 * move an entry to dispatch queue
427 */ 427 */
428 static void 428 static void
429 deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq) 429 deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq)
430 { 430 {
431 const int data_dir = rq_data_dir(drq->request); 431 const int data_dir = rq_data_dir(drq->request);
432 struct rb_node *rbnext = rb_next(&drq->rb_node); 432 struct rb_node *rbnext = rb_next(&drq->rb_node);
433 433
434 dd->next_drq[READ] = NULL; 434 dd->next_drq[READ] = NULL;
435 dd->next_drq[WRITE] = NULL; 435 dd->next_drq[WRITE] = NULL;
436 436
437 if (rbnext) 437 if (rbnext)
438 dd->next_drq[data_dir] = rb_entry_drq(rbnext); 438 dd->next_drq[data_dir] = rb_entry_drq(rbnext);
439 439
440 dd->last_sector = drq->request->sector + drq->request->nr_sectors; 440 dd->last_sector = drq->request->sector + drq->request->nr_sectors;
441 441
442 /* 442 /*
443 * take it off the sort and fifo list, move 443 * take it off the sort and fifo list, move
444 * to dispatch queue 444 * to dispatch queue
445 */ 445 */
446 deadline_move_to_dispatch(dd, drq); 446 deadline_move_to_dispatch(dd, drq);
447 } 447 }
448 448
449 #define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo) 449 #define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo)
450 450
451 /* 451 /*
452 * deadline_check_fifo returns 0 if there are no expired reads on the fifo, 452 * deadline_check_fifo returns 0 if there are no expired reads on the fifo,
453 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) 453 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
454 */ 454 */
455 static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) 455 static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
456 { 456 {
457 struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next); 457 struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next);
458 458
459 /* 459 /*
460 * drq is expired! 460 * drq is expired!
461 */ 461 */
462 if (time_after(jiffies, drq->expires)) 462 if (time_after(jiffies, drq->expires))
463 return 1; 463 return 1;
464 464
465 return 0; 465 return 0;
466 } 466 }
467 467
468 /* 468 /*
469 * deadline_dispatch_requests selects the best request according to 469 * deadline_dispatch_requests selects the best request according to
470 * read/write expire, fifo_batch, etc 470 * read/write expire, fifo_batch, etc
471 */ 471 */
472 static int deadline_dispatch_requests(request_queue_t *q, int force) 472 static int deadline_dispatch_requests(request_queue_t *q, int force)
473 { 473 {
474 struct deadline_data *dd = q->elevator->elevator_data; 474 struct deadline_data *dd = q->elevator->elevator_data;
475 const int reads = !list_empty(&dd->fifo_list[READ]); 475 const int reads = !list_empty(&dd->fifo_list[READ]);
476 const int writes = !list_empty(&dd->fifo_list[WRITE]); 476 const int writes = !list_empty(&dd->fifo_list[WRITE]);
477 struct deadline_rq *drq; 477 struct deadline_rq *drq;
478 int data_dir; 478 int data_dir;
479 479
480 /* 480 /*
481 * batches are currently reads XOR writes 481 * batches are currently reads XOR writes
482 */ 482 */
483 if (dd->next_drq[WRITE]) 483 if (dd->next_drq[WRITE])
484 drq = dd->next_drq[WRITE]; 484 drq = dd->next_drq[WRITE];
485 else 485 else
486 drq = dd->next_drq[READ]; 486 drq = dd->next_drq[READ];
487 487
488 if (drq) { 488 if (drq) {
489 /* we have a "next request" */ 489 /* we have a "next request" */
490 490
491 if (dd->last_sector != drq->request->sector) 491 if (dd->last_sector != drq->request->sector)
492 /* end the batch on a non sequential request */ 492 /* end the batch on a non sequential request */
493 dd->batching += dd->fifo_batch; 493 dd->batching += dd->fifo_batch;
494 494
495 if (dd->batching < dd->fifo_batch) 495 if (dd->batching < dd->fifo_batch)
496 /* we are still entitled to batch */ 496 /* we are still entitled to batch */
497 goto dispatch_request; 497 goto dispatch_request;
498 } 498 }
499 499
500 /* 500 /*
501 * at this point we are not running a batch. select the appropriate 501 * at this point we are not running a batch. select the appropriate
502 * data direction (read / write) 502 * data direction (read / write)
503 */ 503 */
504 504
505 if (reads) { 505 if (reads) {
506 BUG_ON(RB_EMPTY(&dd->sort_list[READ])); 506 BUG_ON(RB_EMPTY(&dd->sort_list[READ]));
507 507
508 if (writes && (dd->starved++ >= dd->writes_starved)) 508 if (writes && (dd->starved++ >= dd->writes_starved))
509 goto dispatch_writes; 509 goto dispatch_writes;
510 510
511 data_dir = READ; 511 data_dir = READ;
512 512
513 goto dispatch_find_request; 513 goto dispatch_find_request;
514 } 514 }
515 515
516 /* 516 /*
517 * there are either no reads or writes have been starved 517 * there are either no reads or writes have been starved
518 */ 518 */
519 519
520 if (writes) { 520 if (writes) {
521 dispatch_writes: 521 dispatch_writes:
522 BUG_ON(RB_EMPTY(&dd->sort_list[WRITE])); 522 BUG_ON(RB_EMPTY(&dd->sort_list[WRITE]));
523 523
524 dd->starved = 0; 524 dd->starved = 0;
525 525
526 data_dir = WRITE; 526 data_dir = WRITE;
527 527
528 goto dispatch_find_request; 528 goto dispatch_find_request;
529 } 529 }
530 530
531 return 0; 531 return 0;
532 532
533 dispatch_find_request: 533 dispatch_find_request:
534 /* 534 /*
535 * we are not running a batch, find best request for selected data_dir 535 * we are not running a batch, find best request for selected data_dir
536 */ 536 */
537 if (deadline_check_fifo(dd, data_dir)) { 537 if (deadline_check_fifo(dd, data_dir)) {
538 /* An expired request exists - satisfy it */ 538 /* An expired request exists - satisfy it */
539 dd->batching = 0; 539 dd->batching = 0;
540 drq = list_entry_fifo(dd->fifo_list[data_dir].next); 540 drq = list_entry_fifo(dd->fifo_list[data_dir].next);
541 541
542 } else if (dd->next_drq[data_dir]) { 542 } else if (dd->next_drq[data_dir]) {
543 /* 543 /*
544 * The last req was the same dir and we have a next request in 544 * The last req was the same dir and we have a next request in
545 * sort order. No expired requests so continue on from here. 545 * sort order. No expired requests so continue on from here.
546 */ 546 */
547 drq = dd->next_drq[data_dir]; 547 drq = dd->next_drq[data_dir];
548 } else { 548 } else {
549 /* 549 /*
550 * The last req was the other direction or we have run out of 550 * The last req was the other direction or we have run out of
551 * higher-sectored requests. Go back to the lowest sectored 551 * higher-sectored requests. Go back to the lowest sectored
552 * request (1 way elevator) and start a new batch. 552 * request (1 way elevator) and start a new batch.
553 */ 553 */
554 dd->batching = 0; 554 dd->batching = 0;
555 drq = deadline_find_first_drq(dd, data_dir); 555 drq = deadline_find_first_drq(dd, data_dir);
556 } 556 }
557 557
558 dispatch_request: 558 dispatch_request:
559 /* 559 /*
560 * drq is the selected appropriate request. 560 * drq is the selected appropriate request.
561 */ 561 */
562 dd->batching++; 562 dd->batching++;
563 deadline_move_request(dd, drq); 563 deadline_move_request(dd, drq);
564 564
565 return 1; 565 return 1;
566 } 566 }
567 567
568 static int deadline_queue_empty(request_queue_t *q) 568 static int deadline_queue_empty(request_queue_t *q)
569 { 569 {
570 struct deadline_data *dd = q->elevator->elevator_data; 570 struct deadline_data *dd = q->elevator->elevator_data;
571 571
572 return list_empty(&dd->fifo_list[WRITE]) 572 return list_empty(&dd->fifo_list[WRITE])
573 && list_empty(&dd->fifo_list[READ]); 573 && list_empty(&dd->fifo_list[READ]);
574 } 574 }
575 575
576 static struct request * 576 static struct request *
577 deadline_former_request(request_queue_t *q, struct request *rq) 577 deadline_former_request(request_queue_t *q, struct request *rq)
578 { 578 {
579 struct deadline_rq *drq = RQ_DATA(rq); 579 struct deadline_rq *drq = RQ_DATA(rq);
580 struct rb_node *rbprev = rb_prev(&drq->rb_node); 580 struct rb_node *rbprev = rb_prev(&drq->rb_node);
581 581
582 if (rbprev) 582 if (rbprev)
583 return rb_entry_drq(rbprev)->request; 583 return rb_entry_drq(rbprev)->request;
584 584
585 return NULL; 585 return NULL;
586 } 586 }
587 587
588 static struct request * 588 static struct request *
589 deadline_latter_request(request_queue_t *q, struct request *rq) 589 deadline_latter_request(request_queue_t *q, struct request *rq)
590 { 590 {
591 struct deadline_rq *drq = RQ_DATA(rq); 591 struct deadline_rq *drq = RQ_DATA(rq);
592 struct rb_node *rbnext = rb_next(&drq->rb_node); 592 struct rb_node *rbnext = rb_next(&drq->rb_node);
593 593
594 if (rbnext) 594 if (rbnext)
595 return rb_entry_drq(rbnext)->request; 595 return rb_entry_drq(rbnext)->request;
596 596
597 return NULL; 597 return NULL;
598 } 598 }
599 599
600 static void deadline_exit_queue(elevator_t *e) 600 static void deadline_exit_queue(elevator_t *e)
601 { 601 {
602 struct deadline_data *dd = e->elevator_data; 602 struct deadline_data *dd = e->elevator_data;
603 603
604 BUG_ON(!list_empty(&dd->fifo_list[READ])); 604 BUG_ON(!list_empty(&dd->fifo_list[READ]));
605 BUG_ON(!list_empty(&dd->fifo_list[WRITE])); 605 BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
606 606
607 mempool_destroy(dd->drq_pool); 607 mempool_destroy(dd->drq_pool);
608 kfree(dd->hash); 608 kfree(dd->hash);
609 kfree(dd); 609 kfree(dd);
610 } 610 }
611 611
612 /* 612 /*
613 * initialize elevator private data (deadline_data), and alloc a drq for 613 * initialize elevator private data (deadline_data), and alloc a drq for
614 * each request on the free lists 614 * each request on the free lists
615 */ 615 */
616 static int deadline_init_queue(request_queue_t *q, elevator_t *e) 616 static int deadline_init_queue(request_queue_t *q, elevator_t *e)
617 { 617 {
618 struct deadline_data *dd; 618 struct deadline_data *dd;
619 int i; 619 int i;
620 620
621 if (!drq_pool) 621 if (!drq_pool)
622 return -ENOMEM; 622 return -ENOMEM;
623 623
624 dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node); 624 dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
625 if (!dd) 625 if (!dd)
626 return -ENOMEM; 626 return -ENOMEM;
627 memset(dd, 0, sizeof(*dd)); 627 memset(dd, 0, sizeof(*dd));
628 628
629 dd->hash = kmalloc_node(sizeof(struct list_head)*DL_HASH_ENTRIES, 629 dd->hash = kmalloc_node(sizeof(struct list_head)*DL_HASH_ENTRIES,
630 GFP_KERNEL, q->node); 630 GFP_KERNEL, q->node);
631 if (!dd->hash) { 631 if (!dd->hash) {
632 kfree(dd); 632 kfree(dd);
633 return -ENOMEM; 633 return -ENOMEM;
634 } 634 }
635 635
636 dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 636 dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
637 mempool_free_slab, drq_pool, q->node); 637 mempool_free_slab, drq_pool, q->node);
638 if (!dd->drq_pool) { 638 if (!dd->drq_pool) {
639 kfree(dd->hash); 639 kfree(dd->hash);
640 kfree(dd); 640 kfree(dd);
641 return -ENOMEM; 641 return -ENOMEM;
642 } 642 }
643 643
644 for (i = 0; i < DL_HASH_ENTRIES; i++) 644 for (i = 0; i < DL_HASH_ENTRIES; i++)
645 INIT_LIST_HEAD(&dd->hash[i]); 645 INIT_LIST_HEAD(&dd->hash[i]);
646 646
647 INIT_LIST_HEAD(&dd->fifo_list[READ]); 647 INIT_LIST_HEAD(&dd->fifo_list[READ]);
648 INIT_LIST_HEAD(&dd->fifo_list[WRITE]); 648 INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
649 dd->sort_list[READ] = RB_ROOT; 649 dd->sort_list[READ] = RB_ROOT;
650 dd->sort_list[WRITE] = RB_ROOT; 650 dd->sort_list[WRITE] = RB_ROOT;
651 dd->fifo_expire[READ] = read_expire; 651 dd->fifo_expire[READ] = read_expire;
652 dd->fifo_expire[WRITE] = write_expire; 652 dd->fifo_expire[WRITE] = write_expire;
653 dd->writes_starved = writes_starved; 653 dd->writes_starved = writes_starved;
654 dd->front_merges = 1; 654 dd->front_merges = 1;
655 dd->fifo_batch = fifo_batch; 655 dd->fifo_batch = fifo_batch;
656 e->elevator_data = dd; 656 e->elevator_data = dd;
657 return 0; 657 return 0;
658 } 658 }
659 659
660 static void deadline_put_request(request_queue_t *q, struct request *rq) 660 static void deadline_put_request(request_queue_t *q, struct request *rq)
661 { 661 {
662 struct deadline_data *dd = q->elevator->elevator_data; 662 struct deadline_data *dd = q->elevator->elevator_data;
663 struct deadline_rq *drq = RQ_DATA(rq); 663 struct deadline_rq *drq = RQ_DATA(rq);
664 664
665 mempool_free(drq, dd->drq_pool); 665 mempool_free(drq, dd->drq_pool);
666 rq->elevator_private = NULL; 666 rq->elevator_private = NULL;
667 } 667 }
668 668
669 static int 669 static int
670 deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio, 670 deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
671 gfp_t gfp_mask) 671 gfp_t gfp_mask)
672 { 672 {
673 struct deadline_data *dd = q->elevator->elevator_data; 673 struct deadline_data *dd = q->elevator->elevator_data;
674 struct deadline_rq *drq; 674 struct deadline_rq *drq;
675 675
676 drq = mempool_alloc(dd->drq_pool, gfp_mask); 676 drq = mempool_alloc(dd->drq_pool, gfp_mask);
677 if (drq) { 677 if (drq) {
678 memset(drq, 0, sizeof(*drq)); 678 memset(drq, 0, sizeof(*drq));
679 RB_CLEAR(&drq->rb_node); 679 RB_CLEAR(&drq->rb_node);
680 drq->request = rq; 680 drq->request = rq;
681 681
682 INIT_LIST_HEAD(&drq->hash); 682 INIT_LIST_HEAD(&drq->hash);
683 drq->on_hash = 0; 683 drq->on_hash = 0;
684 684
685 INIT_LIST_HEAD(&drq->fifo); 685 INIT_LIST_HEAD(&drq->fifo);
686 686
687 rq->elevator_private = drq; 687 rq->elevator_private = drq;
688 return 0; 688 return 0;
689 } 689 }
690 690
691 return 1; 691 return 1;
692 } 692 }
693 693
694 /* 694 /*
695 * sysfs parts below 695 * sysfs parts below
696 */ 696 */
697 697
698 static ssize_t 698 static ssize_t
699 deadline_var_show(int var, char *page) 699 deadline_var_show(int var, char *page)
700 { 700 {
701 return sprintf(page, "%d\n", var); 701 return sprintf(page, "%d\n", var);
702 } 702 }
703 703
704 static ssize_t 704 static ssize_t
705 deadline_var_store(int *var, const char *page, size_t count) 705 deadline_var_store(int *var, const char *page, size_t count)
706 { 706 {
707 char *p = (char *) page; 707 char *p = (char *) page;
708 708
709 *var = simple_strtol(p, &p, 10); 709 *var = simple_strtol(p, &p, 10);
710 return count; 710 return count;
711 } 711 }
712 712
713 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ 713 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
714 static ssize_t __FUNC(elevator_t *e, char *page) \ 714 static ssize_t __FUNC(elevator_t *e, char *page) \
715 { \ 715 { \
716 struct deadline_data *dd = e->elevator_data; \ 716 struct deadline_data *dd = e->elevator_data; \
717 int __data = __VAR; \ 717 int __data = __VAR; \
718 if (__CONV) \ 718 if (__CONV) \
719 __data = jiffies_to_msecs(__data); \ 719 __data = jiffies_to_msecs(__data); \
720 return deadline_var_show(__data, (page)); \ 720 return deadline_var_show(__data, (page)); \
721 } 721 }
722 SHOW_FUNCTION(deadline_readexpire_show, dd->fifo_expire[READ], 1); 722 SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
723 SHOW_FUNCTION(deadline_writeexpire_show, dd->fifo_expire[WRITE], 1); 723 SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
724 SHOW_FUNCTION(deadline_writesstarved_show, dd->writes_starved, 0); 724 SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
725 SHOW_FUNCTION(deadline_frontmerges_show, dd->front_merges, 0); 725 SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
726 SHOW_FUNCTION(deadline_fifobatch_show, dd->fifo_batch, 0); 726 SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
727 #undef SHOW_FUNCTION 727 #undef SHOW_FUNCTION
728 728
729 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ 729 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
730 static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ 730 static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
731 { \ 731 { \
732 struct deadline_data *dd = e->elevator_data; \ 732 struct deadline_data *dd = e->elevator_data; \
733 int __data; \ 733 int __data; \
734 int ret = deadline_var_store(&__data, (page), count); \ 734 int ret = deadline_var_store(&__data, (page), count); \
735 if (__data < (MIN)) \ 735 if (__data < (MIN)) \
736 __data = (MIN); \ 736 __data = (MIN); \
737 else if (__data > (MAX)) \ 737 else if (__data > (MAX)) \
738 __data = (MAX); \ 738 __data = (MAX); \
739 if (__CONV) \ 739 if (__CONV) \
740 *(__PTR) = msecs_to_jiffies(__data); \ 740 *(__PTR) = msecs_to_jiffies(__data); \
741 else \ 741 else \
742 *(__PTR) = __data; \ 742 *(__PTR) = __data; \
743 return ret; \ 743 return ret; \
744 } 744 }
745 STORE_FUNCTION(deadline_readexpire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1); 745 STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
746 STORE_FUNCTION(deadline_writeexpire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1); 746 STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
747 STORE_FUNCTION(deadline_writesstarved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0); 747 STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
748 STORE_FUNCTION(deadline_frontmerges_store, &dd->front_merges, 0, 1, 0); 748 STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
749 STORE_FUNCTION(deadline_fifobatch_store, &dd->fifo_batch, 0, INT_MAX, 0); 749 STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
750 #undef STORE_FUNCTION 750 #undef STORE_FUNCTION
751 751
752 static struct elv_fs_entry deadline_readexpire_entry = { 752 #define DD_ATTR(name) \
753 .attr = {.name = "read_expire", .mode = S_IRUGO | S_IWUSR }, 753 __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
754 .show = deadline_readexpire_show, 754 deadline_##name##_store)
755 .store = deadline_readexpire_store,
756 };
757 static struct elv_fs_entry deadline_writeexpire_entry = {
758 .attr = {.name = "write_expire", .mode = S_IRUGO | S_IWUSR },
759 .show = deadline_writeexpire_show,
760 .store = deadline_writeexpire_store,
761 };
762 static struct elv_fs_entry deadline_writesstarved_entry = {
763 .attr = {.name = "writes_starved", .mode = S_IRUGO | S_IWUSR },
764 .show = deadline_writesstarved_show,
765 .store = deadline_writesstarved_store,
766 };
767 static struct elv_fs_entry deadline_frontmerges_entry = {
768 .attr = {.name = "front_merges", .mode = S_IRUGO | S_IWUSR },
769 .show = deadline_frontmerges_show,
770 .store = deadline_frontmerges_store,
771 };
772 static struct elv_fs_entry deadline_fifobatch_entry = {
773 .attr = {.name = "fifo_batch", .mode = S_IRUGO | S_IWUSR },
774 .show = deadline_fifobatch_show,
775 .store = deadline_fifobatch_store,
776 };
777 755
778 static struct attribute *deadline_attrs[] = { 756 static struct elv_fs_entry deadline_attrs[] = {
779 &deadline_readexpire_entry.attr, 757 DD_ATTR(read_expire),
780 &deadline_writeexpire_entry.attr, 758 DD_ATTR(write_expire),
781 &deadline_writesstarved_entry.attr, 759 DD_ATTR(writes_starved),
782 &deadline_frontmerges_entry.attr, 760 DD_ATTR(front_merges),
783 &deadline_fifobatch_entry.attr, 761 DD_ATTR(fifo_batch),
784 NULL, 762 __ATTR_NULL
785 }; 763 };
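deadline gets the same treatment through DD_ATTR(): the per-attribute structs become one __ATTR_NULL-terminated table, with the accessors renamed (deadline_readexpire_show becomes deadline_read_expire_show, and so on) so the token pasting lines up with the existing sysfs names. A table terminated by a NULL attribute name can be registered with a plain walk; the sketch below is illustrative only, assumes sysfs_create_file() is called per entry, and is not the patch's block/elevator.c code (that hunk is truncated in this view):

/* Illustrative sketch: register every entry of an __ATTR_NULL-terminated
 * elv_fs_entry table against a kobject, stopping on the NULL-name sentinel
 * or on the first error.
 */
static int register_elv_attrs(struct kobject *kobj, struct elv_fs_entry *entry)
{
	int error = 0;

	while (entry->attr.name && !error) {
		error = sysfs_create_file(kobj, &entry->attr);
		entry++;
	}
	return error;
}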
786 764
787 static struct elevator_type iosched_deadline = { 765 static struct elevator_type iosched_deadline = {
788 .ops = { 766 .ops = {
789 .elevator_merge_fn = deadline_merge, 767 .elevator_merge_fn = deadline_merge,
790 .elevator_merged_fn = deadline_merged_request, 768 .elevator_merged_fn = deadline_merged_request,
791 .elevator_merge_req_fn = deadline_merged_requests, 769 .elevator_merge_req_fn = deadline_merged_requests,
792 .elevator_dispatch_fn = deadline_dispatch_requests, 770 .elevator_dispatch_fn = deadline_dispatch_requests,
793 .elevator_add_req_fn = deadline_add_request, 771 .elevator_add_req_fn = deadline_add_request,
794 .elevator_queue_empty_fn = deadline_queue_empty, 772 .elevator_queue_empty_fn = deadline_queue_empty,
795 .elevator_former_req_fn = deadline_former_request, 773 .elevator_former_req_fn = deadline_former_request,
796 .elevator_latter_req_fn = deadline_latter_request, 774 .elevator_latter_req_fn = deadline_latter_request,
797 .elevator_set_req_fn = deadline_set_request, 775 .elevator_set_req_fn = deadline_set_request,
798 .elevator_put_req_fn = deadline_put_request, 776 .elevator_put_req_fn = deadline_put_request,
799 .elevator_init_fn = deadline_init_queue, 777 .elevator_init_fn = deadline_init_queue,
800 .elevator_exit_fn = deadline_exit_queue, 778 .elevator_exit_fn = deadline_exit_queue,
801 }, 779 },
802 780
803 .elevator_attrs = deadline_attrs, 781 .elevator_attrs = deadline_attrs,
804 .elevator_name = "deadline", 782 .elevator_name = "deadline",
805 .elevator_owner = THIS_MODULE, 783 .elevator_owner = THIS_MODULE,
806 }; 784 };
807 785
808 static int __init deadline_init(void) 786 static int __init deadline_init(void)
809 { 787 {
810 int ret; 788 int ret;
811 789
812 drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq), 790 drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
813 0, 0, NULL, NULL); 791 0, 0, NULL, NULL);
814 792
815 if (!drq_pool) 793 if (!drq_pool)
816 return -ENOMEM; 794 return -ENOMEM;
817 795
818 ret = elv_register(&iosched_deadline); 796 ret = elv_register(&iosched_deadline);
819 if (ret) 797 if (ret)
820 kmem_cache_destroy(drq_pool); 798 kmem_cache_destroy(drq_pool);
821 799
822 return ret; 800 return ret;
823 } 801 }
824 802
825 static void __exit deadline_exit(void) 803 static void __exit deadline_exit(void)
826 { 804 {
827 kmem_cache_destroy(drq_pool); 805 kmem_cache_destroy(drq_pool);
828 elv_unregister(&iosched_deadline); 806 elv_unregister(&iosched_deadline);
829 } 807 }
830 808
831 module_init(deadline_init); 809 module_init(deadline_init);
832 module_exit(deadline_exit); 810 module_exit(deadline_exit);
833 811
834 MODULE_AUTHOR("Jens Axboe"); 812 MODULE_AUTHOR("Jens Axboe");
835 MODULE_LICENSE("GPL"); 813 MODULE_LICENSE("GPL");
836 MODULE_DESCRIPTION("deadline IO scheduler"); 814 MODULE_DESCRIPTION("deadline IO scheduler");
837 815
block/elevator.c
1 /* 1 /*
2 * Block device elevator/IO-scheduler. 2 * Block device elevator/IO-scheduler.
3 * 3 *
4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 * 5 *
6 * 30042000 Jens Axboe <axboe@suse.de> : 6 * 30042000 Jens Axboe <axboe@suse.de> :
7 * 7 *
8 * Split the elevator a bit so that it is possible to choose a different 8 * Split the elevator a bit so that it is possible to choose a different
9 * one or even write a new "plug in". There are three pieces: 9 * one or even write a new "plug in". There are three pieces:
10 * - elevator_fn, inserts a new request in the queue list 10 * - elevator_fn, inserts a new request in the queue list
11 * - elevator_merge_fn, decides whether a new buffer can be merged with 11 * - elevator_merge_fn, decides whether a new buffer can be merged with
12 * an existing request 12 * an existing request
13 * - elevator_dequeue_fn, called when a request is taken off the active list 13 * - elevator_dequeue_fn, called when a request is taken off the active list
14 * 14 *
15 * 20082000 Dave Jones <davej@suse.de> : 15 * 20082000 Dave Jones <davej@suse.de> :
16 * Removed tests for max-bomb-segments, which was breaking elvtune 16 * Removed tests for max-bomb-segments, which was breaking elvtune
17 * when run without -bN 17 * when run without -bN
18 * 18 *
19 * Jens: 19 * Jens:
20 * - Rework again to work with bio instead of buffer_heads 20 * - Rework again to work with bio instead of buffer_heads
21 * - loose bi_dev comparisons, partition handling is right now 21 * - loose bi_dev comparisons, partition handling is right now
22 * - completely modularize elevator setup and teardown 22 * - completely modularize elevator setup and teardown
23 * 23 *
24 */ 24 */
25 #include <linux/kernel.h> 25 #include <linux/kernel.h>
26 #include <linux/fs.h> 26 #include <linux/fs.h>
27 #include <linux/blkdev.h> 27 #include <linux/blkdev.h>
28 #include <linux/elevator.h> 28 #include <linux/elevator.h>
29 #include <linux/bio.h> 29 #include <linux/bio.h>
30 #include <linux/config.h> 30 #include <linux/config.h>
31 #include <linux/module.h> 31 #include <linux/module.h>
32 #include <linux/slab.h> 32 #include <linux/slab.h>
33 #include <linux/init.h> 33 #include <linux/init.h>
34 #include <linux/compiler.h> 34 #include <linux/compiler.h>
35 #include <linux/delay.h> 35 #include <linux/delay.h>
36 36
37 #include <asm/uaccess.h> 37 #include <asm/uaccess.h>
38 38
39 static DEFINE_SPINLOCK(elv_list_lock); 39 static DEFINE_SPINLOCK(elv_list_lock);
40 static LIST_HEAD(elv_list); 40 static LIST_HEAD(elv_list);
41 41
42 /* 42 /*
43 * can we safely merge with this request? 43 * can we safely merge with this request?
44 */ 44 */
45 inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) 45 inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
46 { 46 {
47 if (!rq_mergeable(rq)) 47 if (!rq_mergeable(rq))
48 return 0; 48 return 0;
49 49
50 /* 50 /*
51 * different data direction or already started, don't merge 51 * different data direction or already started, don't merge
52 */ 52 */
53 if (bio_data_dir(bio) != rq_data_dir(rq)) 53 if (bio_data_dir(bio) != rq_data_dir(rq))
54 return 0; 54 return 0;
55 55
56 /* 56 /*
57 * same device and no special stuff set, merge is ok 57 * same device and no special stuff set, merge is ok
58 */ 58 */
59 if (rq->rq_disk == bio->bi_bdev->bd_disk && 59 if (rq->rq_disk == bio->bi_bdev->bd_disk &&
60 !rq->waiting && !rq->special) 60 !rq->waiting && !rq->special)
61 return 1; 61 return 1;
62 62
63 return 0; 63 return 0;
64 } 64 }
65 EXPORT_SYMBOL(elv_rq_merge_ok); 65 EXPORT_SYMBOL(elv_rq_merge_ok);
66 66
67 static inline int elv_try_merge(struct request *__rq, struct bio *bio) 67 static inline int elv_try_merge(struct request *__rq, struct bio *bio)
68 { 68 {
69 int ret = ELEVATOR_NO_MERGE; 69 int ret = ELEVATOR_NO_MERGE;
70 70
71 /* 71 /*
72 * we can merge and sequence is ok, check if it's possible 72 * we can merge and sequence is ok, check if it's possible
73 */ 73 */
74 if (elv_rq_merge_ok(__rq, bio)) { 74 if (elv_rq_merge_ok(__rq, bio)) {
75 if (__rq->sector + __rq->nr_sectors == bio->bi_sector) 75 if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
76 ret = ELEVATOR_BACK_MERGE; 76 ret = ELEVATOR_BACK_MERGE;
77 else if (__rq->sector - bio_sectors(bio) == bio->bi_sector) 77 else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
78 ret = ELEVATOR_FRONT_MERGE; 78 ret = ELEVATOR_FRONT_MERGE;
79 } 79 }
80 80
81 return ret; 81 return ret;
82 } 82 }
83 83
84 static struct elevator_type *elevator_find(const char *name) 84 static struct elevator_type *elevator_find(const char *name)
85 { 85 {
86 struct elevator_type *e = NULL; 86 struct elevator_type *e = NULL;
87 struct list_head *entry; 87 struct list_head *entry;
88 88
89 list_for_each(entry, &elv_list) { 89 list_for_each(entry, &elv_list) {
90 struct elevator_type *__e; 90 struct elevator_type *__e;
91 91
92 __e = list_entry(entry, struct elevator_type, list); 92 __e = list_entry(entry, struct elevator_type, list);
93 93
94 if (!strcmp(__e->elevator_name, name)) { 94 if (!strcmp(__e->elevator_name, name)) {
95 e = __e; 95 e = __e;
96 break; 96 break;
97 } 97 }
98 } 98 }
99 99
100 return e; 100 return e;
101 } 101 }
102 102
103 static void elevator_put(struct elevator_type *e) 103 static void elevator_put(struct elevator_type *e)
104 { 104 {
105 module_put(e->elevator_owner); 105 module_put(e->elevator_owner);
106 } 106 }
107 107
108 static struct elevator_type *elevator_get(const char *name) 108 static struct elevator_type *elevator_get(const char *name)
109 { 109 {
110 struct elevator_type *e; 110 struct elevator_type *e;
111 111
112 spin_lock_irq(&elv_list_lock); 112 spin_lock_irq(&elv_list_lock);
113 113
114 e = elevator_find(name); 114 e = elevator_find(name);
115 if (e && !try_module_get(e->elevator_owner)) 115 if (e && !try_module_get(e->elevator_owner))
116 e = NULL; 116 e = NULL;
117 117
118 spin_unlock_irq(&elv_list_lock); 118 spin_unlock_irq(&elv_list_lock);
119 119
120 return e; 120 return e;
121 } 121 }
122 122
123 static int elevator_attach(request_queue_t *q, struct elevator_queue *eq) 123 static int elevator_attach(request_queue_t *q, struct elevator_queue *eq)
124 { 124 {
125 int ret = 0; 125 int ret = 0;
126 126
127 q->elevator = eq; 127 q->elevator = eq;
128 128
129 if (eq->ops->elevator_init_fn) 129 if (eq->ops->elevator_init_fn)
130 ret = eq->ops->elevator_init_fn(q, eq); 130 ret = eq->ops->elevator_init_fn(q, eq);
131 131
132 return ret; 132 return ret;
133 } 133 }
134 134
135 static char chosen_elevator[16]; 135 static char chosen_elevator[16];
136 136
137 static int __init elevator_setup(char *str) 137 static int __init elevator_setup(char *str)
138 { 138 {
139 /* 139 /*
140 * Be backwards-compatible with previous kernels, so users 140 * Be backwards-compatible with previous kernels, so users
141 * won't get the wrong elevator. 141 * won't get the wrong elevator.
142 */ 142 */
143 if (!strcmp(str, "as")) 143 if (!strcmp(str, "as"))
144 strcpy(chosen_elevator, "anticipatory"); 144 strcpy(chosen_elevator, "anticipatory");
145 else 145 else
146 strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); 146 strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
147 return 0; 147 return 0;
148 } 148 }
149 149
150 __setup("elevator=", elevator_setup); 150 __setup("elevator=", elevator_setup);
151 151
152 static struct kobj_type elv_ktype; 152 static struct kobj_type elv_ktype;
153 153
154 static elevator_t *elevator_alloc(struct elevator_type *e) 154 static elevator_t *elevator_alloc(struct elevator_type *e)
155 { 155 {
156 elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL); 156 elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
157 if (eq) { 157 if (eq) {
158 memset(eq, 0, sizeof(*eq)); 158 memset(eq, 0, sizeof(*eq));
159 eq->ops = &e->ops; 159 eq->ops = &e->ops;
160 eq->elevator_type = e; 160 eq->elevator_type = e;
161 kobject_init(&eq->kobj); 161 kobject_init(&eq->kobj);
162 snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); 162 snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
163 eq->kobj.ktype = &elv_ktype; 163 eq->kobj.ktype = &elv_ktype;
164 mutex_init(&eq->sysfs_lock); 164 mutex_init(&eq->sysfs_lock);
165 } else { 165 } else {
166 elevator_put(e); 166 elevator_put(e);
167 } 167 }
168 return eq; 168 return eq;
169 } 169 }
170 170
171 static void elevator_release(struct kobject *kobj) 171 static void elevator_release(struct kobject *kobj)
172 { 172 {
173 elevator_t *e = container_of(kobj, elevator_t, kobj); 173 elevator_t *e = container_of(kobj, elevator_t, kobj);
174 elevator_put(e->elevator_type); 174 elevator_put(e->elevator_type);
175 kfree(e); 175 kfree(e);
176 } 176 }
177 177
178 int elevator_init(request_queue_t *q, char *name) 178 int elevator_init(request_queue_t *q, char *name)
179 { 179 {
180 struct elevator_type *e = NULL; 180 struct elevator_type *e = NULL;
181 struct elevator_queue *eq; 181 struct elevator_queue *eq;
182 int ret = 0; 182 int ret = 0;
183 183
184 INIT_LIST_HEAD(&q->queue_head); 184 INIT_LIST_HEAD(&q->queue_head);
185 q->last_merge = NULL; 185 q->last_merge = NULL;
186 q->end_sector = 0; 186 q->end_sector = 0;
187 q->boundary_rq = NULL; 187 q->boundary_rq = NULL;
188 188
189 if (name && !(e = elevator_get(name))) 189 if (name && !(e = elevator_get(name)))
190 return -EINVAL; 190 return -EINVAL;
191 191
192 if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator))) 192 if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
193 printk("I/O scheduler %s not found\n", chosen_elevator); 193 printk("I/O scheduler %s not found\n", chosen_elevator);
194 194
195 if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) { 195 if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
196 printk("Default I/O scheduler not found, using no-op\n"); 196 printk("Default I/O scheduler not found, using no-op\n");
197 e = elevator_get("noop"); 197 e = elevator_get("noop");
198 } 198 }
199 199
200 eq = elevator_alloc(e); 200 eq = elevator_alloc(e);
201 if (!eq) 201 if (!eq)
202 return -ENOMEM; 202 return -ENOMEM;
203 203
204 ret = elevator_attach(q, eq); 204 ret = elevator_attach(q, eq);
205 if (ret) 205 if (ret)
206 kobject_put(&eq->kobj); 206 kobject_put(&eq->kobj);
207 207
208 return ret; 208 return ret;
209 } 209 }
210 210
211 void elevator_exit(elevator_t *e) 211 void elevator_exit(elevator_t *e)
212 { 212 {
213 mutex_lock(&e->sysfs_lock); 213 mutex_lock(&e->sysfs_lock);
214 if (e->ops->elevator_exit_fn) 214 if (e->ops->elevator_exit_fn)
215 e->ops->elevator_exit_fn(e); 215 e->ops->elevator_exit_fn(e);
216 e->ops = NULL; 216 e->ops = NULL;
217 mutex_unlock(&e->sysfs_lock); 217 mutex_unlock(&e->sysfs_lock);
218 218
219 kobject_put(&e->kobj); 219 kobject_put(&e->kobj);
220 } 220 }
221 221
222 /* 222 /*
223 * Insert rq into dispatch queue of q. Queue lock must be held on 223 * Insert rq into dispatch queue of q. Queue lock must be held on
224 * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be 224 * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be
225 * appended to the dispatch queue. To be used by specific elevators. 225 * appended to the dispatch queue. To be used by specific elevators.
226 */ 226 */
227 void elv_dispatch_sort(request_queue_t *q, struct request *rq) 227 void elv_dispatch_sort(request_queue_t *q, struct request *rq)
228 { 228 {
229 sector_t boundary; 229 sector_t boundary;
230 struct list_head *entry; 230 struct list_head *entry;
231 231
232 if (q->last_merge == rq) 232 if (q->last_merge == rq)
233 q->last_merge = NULL; 233 q->last_merge = NULL;
234 q->nr_sorted--; 234 q->nr_sorted--;
235 235
236 boundary = q->end_sector; 236 boundary = q->end_sector;
237 237
238 list_for_each_prev(entry, &q->queue_head) { 238 list_for_each_prev(entry, &q->queue_head) {
239 struct request *pos = list_entry_rq(entry); 239 struct request *pos = list_entry_rq(entry);
240 240
241 if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) 241 if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
242 break; 242 break;
243 if (rq->sector >= boundary) { 243 if (rq->sector >= boundary) {
244 if (pos->sector < boundary) 244 if (pos->sector < boundary)
245 continue; 245 continue;
246 } else { 246 } else {
247 if (pos->sector >= boundary) 247 if (pos->sector >= boundary)
248 break; 248 break;
249 } 249 }
250 if (rq->sector >= pos->sector) 250 if (rq->sector >= pos->sector)
251 break; 251 break;
252 } 252 }
253 253
254 list_add(&rq->queuelist, entry); 254 list_add(&rq->queuelist, entry);
255 } 255 }
256 256
257 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) 257 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
258 { 258 {
259 elevator_t *e = q->elevator; 259 elevator_t *e = q->elevator;
260 int ret; 260 int ret;
261 261
262 if (q->last_merge) { 262 if (q->last_merge) {
263 ret = elv_try_merge(q->last_merge, bio); 263 ret = elv_try_merge(q->last_merge, bio);
264 if (ret != ELEVATOR_NO_MERGE) { 264 if (ret != ELEVATOR_NO_MERGE) {
265 *req = q->last_merge; 265 *req = q->last_merge;
266 return ret; 266 return ret;
267 } 267 }
268 } 268 }
269 269
270 if (e->ops->elevator_merge_fn) 270 if (e->ops->elevator_merge_fn)
271 return e->ops->elevator_merge_fn(q, req, bio); 271 return e->ops->elevator_merge_fn(q, req, bio);
272 272
273 return ELEVATOR_NO_MERGE; 273 return ELEVATOR_NO_MERGE;
274 } 274 }
275 275
276 void elv_merged_request(request_queue_t *q, struct request *rq) 276 void elv_merged_request(request_queue_t *q, struct request *rq)
277 { 277 {
278 elevator_t *e = q->elevator; 278 elevator_t *e = q->elevator;
279 279
280 if (e->ops->elevator_merged_fn) 280 if (e->ops->elevator_merged_fn)
281 e->ops->elevator_merged_fn(q, rq); 281 e->ops->elevator_merged_fn(q, rq);
282 282
283 q->last_merge = rq; 283 q->last_merge = rq;
284 } 284 }
285 285
286 void elv_merge_requests(request_queue_t *q, struct request *rq, 286 void elv_merge_requests(request_queue_t *q, struct request *rq,
287 struct request *next) 287 struct request *next)
288 { 288 {
289 elevator_t *e = q->elevator; 289 elevator_t *e = q->elevator;
290 290
291 if (e->ops->elevator_merge_req_fn) 291 if (e->ops->elevator_merge_req_fn)
292 e->ops->elevator_merge_req_fn(q, rq, next); 292 e->ops->elevator_merge_req_fn(q, rq, next);
293 q->nr_sorted--; 293 q->nr_sorted--;
294 294
295 q->last_merge = rq; 295 q->last_merge = rq;
296 } 296 }
297 297
298 void elv_requeue_request(request_queue_t *q, struct request *rq) 298 void elv_requeue_request(request_queue_t *q, struct request *rq)
299 { 299 {
300 elevator_t *e = q->elevator; 300 elevator_t *e = q->elevator;
301 301
302 /* 302 /*
303 * it already went through dequeue, we need to decrement the 303 * it already went through dequeue, we need to decrement the
304 * in_flight count again 304 * in_flight count again
305 */ 305 */
306 if (blk_account_rq(rq)) { 306 if (blk_account_rq(rq)) {
307 q->in_flight--; 307 q->in_flight--;
308 if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn) 308 if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
309 e->ops->elevator_deactivate_req_fn(q, rq); 309 e->ops->elevator_deactivate_req_fn(q, rq);
310 } 310 }
311 311
312 rq->flags &= ~REQ_STARTED; 312 rq->flags &= ~REQ_STARTED;
313 313
314 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); 314 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
315 } 315 }
316 316
317 static void elv_drain_elevator(request_queue_t *q) 317 static void elv_drain_elevator(request_queue_t *q)
318 { 318 {
319 static int printed; 319 static int printed;
320 while (q->elevator->ops->elevator_dispatch_fn(q, 1)) 320 while (q->elevator->ops->elevator_dispatch_fn(q, 1))
321 ; 321 ;
322 if (q->nr_sorted == 0) 322 if (q->nr_sorted == 0)
323 return; 323 return;
324 if (printed++ < 10) { 324 if (printed++ < 10) {
325 printk(KERN_ERR "%s: forced dispatching is broken " 325 printk(KERN_ERR "%s: forced dispatching is broken "
326 "(nr_sorted=%u), please report this\n", 326 "(nr_sorted=%u), please report this\n",
327 q->elevator->elevator_type->elevator_name, q->nr_sorted); 327 q->elevator->elevator_type->elevator_name, q->nr_sorted);
328 } 328 }
329 } 329 }
330 330
331 void elv_insert(request_queue_t *q, struct request *rq, int where) 331 void elv_insert(request_queue_t *q, struct request *rq, int where)
332 { 332 {
333 struct list_head *pos; 333 struct list_head *pos;
334 unsigned ordseq; 334 unsigned ordseq;
335 335
336 rq->q = q; 336 rq->q = q;
337 337
338 switch (where) { 338 switch (where) {
339 case ELEVATOR_INSERT_FRONT: 339 case ELEVATOR_INSERT_FRONT:
340 rq->flags |= REQ_SOFTBARRIER; 340 rq->flags |= REQ_SOFTBARRIER;
341 341
342 list_add(&rq->queuelist, &q->queue_head); 342 list_add(&rq->queuelist, &q->queue_head);
343 break; 343 break;
344 344
345 case ELEVATOR_INSERT_BACK: 345 case ELEVATOR_INSERT_BACK:
346 rq->flags |= REQ_SOFTBARRIER; 346 rq->flags |= REQ_SOFTBARRIER;
347 elv_drain_elevator(q); 347 elv_drain_elevator(q);
348 list_add_tail(&rq->queuelist, &q->queue_head); 348 list_add_tail(&rq->queuelist, &q->queue_head);
349 /* 349 /*
350 * We kick the queue here for the following reasons. 350 * We kick the queue here for the following reasons.
351 * - The elevator might have returned NULL previously 351 * - The elevator might have returned NULL previously
352 * to delay requests and returned them now. As the 352 * to delay requests and returned them now. As the
353 * queue wasn't empty before this request, ll_rw_blk 353 * queue wasn't empty before this request, ll_rw_blk
354 * won't run the queue on return, resulting in hang. 354 * won't run the queue on return, resulting in hang.
355 * - Usually, back inserted requests won't be merged 355 * - Usually, back inserted requests won't be merged
356 * with anything. There's no point in delaying queue 356 * with anything. There's no point in delaying queue
357 * processing. 357 * processing.
358 */ 358 */
359 blk_remove_plug(q); 359 blk_remove_plug(q);
360 q->request_fn(q); 360 q->request_fn(q);
361 break; 361 break;
362 362
363 case ELEVATOR_INSERT_SORT: 363 case ELEVATOR_INSERT_SORT:
364 BUG_ON(!blk_fs_request(rq)); 364 BUG_ON(!blk_fs_request(rq));
365 rq->flags |= REQ_SORTED; 365 rq->flags |= REQ_SORTED;
366 q->nr_sorted++; 366 q->nr_sorted++;
367 if (q->last_merge == NULL && rq_mergeable(rq)) 367 if (q->last_merge == NULL && rq_mergeable(rq))
368 q->last_merge = rq; 368 q->last_merge = rq;
369 /* 369 /*
370 * Some ioscheds (cfq) run q->request_fn directly, so 370 * Some ioscheds (cfq) run q->request_fn directly, so
371 * rq cannot be accessed after calling 371 * rq cannot be accessed after calling
372 * elevator_add_req_fn. 372 * elevator_add_req_fn.
373 */ 373 */
374 q->elevator->ops->elevator_add_req_fn(q, rq); 374 q->elevator->ops->elevator_add_req_fn(q, rq);
375 break; 375 break;
376 376
377 case ELEVATOR_INSERT_REQUEUE: 377 case ELEVATOR_INSERT_REQUEUE:
378 /* 378 /*
379 * If ordered flush isn't in progress, we do front 379 * If ordered flush isn't in progress, we do front
380 * insertion; otherwise, requests should be requeued 380 * insertion; otherwise, requests should be requeued
381 * in ordseq order. 381 * in ordseq order.
382 */ 382 */
383 rq->flags |= REQ_SOFTBARRIER; 383 rq->flags |= REQ_SOFTBARRIER;
384 384
385 if (q->ordseq == 0) { 385 if (q->ordseq == 0) {
386 list_add(&rq->queuelist, &q->queue_head); 386 list_add(&rq->queuelist, &q->queue_head);
387 break; 387 break;
388 } 388 }
389 389
390 ordseq = blk_ordered_req_seq(rq); 390 ordseq = blk_ordered_req_seq(rq);
391 391
392 list_for_each(pos, &q->queue_head) { 392 list_for_each(pos, &q->queue_head) {
393 struct request *pos_rq = list_entry_rq(pos); 393 struct request *pos_rq = list_entry_rq(pos);
394 if (ordseq <= blk_ordered_req_seq(pos_rq)) 394 if (ordseq <= blk_ordered_req_seq(pos_rq))
395 break; 395 break;
396 } 396 }
397 397
398 list_add_tail(&rq->queuelist, pos); 398 list_add_tail(&rq->queuelist, pos);
399 break; 399 break;
400 400
401 default: 401 default:
402 printk(KERN_ERR "%s: bad insertion point %d\n", 402 printk(KERN_ERR "%s: bad insertion point %d\n",
403 __FUNCTION__, where); 403 __FUNCTION__, where);
404 BUG(); 404 BUG();
405 } 405 }
406 406
407 if (blk_queue_plugged(q)) { 407 if (blk_queue_plugged(q)) {
408 int nrq = q->rq.count[READ] + q->rq.count[WRITE] 408 int nrq = q->rq.count[READ] + q->rq.count[WRITE]
409 - q->in_flight; 409 - q->in_flight;
410 410
411 if (nrq >= q->unplug_thresh) 411 if (nrq >= q->unplug_thresh)
412 __generic_unplug_device(q); 412 __generic_unplug_device(q);
413 } 413 }
414 } 414 }
415 415
416 void __elv_add_request(request_queue_t *q, struct request *rq, int where, 416 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
417 int plug) 417 int plug)
418 { 418 {
419 if (q->ordcolor) 419 if (q->ordcolor)
420 rq->flags |= REQ_ORDERED_COLOR; 420 rq->flags |= REQ_ORDERED_COLOR;
421 421
422 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { 422 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
423 /* 423 /*
424 * toggle ordered color 424 * toggle ordered color
425 */ 425 */
426 if (blk_barrier_rq(rq)) 426 if (blk_barrier_rq(rq))
427 q->ordcolor ^= 1; 427 q->ordcolor ^= 1;
428 428
429 /* 429 /*
430 * barriers implicitly indicate back insertion 430 * barriers implicitly indicate back insertion
431 */ 431 */
432 if (where == ELEVATOR_INSERT_SORT) 432 if (where == ELEVATOR_INSERT_SORT)
433 where = ELEVATOR_INSERT_BACK; 433 where = ELEVATOR_INSERT_BACK;
434 434
435 /* 435 /*
436 * this request is scheduling boundary, update 436 * this request is scheduling boundary, update
437 * end_sector 437 * end_sector
438 */ 438 */
439 if (blk_fs_request(rq)) { 439 if (blk_fs_request(rq)) {
440 q->end_sector = rq_end_sector(rq); 440 q->end_sector = rq_end_sector(rq);
441 q->boundary_rq = rq; 441 q->boundary_rq = rq;
442 } 442 }
443 } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) 443 } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
444 where = ELEVATOR_INSERT_BACK; 444 where = ELEVATOR_INSERT_BACK;
445 445
446 if (plug) 446 if (plug)
447 blk_plug_device(q); 447 blk_plug_device(q);
448 448
449 elv_insert(q, rq, where); 449 elv_insert(q, rq, where);
450 } 450 }
451 451
452 void elv_add_request(request_queue_t *q, struct request *rq, int where, 452 void elv_add_request(request_queue_t *q, struct request *rq, int where,
453 int plug) 453 int plug)
454 { 454 {
455 unsigned long flags; 455 unsigned long flags;
456 456
457 spin_lock_irqsave(q->queue_lock, flags); 457 spin_lock_irqsave(q->queue_lock, flags);
458 __elv_add_request(q, rq, where, plug); 458 __elv_add_request(q, rq, where, plug);
459 spin_unlock_irqrestore(q->queue_lock, flags); 459 spin_unlock_irqrestore(q->queue_lock, flags);
460 } 460 }
461 461
462 static inline struct request *__elv_next_request(request_queue_t *q) 462 static inline struct request *__elv_next_request(request_queue_t *q)
463 { 463 {
464 struct request *rq; 464 struct request *rq;
465 465
466 while (1) { 466 while (1) {
467 while (!list_empty(&q->queue_head)) { 467 while (!list_empty(&q->queue_head)) {
468 rq = list_entry_rq(q->queue_head.next); 468 rq = list_entry_rq(q->queue_head.next);
469 if (blk_do_ordered(q, &rq)) 469 if (blk_do_ordered(q, &rq))
470 return rq; 470 return rq;
471 } 471 }
472 472
473 if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) 473 if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
474 return NULL; 474 return NULL;
475 } 475 }
476 } 476 }
477 477
478 struct request *elv_next_request(request_queue_t *q) 478 struct request *elv_next_request(request_queue_t *q)
479 { 479 {
480 struct request *rq; 480 struct request *rq;
481 int ret; 481 int ret;
482 482
483 while ((rq = __elv_next_request(q)) != NULL) { 483 while ((rq = __elv_next_request(q)) != NULL) {
484 if (!(rq->flags & REQ_STARTED)) { 484 if (!(rq->flags & REQ_STARTED)) {
485 elevator_t *e = q->elevator; 485 elevator_t *e = q->elevator;
486 486
487 /* 487 /*
488 * This is the first time the device driver 488 * This is the first time the device driver
489 * sees this request (possibly after 489 * sees this request (possibly after
490 * requeueing). Notify IO scheduler. 490 * requeueing). Notify IO scheduler.
491 */ 491 */
492 if (blk_sorted_rq(rq) && 492 if (blk_sorted_rq(rq) &&
493 e->ops->elevator_activate_req_fn) 493 e->ops->elevator_activate_req_fn)
494 e->ops->elevator_activate_req_fn(q, rq); 494 e->ops->elevator_activate_req_fn(q, rq);
495 495
496 /* 496 /*
497 * just mark as started even if we don't start 497 * just mark as started even if we don't start
498 * it, a request that has been delayed should 498 * it, a request that has been delayed should
499 * not be passed by new incoming requests 499 * not be passed by new incoming requests
500 */ 500 */
501 rq->flags |= REQ_STARTED; 501 rq->flags |= REQ_STARTED;
502 } 502 }
503 503
504 if (!q->boundary_rq || q->boundary_rq == rq) { 504 if (!q->boundary_rq || q->boundary_rq == rq) {
505 q->end_sector = rq_end_sector(rq); 505 q->end_sector = rq_end_sector(rq);
506 q->boundary_rq = NULL; 506 q->boundary_rq = NULL;
507 } 507 }
508 508
509 if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn) 509 if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
510 break; 510 break;
511 511
512 ret = q->prep_rq_fn(q, rq); 512 ret = q->prep_rq_fn(q, rq);
513 if (ret == BLKPREP_OK) { 513 if (ret == BLKPREP_OK) {
514 break; 514 break;
515 } else if (ret == BLKPREP_DEFER) { 515 } else if (ret == BLKPREP_DEFER) {
516 /* 516 /*
517 * the request may have been (partially) prepped. 517 * the request may have been (partially) prepped.
518 * we need to keep this request in the front to 518 * we need to keep this request in the front to
519 * avoid resource deadlock. REQ_STARTED will 519 * avoid resource deadlock. REQ_STARTED will
520 * prevent other fs requests from passing this one. 520 * prevent other fs requests from passing this one.
521 */ 521 */
522 rq = NULL; 522 rq = NULL;
523 break; 523 break;
524 } else if (ret == BLKPREP_KILL) { 524 } else if (ret == BLKPREP_KILL) {
525 int nr_bytes = rq->hard_nr_sectors << 9; 525 int nr_bytes = rq->hard_nr_sectors << 9;
526 526
527 if (!nr_bytes) 527 if (!nr_bytes)
528 nr_bytes = rq->data_len; 528 nr_bytes = rq->data_len;
529 529
530 blkdev_dequeue_request(rq); 530 blkdev_dequeue_request(rq);
531 rq->flags |= REQ_QUIET; 531 rq->flags |= REQ_QUIET;
532 end_that_request_chunk(rq, 0, nr_bytes); 532 end_that_request_chunk(rq, 0, nr_bytes);
533 end_that_request_last(rq, 0); 533 end_that_request_last(rq, 0);
534 } else { 534 } else {
535 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, 535 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
536 ret); 536 ret);
537 break; 537 break;
538 } 538 }
539 } 539 }
540 540
541 return rq; 541 return rq;
542 } 542 }
543 543
544 void elv_dequeue_request(request_queue_t *q, struct request *rq) 544 void elv_dequeue_request(request_queue_t *q, struct request *rq)
545 { 545 {
546 BUG_ON(list_empty(&rq->queuelist)); 546 BUG_ON(list_empty(&rq->queuelist));
547 547
548 list_del_init(&rq->queuelist); 548 list_del_init(&rq->queuelist);
549 549
550 /* 550 /*
551 * the time frame between a request being removed from the lists 551 * the time frame between a request being removed from the lists
552 * and to it is freed is accounted as io that is in progress at 552 * and to it is freed is accounted as io that is in progress at
553 * the driver side. 553 * the driver side.
554 */ 554 */
555 if (blk_account_rq(rq)) 555 if (blk_account_rq(rq))
556 q->in_flight++; 556 q->in_flight++;
557 } 557 }
558 558
559 int elv_queue_empty(request_queue_t *q) 559 int elv_queue_empty(request_queue_t *q)
560 { 560 {
561 elevator_t *e = q->elevator; 561 elevator_t *e = q->elevator;
562 562
563 if (!list_empty(&q->queue_head)) 563 if (!list_empty(&q->queue_head))
564 return 0; 564 return 0;
565 565
566 if (e->ops->elevator_queue_empty_fn) 566 if (e->ops->elevator_queue_empty_fn)
567 return e->ops->elevator_queue_empty_fn(q); 567 return e->ops->elevator_queue_empty_fn(q);
568 568
569 return 1; 569 return 1;
570 } 570 }
571 571
572 struct request *elv_latter_request(request_queue_t *q, struct request *rq) 572 struct request *elv_latter_request(request_queue_t *q, struct request *rq)
573 { 573 {
574 elevator_t *e = q->elevator; 574 elevator_t *e = q->elevator;
575 575
576 if (e->ops->elevator_latter_req_fn) 576 if (e->ops->elevator_latter_req_fn)
577 return e->ops->elevator_latter_req_fn(q, rq); 577 return e->ops->elevator_latter_req_fn(q, rq);
578 return NULL; 578 return NULL;
579 } 579 }
580 580
581 struct request *elv_former_request(request_queue_t *q, struct request *rq) 581 struct request *elv_former_request(request_queue_t *q, struct request *rq)
582 { 582 {
583 elevator_t *e = q->elevator; 583 elevator_t *e = q->elevator;
584 584
585 if (e->ops->elevator_former_req_fn) 585 if (e->ops->elevator_former_req_fn)
586 return e->ops->elevator_former_req_fn(q, rq); 586 return e->ops->elevator_former_req_fn(q, rq);
587 return NULL; 587 return NULL;
588 } 588 }
589 589
590 int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio, 590 int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
591 gfp_t gfp_mask) 591 gfp_t gfp_mask)
592 { 592 {
593 elevator_t *e = q->elevator; 593 elevator_t *e = q->elevator;
594 594
595 if (e->ops->elevator_set_req_fn) 595 if (e->ops->elevator_set_req_fn)
596 return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask); 596 return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
597 597
598 rq->elevator_private = NULL; 598 rq->elevator_private = NULL;
599 return 0; 599 return 0;
600 } 600 }
601 601
602 void elv_put_request(request_queue_t *q, struct request *rq) 602 void elv_put_request(request_queue_t *q, struct request *rq)
603 { 603 {
604 elevator_t *e = q->elevator; 604 elevator_t *e = q->elevator;
605 605
606 if (e->ops->elevator_put_req_fn) 606 if (e->ops->elevator_put_req_fn)
607 e->ops->elevator_put_req_fn(q, rq); 607 e->ops->elevator_put_req_fn(q, rq);
608 } 608 }
609 609
610 int elv_may_queue(request_queue_t *q, int rw, struct bio *bio) 610 int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
611 { 611 {
612 elevator_t *e = q->elevator; 612 elevator_t *e = q->elevator;
613 613
614 if (e->ops->elevator_may_queue_fn) 614 if (e->ops->elevator_may_queue_fn)
615 return e->ops->elevator_may_queue_fn(q, rw, bio); 615 return e->ops->elevator_may_queue_fn(q, rw, bio);
616 616
617 return ELV_MQUEUE_MAY; 617 return ELV_MQUEUE_MAY;
618 } 618 }
619 619
620 void elv_completed_request(request_queue_t *q, struct request *rq) 620 void elv_completed_request(request_queue_t *q, struct request *rq)
621 { 621 {
622 elevator_t *e = q->elevator; 622 elevator_t *e = q->elevator;
623 623
624 /* 624 /*
625 * request is released from the driver, io must be done 625 * request is released from the driver, io must be done
626 */ 626 */
627 if (blk_account_rq(rq)) { 627 if (blk_account_rq(rq)) {
628 q->in_flight--; 628 q->in_flight--;
629 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) 629 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
630 e->ops->elevator_completed_req_fn(q, rq); 630 e->ops->elevator_completed_req_fn(q, rq);
631 } 631 }
632 632
633 /* 633 /*
634 * Check if the queue is waiting for fs requests to be 634 * Check if the queue is waiting for fs requests to be
635 * drained for flush sequence. 635 * drained for flush sequence.
636 */ 636 */
637 if (unlikely(q->ordseq)) { 637 if (unlikely(q->ordseq)) {
638 struct request *first_rq = list_entry_rq(q->queue_head.next); 638 struct request *first_rq = list_entry_rq(q->queue_head.next);
639 if (q->in_flight == 0 && 639 if (q->in_flight == 0 &&
640 blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && 640 blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
641 blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { 641 blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
642 blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); 642 blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
643 q->request_fn(q); 643 q->request_fn(q);
644 } 644 }
645 } 645 }
646 } 646 }
647 647
648 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) 648 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
649 649
650 static ssize_t 650 static ssize_t
651 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 651 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
652 { 652 {
653 elevator_t *e = container_of(kobj, elevator_t, kobj); 653 elevator_t *e = container_of(kobj, elevator_t, kobj);
654 struct elv_fs_entry *entry = to_elv(attr); 654 struct elv_fs_entry *entry = to_elv(attr);
655 ssize_t error; 655 ssize_t error;
656 656
657 if (!entry->show) 657 if (!entry->show)
658 return -EIO; 658 return -EIO;
659 659
660 mutex_lock(&e->sysfs_lock); 660 mutex_lock(&e->sysfs_lock);
661 error = e->ops ? entry->show(e, page) : -ENOENT; 661 error = e->ops ? entry->show(e, page) : -ENOENT;
662 mutex_unlock(&e->sysfs_lock); 662 mutex_unlock(&e->sysfs_lock);
663 return error; 663 return error;
664 } 664 }
665 665
666 static ssize_t 666 static ssize_t
667 elv_attr_store(struct kobject *kobj, struct attribute *attr, 667 elv_attr_store(struct kobject *kobj, struct attribute *attr,
668 const char *page, size_t length) 668 const char *page, size_t length)
669 { 669 {
670 elevator_t *e = container_of(kobj, elevator_t, kobj); 670 elevator_t *e = container_of(kobj, elevator_t, kobj);
671 struct elv_fs_entry *entry = to_elv(attr); 671 struct elv_fs_entry *entry = to_elv(attr);
672 ssize_t error; 672 ssize_t error;
673 673
674 if (!entry->store) 674 if (!entry->store)
675 return -EIO; 675 return -EIO;
676 676
677 mutex_lock(&e->sysfs_lock); 677 mutex_lock(&e->sysfs_lock);
678 error = e->ops ? entry->store(e, page, length) : -ENOENT; 678 error = e->ops ? entry->store(e, page, length) : -ENOENT;
679 mutex_unlock(&e->sysfs_lock); 679 mutex_unlock(&e->sysfs_lock);
680 return error; 680 return error;
681 } 681 }
682 682
683 static struct sysfs_ops elv_sysfs_ops = { 683 static struct sysfs_ops elv_sysfs_ops = {
684 .show = elv_attr_show, 684 .show = elv_attr_show,
685 .store = elv_attr_store, 685 .store = elv_attr_store,
686 }; 686 };
687 687
688 static struct kobj_type elv_ktype = { 688 static struct kobj_type elv_ktype = {
689 .sysfs_ops = &elv_sysfs_ops, 689 .sysfs_ops = &elv_sysfs_ops,
690 .release = elevator_release, 690 .release = elevator_release,
691 }; 691 };
692 692
693 int elv_register_queue(struct request_queue *q) 693 int elv_register_queue(struct request_queue *q)
694 { 694 {
695 elevator_t *e = q->elevator; 695 elevator_t *e = q->elevator;
696 int error; 696 int error;
697 697
698 e->kobj.parent = &q->kobj; 698 e->kobj.parent = &q->kobj;
699 699
700 error = kobject_add(&e->kobj); 700 error = kobject_add(&e->kobj);
701 if (!error) { 701 if (!error) {
702 struct attribute **attr = e->elevator_type->elevator_attrs; 702 struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
703 if (attr) { 703 if (attr) {
704 while (*attr) { 704 while (attr->attr.name) {
705 if (sysfs_create_file(&e->kobj,*attr++)) 705 if (sysfs_create_file(&e->kobj, &attr->attr))
706 break; 706 break;
707 attr++;
707 } 708 }
708 } 709 }
709 kobject_uevent(&e->kobj, KOBJ_ADD); 710 kobject_uevent(&e->kobj, KOBJ_ADD);
710 } 711 }
711 return error; 712 return error;
712 } 713 }
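Because elevator_alloc() names the elevator kobject "iosched" and elv_register_queue() parents it to the queue's kobject, the files created from elevator_attrs end up under /sys/block/<dev>/queue/iosched/. A user-space sketch of reading one of deadline's tunables through that path; the device name "sda" and deadline being the active scheduler are assumptions:

/* user-space sketch: read a tunable created by elv_register_queue() */
#include <stdio.h>

int main(void)
{
	/* "sda" and an active deadline scheduler are assumptions */
	FILE *f = fopen("/sys/block/sda/queue/iosched/read_expire", "r");
	char buf[64];

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("read_expire = %s", buf);
	fclose(f);
	return 0;
}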
713 714
714 void elv_unregister_queue(struct request_queue *q) 715 void elv_unregister_queue(struct request_queue *q)
715 { 716 {
716 if (q) { 717 if (q) {
717 elevator_t *e = q->elevator; 718 elevator_t *e = q->elevator;
718 kobject_uevent(&e->kobj, KOBJ_REMOVE); 719 kobject_uevent(&e->kobj, KOBJ_REMOVE);
719 kobject_del(&e->kobj); 720 kobject_del(&e->kobj);
720 } 721 }
721 } 722 }
722 723
723 int elv_register(struct elevator_type *e) 724 int elv_register(struct elevator_type *e)
724 { 725 {
725 spin_lock_irq(&elv_list_lock); 726 spin_lock_irq(&elv_list_lock);
726 if (elevator_find(e->elevator_name)) 727 if (elevator_find(e->elevator_name))
727 BUG(); 728 BUG();
728 list_add_tail(&e->list, &elv_list); 729 list_add_tail(&e->list, &elv_list);
729 spin_unlock_irq(&elv_list_lock); 730 spin_unlock_irq(&elv_list_lock);
730 731
731 printk(KERN_INFO "io scheduler %s registered", e->elevator_name); 732 printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
732 if (!strcmp(e->elevator_name, chosen_elevator) || 733 if (!strcmp(e->elevator_name, chosen_elevator) ||
733 (!*chosen_elevator && 734 (!*chosen_elevator &&
734 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) 735 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
735 printk(" (default)"); 736 printk(" (default)");
736 printk("\n"); 737 printk("\n");
737 return 0; 738 return 0;
738 } 739 }
739 EXPORT_SYMBOL_GPL(elv_register); 740 EXPORT_SYMBOL_GPL(elv_register);
740 741
741 void elv_unregister(struct elevator_type *e) 742 void elv_unregister(struct elevator_type *e)
742 { 743 {
743 struct task_struct *g, *p; 744 struct task_struct *g, *p;
744 745
745 /* 746 /*
746 * Iterate every thread in the process to remove the io contexts. 747 * Iterate every thread in the process to remove the io contexts.
747 */ 748 */
748 if (e->ops.trim) { 749 if (e->ops.trim) {
749 read_lock(&tasklist_lock); 750 read_lock(&tasklist_lock);
750 do_each_thread(g, p) { 751 do_each_thread(g, p) {
751 task_lock(p); 752 task_lock(p);
752 e->ops.trim(p->io_context); 753 e->ops.trim(p->io_context);
753 task_unlock(p); 754 task_unlock(p);
754 } while_each_thread(g, p); 755 } while_each_thread(g, p);
755 read_unlock(&tasklist_lock); 756 read_unlock(&tasklist_lock);
756 } 757 }
757 758
758 spin_lock_irq(&elv_list_lock); 759 spin_lock_irq(&elv_list_lock);
759 list_del_init(&e->list); 760 list_del_init(&e->list);
760 spin_unlock_irq(&elv_list_lock); 761 spin_unlock_irq(&elv_list_lock);
761 } 762 }
762 EXPORT_SYMBOL_GPL(elv_unregister); 763 EXPORT_SYMBOL_GPL(elv_unregister);
763 764
764 /* 765 /*
765 * switch to new_e io scheduler. be careful not to introduce deadlocks - 766 * switch to new_e io scheduler. be careful not to introduce deadlocks -
766 * we don't free the old io scheduler, before we have allocated what we 767 * we don't free the old io scheduler, before we have allocated what we
767 * need for the new one. this way we have a chance of going back to the old 768 * need for the new one. this way we have a chance of going back to the old
768 * one, if the new one fails init for some reason. 769 * one, if the new one fails init for some reason.
769 */ 770 */
770 static int elevator_switch(request_queue_t *q, struct elevator_type *new_e) 771 static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
771 { 772 {
772 elevator_t *old_elevator, *e; 773 elevator_t *old_elevator, *e;
773 774
774 /* 775 /*
775 * Allocate new elevator 776 * Allocate new elevator
776 */ 777 */
777 e = elevator_alloc(new_e); 778 e = elevator_alloc(new_e);
778 if (!e) 779 if (!e)
779 return 0; 780 return 0;
780 781
781 /* 782 /*
782 * Turn on BYPASS and drain all requests w/ elevator private data 783 * Turn on BYPASS and drain all requests w/ elevator private data
783 */ 784 */
784 spin_lock_irq(q->queue_lock); 785 spin_lock_irq(q->queue_lock);
785 786
786 set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 787 set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
787 788
788 elv_drain_elevator(q); 789 elv_drain_elevator(q);
789 790
790 while (q->rq.elvpriv) { 791 while (q->rq.elvpriv) {
791 blk_remove_plug(q); 792 blk_remove_plug(q);
792 q->request_fn(q); 793 q->request_fn(q);
793 spin_unlock_irq(q->queue_lock); 794 spin_unlock_irq(q->queue_lock);
794 msleep(10); 795 msleep(10);
795 spin_lock_irq(q->queue_lock); 796 spin_lock_irq(q->queue_lock);
796 elv_drain_elevator(q); 797 elv_drain_elevator(q);
797 } 798 }
798 799
799 spin_unlock_irq(q->queue_lock); 800 spin_unlock_irq(q->queue_lock);
800 801
801 /* 802 /*
802 * unregister old elevator data 803 * unregister old elevator data
803 */ 804 */
804 elv_unregister_queue(q); 805 elv_unregister_queue(q);
805 old_elevator = q->elevator; 806 old_elevator = q->elevator;
806 807
807 /* 808 /*
808 * attach and start new elevator 809 * attach and start new elevator
809 */ 810 */
810 if (elevator_attach(q, e)) 811 if (elevator_attach(q, e))
811 goto fail; 812 goto fail;
812 813
813 if (elv_register_queue(q)) 814 if (elv_register_queue(q))
814 goto fail_register; 815 goto fail_register;
815 816
816 /* 817 /*
817 * finally exit old elevator and turn off BYPASS. 818 * finally exit old elevator and turn off BYPASS.
818 */ 819 */
819 elevator_exit(old_elevator); 820 elevator_exit(old_elevator);
820 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 821 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
821 return 1; 822 return 1;
822 823
823 fail_register: 824 fail_register:
824 /* 825 /*
825 * switch failed, exit the new io scheduler and reattach the old 826 * switch failed, exit the new io scheduler and reattach the old
826 * one again (along with re-adding the sysfs dir) 827 * one again (along with re-adding the sysfs dir)
827 */ 828 */
828 elevator_exit(e); 829 elevator_exit(e);
829 e = NULL; 830 e = NULL;
830 fail: 831 fail:
831 q->elevator = old_elevator; 832 q->elevator = old_elevator;
832 elv_register_queue(q); 833 elv_register_queue(q);
833 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 834 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
834 if (e) 835 if (e)
835 kobject_put(&e->kobj); 836 kobject_put(&e->kobj);
836 return 0; 837 return 0;
837 } 838 }
838 839
839 ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count) 840 ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
840 { 841 {
841 char elevator_name[ELV_NAME_MAX]; 842 char elevator_name[ELV_NAME_MAX];
842 size_t len; 843 size_t len;
843 struct elevator_type *e; 844 struct elevator_type *e;
844 845
845 elevator_name[sizeof(elevator_name) - 1] = '\0'; 846 elevator_name[sizeof(elevator_name) - 1] = '\0';
846 strncpy(elevator_name, name, sizeof(elevator_name) - 1); 847 strncpy(elevator_name, name, sizeof(elevator_name) - 1);
847 len = strlen(elevator_name); 848 len = strlen(elevator_name);
848 849
849 if (len && elevator_name[len - 1] == '\n') 850 if (len && elevator_name[len - 1] == '\n')
850 elevator_name[len - 1] = '\0'; 851 elevator_name[len - 1] = '\0';
851 852
852 e = elevator_get(elevator_name); 853 e = elevator_get(elevator_name);
853 if (!e) { 854 if (!e) {
854 printk(KERN_ERR "elevator: type %s not found\n", elevator_name); 855 printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
855 return -EINVAL; 856 return -EINVAL;
856 } 857 }
857 858
858 if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { 859 if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
859 elevator_put(e); 860 elevator_put(e);
860 return count; 861 return count;
861 } 862 }
862 863
863 if (!elevator_switch(q, e)) 864 if (!elevator_switch(q, e))
864 printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name); 865 printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name);
865 return count; 866 return count;
866 } 867 }
867 868
868 ssize_t elv_iosched_show(request_queue_t *q, char *name) 869 ssize_t elv_iosched_show(request_queue_t *q, char *name)
869 { 870 {
870 elevator_t *e = q->elevator; 871 elevator_t *e = q->elevator;
871 struct elevator_type *elv = e->elevator_type; 872 struct elevator_type *elv = e->elevator_type;
872 struct list_head *entry; 873 struct list_head *entry;
873 int len = 0; 874 int len = 0;
874 875
875 spin_lock_irq(q->queue_lock); 876 spin_lock_irq(q->queue_lock);
876 list_for_each(entry, &elv_list) { 877 list_for_each(entry, &elv_list) {
877 struct elevator_type *__e; 878 struct elevator_type *__e;
878 879
879 __e = list_entry(entry, struct elevator_type, list); 880 __e = list_entry(entry, struct elevator_type, list);
880 if (!strcmp(elv->elevator_name, __e->elevator_name)) 881 if (!strcmp(elv->elevator_name, __e->elevator_name))
881 len += sprintf(name+len, "[%s] ", elv->elevator_name); 882 len += sprintf(name+len, "[%s] ", elv->elevator_name);
882 else 883 else
883 len += sprintf(name+len, "%s ", __e->elevator_name); 884 len += sprintf(name+len, "%s ", __e->elevator_name);
884 } 885 }
885 spin_unlock_irq(q->queue_lock); 886 spin_unlock_irq(q->queue_lock);
886 887
887 len += sprintf(len+name, "\n"); 888 len += sprintf(len+name, "\n");
888 return len; 889 return len;
889 } 890 }
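The two functions above are the sysfs ends of the switching path: a write to the queue's scheduler attribute (/sys/block/<dev>/queue/scheduler) is parsed by elv_iosched_store(), which calls elevator_switch(), while a read goes through elv_iosched_show(), which brackets the name of the active scheduler. A user-space sketch of requesting a switch; the device name "sda" and the "deadline" target are assumptions:

/* user-space sketch: switch I/O scheduler through the queue's sysfs attribute */
#include <stdio.h>

int main(void)
{
	/* "sda" is an assumption; elv_iosched_store() strips the trailing newline */
	FILE *f = fopen("/sys/block/sda/queue/scheduler", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fputs("deadline\n", f) == EOF)
		perror("fputs");
	fclose(f);
	return 0;
}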
890 891
891 EXPORT_SYMBOL(elv_dispatch_sort); 892 EXPORT_SYMBOL(elv_dispatch_sort);
892 EXPORT_SYMBOL(elv_add_request); 893 EXPORT_SYMBOL(elv_add_request);
893 EXPORT_SYMBOL(__elv_add_request); 894 EXPORT_SYMBOL(__elv_add_request);
894 EXPORT_SYMBOL(elv_requeue_request); 895 EXPORT_SYMBOL(elv_requeue_request);
895 EXPORT_SYMBOL(elv_next_request); 896 EXPORT_SYMBOL(elv_next_request);
896 EXPORT_SYMBOL(elv_dequeue_request); 897 EXPORT_SYMBOL(elv_dequeue_request);
897 EXPORT_SYMBOL(elv_queue_empty); 898 EXPORT_SYMBOL(elv_queue_empty);
898 EXPORT_SYMBOL(elv_completed_request); 899 EXPORT_SYMBOL(elv_completed_request);
899 EXPORT_SYMBOL(elevator_exit); 900 EXPORT_SYMBOL(elevator_exit);
900 EXPORT_SYMBOL(elevator_init); 901 EXPORT_SYMBOL(elevator_init);
901 902
include/linux/elevator.h
1 #ifndef _LINUX_ELEVATOR_H 1 #ifndef _LINUX_ELEVATOR_H
2 #define _LINUX_ELEVATOR_H 2 #define _LINUX_ELEVATOR_H
3 3
4 typedef int (elevator_merge_fn) (request_queue_t *, struct request **, 4 typedef int (elevator_merge_fn) (request_queue_t *, struct request **,
5 struct bio *); 5 struct bio *);
6 6
7 typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struct request *); 7 typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struct request *);
8 8
9 typedef void (elevator_merged_fn) (request_queue_t *, struct request *); 9 typedef void (elevator_merged_fn) (request_queue_t *, struct request *);
10 10
11 typedef int (elevator_dispatch_fn) (request_queue_t *, int); 11 typedef int (elevator_dispatch_fn) (request_queue_t *, int);
12 12
13 typedef void (elevator_add_req_fn) (request_queue_t *, struct request *); 13 typedef void (elevator_add_req_fn) (request_queue_t *, struct request *);
14 typedef int (elevator_queue_empty_fn) (request_queue_t *); 14 typedef int (elevator_queue_empty_fn) (request_queue_t *);
15 typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *); 15 typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *);
16 typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); 16 typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *);
17 typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *); 17 typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *);
18 18
19 typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t); 19 typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t);
20 typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); 20 typedef void (elevator_put_req_fn) (request_queue_t *, struct request *);
21 typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *); 21 typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *);
22 typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *); 22 typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *);
23 23
24 typedef int (elevator_init_fn) (request_queue_t *, elevator_t *); 24 typedef int (elevator_init_fn) (request_queue_t *, elevator_t *);
25 typedef void (elevator_exit_fn) (elevator_t *); 25 typedef void (elevator_exit_fn) (elevator_t *);
26 26
27 struct elevator_ops 27 struct elevator_ops
28 { 28 {
29 elevator_merge_fn *elevator_merge_fn; 29 elevator_merge_fn *elevator_merge_fn;
30 elevator_merged_fn *elevator_merged_fn; 30 elevator_merged_fn *elevator_merged_fn;
31 elevator_merge_req_fn *elevator_merge_req_fn; 31 elevator_merge_req_fn *elevator_merge_req_fn;
32 32
33 elevator_dispatch_fn *elevator_dispatch_fn; 33 elevator_dispatch_fn *elevator_dispatch_fn;
34 elevator_add_req_fn *elevator_add_req_fn; 34 elevator_add_req_fn *elevator_add_req_fn;
35 elevator_activate_req_fn *elevator_activate_req_fn; 35 elevator_activate_req_fn *elevator_activate_req_fn;
36 elevator_deactivate_req_fn *elevator_deactivate_req_fn; 36 elevator_deactivate_req_fn *elevator_deactivate_req_fn;
37 37
38 elevator_queue_empty_fn *elevator_queue_empty_fn; 38 elevator_queue_empty_fn *elevator_queue_empty_fn;
39 elevator_completed_req_fn *elevator_completed_req_fn; 39 elevator_completed_req_fn *elevator_completed_req_fn;
40 40
41 elevator_request_list_fn *elevator_former_req_fn; 41 elevator_request_list_fn *elevator_former_req_fn;
42 elevator_request_list_fn *elevator_latter_req_fn; 42 elevator_request_list_fn *elevator_latter_req_fn;
43 43
44 elevator_set_req_fn *elevator_set_req_fn; 44 elevator_set_req_fn *elevator_set_req_fn;
45 elevator_put_req_fn *elevator_put_req_fn; 45 elevator_put_req_fn *elevator_put_req_fn;
46 46
47 elevator_may_queue_fn *elevator_may_queue_fn; 47 elevator_may_queue_fn *elevator_may_queue_fn;
48 48
49 elevator_init_fn *elevator_init_fn; 49 elevator_init_fn *elevator_init_fn;
50 elevator_exit_fn *elevator_exit_fn; 50 elevator_exit_fn *elevator_exit_fn;
51 void (*trim)(struct io_context *); 51 void (*trim)(struct io_context *);
52 }; 52 };
53 53
54 #define ELV_NAME_MAX (16) 54 #define ELV_NAME_MAX (16)
55 55
56 struct elv_fs_entry {
57 struct attribute attr;
58 ssize_t (*show)(elevator_t *, char *);
59 ssize_t (*store)(elevator_t *, const char *, size_t);
60 };
61
56 /* 62 /*
57 * identifies an elevator type, such as AS or deadline 63 * identifies an elevator type, such as AS or deadline
58 */ 64 */
59 struct elevator_type 65 struct elevator_type
60 { 66 {
61 struct list_head list; 67 struct list_head list;
62 struct elevator_ops ops; 68 struct elevator_ops ops;
63 struct elevator_type *elevator_type; 69 struct elevator_type *elevator_type;
64 struct attribute **elevator_attrs; 70 struct elv_fs_entry *elevator_attrs;
65 char elevator_name[ELV_NAME_MAX]; 71 char elevator_name[ELV_NAME_MAX];
66 struct module *elevator_owner; 72 struct module *elevator_owner;
67 }; 73 };
68 74
69 /* 75 /*
70 * each queue has an elevator_queue associated with it 76 * each queue has an elevator_queue associated with it
71 */ 77 */
72 struct elevator_queue 78 struct elevator_queue
73 { 79 {
74 struct elevator_ops *ops; 80 struct elevator_ops *ops;
75 void *elevator_data; 81 void *elevator_data;
76 struct kobject kobj; 82 struct kobject kobj;
77 struct elevator_type *elevator_type; 83 struct elevator_type *elevator_type;
78 struct mutex sysfs_lock; 84 struct mutex sysfs_lock;
79 }; 85 };
80 86
81 /* 87 /*
82 * block elevator interface 88 * block elevator interface
83 */ 89 */
84 extern void elv_dispatch_sort(request_queue_t *, struct request *); 90 extern void elv_dispatch_sort(request_queue_t *, struct request *);
85 extern void elv_add_request(request_queue_t *, struct request *, int, int); 91 extern void elv_add_request(request_queue_t *, struct request *, int, int);
86 extern void __elv_add_request(request_queue_t *, struct request *, int, int); 92 extern void __elv_add_request(request_queue_t *, struct request *, int, int);
87 extern void elv_insert(request_queue_t *, struct request *, int); 93 extern void elv_insert(request_queue_t *, struct request *, int);
88 extern int elv_merge(request_queue_t *, struct request **, struct bio *); 94 extern int elv_merge(request_queue_t *, struct request **, struct bio *);
89 extern void elv_merge_requests(request_queue_t *, struct request *, 95 extern void elv_merge_requests(request_queue_t *, struct request *,
90 struct request *); 96 struct request *);
91 extern void elv_merged_request(request_queue_t *, struct request *); 97 extern void elv_merged_request(request_queue_t *, struct request *);
92 extern void elv_dequeue_request(request_queue_t *, struct request *); 98 extern void elv_dequeue_request(request_queue_t *, struct request *);
93 extern void elv_requeue_request(request_queue_t *, struct request *); 99 extern void elv_requeue_request(request_queue_t *, struct request *);
94 extern int elv_queue_empty(request_queue_t *); 100 extern int elv_queue_empty(request_queue_t *);
95 extern struct request *elv_next_request(struct request_queue *q); 101 extern struct request *elv_next_request(struct request_queue *q);
96 extern struct request *elv_former_request(request_queue_t *, struct request *); 102 extern struct request *elv_former_request(request_queue_t *, struct request *);
97 extern struct request *elv_latter_request(request_queue_t *, struct request *); 103 extern struct request *elv_latter_request(request_queue_t *, struct request *);
98 extern int elv_register_queue(request_queue_t *q); 104 extern int elv_register_queue(request_queue_t *q);
99 extern void elv_unregister_queue(request_queue_t *q); 105 extern void elv_unregister_queue(request_queue_t *q);
100 extern int elv_may_queue(request_queue_t *, int, struct bio *); 106 extern int elv_may_queue(request_queue_t *, int, struct bio *);
101 extern void elv_completed_request(request_queue_t *, struct request *); 107 extern void elv_completed_request(request_queue_t *, struct request *);
102 extern int elv_set_request(request_queue_t *, struct request *, struct bio *, gfp_t); 108 extern int elv_set_request(request_queue_t *, struct request *, struct bio *, gfp_t);
103 extern void elv_put_request(request_queue_t *, struct request *); 109 extern void elv_put_request(request_queue_t *, struct request *);
104 110
105 /* 111 /*
106 * io scheduler registration 112 * io scheduler registration
107 */ 113 */
108 extern int elv_register(struct elevator_type *); 114 extern int elv_register(struct elevator_type *);
109 extern void elv_unregister(struct elevator_type *); 115 extern void elv_unregister(struct elevator_type *);
110 116
111 /* 117 /*
112 * io scheduler sysfs switching 118 * io scheduler sysfs switching
113 */ 119 */
114 extern ssize_t elv_iosched_show(request_queue_t *, char *); 120 extern ssize_t elv_iosched_show(request_queue_t *, char *);
115 extern ssize_t elv_iosched_store(request_queue_t *, const char *, size_t); 121 extern ssize_t elv_iosched_store(request_queue_t *, const char *, size_t);
116 122
117 extern int elevator_init(request_queue_t *, char *); 123 extern int elevator_init(request_queue_t *, char *);
118 extern void elevator_exit(elevator_t *); 124 extern void elevator_exit(elevator_t *);
119 extern int elv_rq_merge_ok(struct request *, struct bio *); 125 extern int elv_rq_merge_ok(struct request *, struct bio *);
120 126
121 /* 127 /*
122 * Return values from elevator merger 128 * Return values from elevator merger
123 */ 129 */
124 #define ELEVATOR_NO_MERGE 0 130 #define ELEVATOR_NO_MERGE 0
125 #define ELEVATOR_FRONT_MERGE 1 131 #define ELEVATOR_FRONT_MERGE 1
126 #define ELEVATOR_BACK_MERGE 2 132 #define ELEVATOR_BACK_MERGE 2
127 133
128 /* 134 /*
129 * Insertion selection 135 * Insertion selection
130 */ 136 */
131 #define ELEVATOR_INSERT_FRONT 1 137 #define ELEVATOR_INSERT_FRONT 1
132 #define ELEVATOR_INSERT_BACK 2 138 #define ELEVATOR_INSERT_BACK 2
133 #define ELEVATOR_INSERT_SORT 3 139 #define ELEVATOR_INSERT_SORT 3
134 #define ELEVATOR_INSERT_REQUEUE 4 140 #define ELEVATOR_INSERT_REQUEUE 4
135 141
136 /* 142 /*
137 * return values from elevator_may_queue_fn 143 * return values from elevator_may_queue_fn
138 */ 144 */
139 enum { 145 enum {
140 ELV_MQUEUE_MAY, 146 ELV_MQUEUE_MAY,
141 ELV_MQUEUE_NO, 147 ELV_MQUEUE_NO,
142 ELV_MQUEUE_MUST, 148 ELV_MQUEUE_MUST,
143 };
144
145 struct elv_fs_entry {
146 struct attribute attr;
147 ssize_t (*show)(elevator_t *, char *);
148 ssize_t (*store)(elevator_t *, const char *, size_t);