Commit c1b707d253fe918b92882cff1dbd926b47e14fd2

Authored by Jens Axboe
Committed by Linus Torvalds
1 parent 0261d6886e

[PATCH] CFQ: bad locking in changed_ioprio()

When the ioprio code was recently reshuffled, a bug was introduced:
changed_ioprio() is no longer called with interrupts disabled, so taking
the queue_lock with a plain spin_lock() is incorrect.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
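The changed hunk itself is not visible in the truncated listing below, but the fix the message implies is mechanical: switch the queue_lock acquisition in changed_ioprio() from spin_lock()/spin_unlock() to the irq-saving variants. A minimal sketch, assuming the function body roughly matches the surrounding cfq-iosched.c code (the prio_changed marking shown here is an assumption, not the verbatim upstream hunk):

static inline void changed_ioprio(struct cfq_io_context *cic)
{
	struct cfq_data *cfqd = cic->key;
	unsigned long flags;

	if (unlikely(!cfqd))
		return;

	/*
	 * Callers no longer run with interrupts disabled, so a plain
	 * spin_lock() here could deadlock against the same lock taken
	 * from interrupt context; save and restore the irq state.
	 */
	spin_lock_irqsave(cfqd->queue->queue_lock, flags);

	if (cic->cfqq[ASYNC])
		cfq_mark_cfqq_prio_changed(cic->cfqq[ASYNC]);	/* assumed */
	if (cic->cfqq[SYNC])
		cfq_mark_cfqq_prio_changed(cic->cfqq[SYNC]);	/* assumed */

	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
}

The deadlock risk is the classic one: if an interrupt handler tries to take queue_lock while this path already holds it with interrupts enabled, the CPU spins forever.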

Showing 1 changed file with 3 additions and 2 deletions (inline diff):

/*
 * CFQ, or complete fairness queueing, disk scheduler.
 *
 * Based on ideas from a previously unfinished io
 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
 *
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>

/*
 * tunables
 */
static const int cfq_quantum = 4;		/* max queue in one round of service */
static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
static const int cfq_back_max = 16 * 1024;	/* maximum backwards seek, in KiB */
static const int cfq_back_penalty = 2;		/* penalty of a backwards seek */

static const int cfq_slice_sync = HZ / 10;
static int cfq_slice_async = HZ / 25;
static const int cfq_slice_async_rq = 2;
static int cfq_slice_idle = HZ / 125;

#define CFQ_IDLE_GRACE		(HZ / 10)
#define CFQ_SLICE_SCALE		(5)

#define CFQ_KEY_ASYNC		(0)

/*
 * for the hash of cfqq inside the cfqd
 */
#define CFQ_QHASH_SHIFT		6
#define CFQ_QHASH_ENTRIES	(1 << CFQ_QHASH_SHIFT)
#define list_entry_qhash(entry)	hlist_entry((entry), struct cfq_queue, cfq_hash)

#define list_entry_cfqq(ptr)	list_entry((ptr), struct cfq_queue, cfq_list)

#define RQ_CIC(rq)		((struct cfq_io_context*)(rq)->elevator_private)
#define RQ_CFQQ(rq)		((rq)->elevator_private2)

static kmem_cache_t *cfq_pool;
static kmem_cache_t *cfq_ioc_pool;

static DEFINE_PER_CPU(unsigned long, ioc_count);
static struct completion *ioc_gone;

#define CFQ_PRIO_LISTS		IOPRIO_BE_NR
#define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
#define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)

#define ASYNC			(0)
#define SYNC			(1)

#define cfq_cfqq_dispatched(cfqq)	\
	((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC])

#define cfq_cfqq_class_sync(cfqq)	((cfqq)->key != CFQ_KEY_ASYNC)

#define cfq_cfqq_sync(cfqq)		\
	(cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC])

#define sample_valid(samples)	((samples) > 80)

/*
 * Per block device queue structure
 */
struct cfq_data {
	request_queue_t *queue;

	/*
	 * rr list of queues with requests and the count of them
	 */
	struct list_head rr_list[CFQ_PRIO_LISTS];
	struct list_head busy_rr;
	struct list_head cur_rr;
	struct list_head idle_rr;
	unsigned int busy_queues;

	/*
	 * cfqq lookup hash
	 */
	struct hlist_head *cfq_hash;

	int rq_in_driver;
	int hw_tag;

	/*
	 * idle window management
	 */
	struct timer_list idle_slice_timer;
	struct work_struct unplug_work;

	struct cfq_queue *active_queue;
	struct cfq_io_context *active_cic;
	int cur_prio, cur_end_prio;
	unsigned int dispatch_slice;

	struct timer_list idle_class_timer;

	sector_t last_sector;
	unsigned long last_end_request;

	/*
	 * tunables, see top of file
	 */
	unsigned int cfq_quantum;
	unsigned int cfq_fifo_expire[2];
	unsigned int cfq_back_penalty;
	unsigned int cfq_back_max;
	unsigned int cfq_slice[2];
	unsigned int cfq_slice_async_rq;
	unsigned int cfq_slice_idle;

	struct list_head cic_list;
};

/*
 * Per process-grouping structure
 */
struct cfq_queue {
	/* reference count */
	atomic_t ref;
	/* parent cfq_data */
	struct cfq_data *cfqd;
	/* cfqq lookup hash */
	struct hlist_node cfq_hash;
	/* hash key */
	unsigned int key;
	/* member of the rr/busy/cur/idle cfqd list */
	struct list_head cfq_list;
	/* sorted list of pending requests */
	struct rb_root sort_list;
	/* if fifo isn't expired, next request to serve */
	struct request *next_rq;
	/* requests queued in sort_list */
	int queued[2];
	/* currently allocated requests */
	int allocated[2];
	/* pending metadata requests */
	int meta_pending;
	/* fifo list of requests in sort_list */
	struct list_head fifo;

	unsigned long slice_start;
	unsigned long slice_end;
	unsigned long slice_left;

	/* number of requests that are on the dispatch list */
	int on_dispatch[2];

	/* io prio of this group */
	unsigned short ioprio, org_ioprio;
	unsigned short ioprio_class, org_ioprio_class;

	/* various state flags, see below */
	unsigned int flags;
};

enum cfqq_state_flags {
	CFQ_CFQQ_FLAG_on_rr = 0,
	CFQ_CFQQ_FLAG_wait_request,
	CFQ_CFQQ_FLAG_must_alloc,
	CFQ_CFQQ_FLAG_must_alloc_slice,
	CFQ_CFQQ_FLAG_must_dispatch,
	CFQ_CFQQ_FLAG_fifo_expire,
	CFQ_CFQQ_FLAG_idle_window,
	CFQ_CFQQ_FLAG_prio_changed,
	CFQ_CFQQ_FLAG_queue_new,
};

#define CFQ_CFQQ_FNS(name)						\
static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq)		\
{									\
	cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name);			\
}									\
static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq)	\
{									\
	cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name);			\
}									\
static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq)		\
{									\
	return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0;	\
}

CFQ_CFQQ_FNS(on_rr);
CFQ_CFQQ_FNS(wait_request);
CFQ_CFQQ_FNS(must_alloc);
CFQ_CFQQ_FNS(must_alloc_slice);
CFQ_CFQQ_FNS(must_dispatch);
CFQ_CFQQ_FNS(fifo_expire);
CFQ_CFQQ_FNS(idle_window);
CFQ_CFQQ_FNS(prio_changed);
CFQ_CFQQ_FNS(queue_new);
#undef CFQ_CFQQ_FNS

static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
static void cfq_dispatch_insert(request_queue_t *, struct request *);
static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask);

/*
 * scheduler run of queue, if there are requests pending and no one in the
 * driver that will restart queueing
 */
static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
{
	if (cfqd->busy_queues)
		kblockd_schedule_work(&cfqd->unplug_work);
}

static int cfq_queue_empty(request_queue_t *q)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

	return !cfqd->busy_queues;
}

static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
{
	if (rw == READ || rw == WRITE_SYNC)
		return task->pid;

	return CFQ_KEY_ASYNC;
}

/*
 * Lifted from AS - choose which of rq1 and rq2 that is best served now.
 * We choose the request that is closest to the head right now. Distance
 * behind the head is penalized and only allowed to a certain extent.
 */
static struct request *
cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
{
	sector_t last, s1, s2, d1 = 0, d2 = 0;
	unsigned long back_max;
#define CFQ_RQ1_WRAP	0x01 /* request 1 wraps */
#define CFQ_RQ2_WRAP	0x02 /* request 2 wraps */
	unsigned wrap = 0; /* bit mask: requests behind the disk head? */

	if (rq1 == NULL || rq1 == rq2)
		return rq2;
	if (rq2 == NULL)
		return rq1;

	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
		return rq1;
	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
		return rq2;
	if (rq_is_meta(rq1) && !rq_is_meta(rq2))
		return rq1;
	else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
		return rq2;

	s1 = rq1->sector;
	s2 = rq2->sector;

	last = cfqd->last_sector;

	/*
	 * by definition, 1KiB is 2 sectors
	 */
	back_max = cfqd->cfq_back_max * 2;

	/*
	 * Strict one way elevator _except_ in the case where we allow
	 * short backward seeks which are biased as twice the cost of a
	 * similar forward seek.
	 */
	if (s1 >= last)
		d1 = s1 - last;
	else if (s1 + back_max >= last)
		d1 = (last - s1) * cfqd->cfq_back_penalty;
	else
		wrap |= CFQ_RQ1_WRAP;

	if (s2 >= last)
		d2 = s2 - last;
	else if (s2 + back_max >= last)
		d2 = (last - s2) * cfqd->cfq_back_penalty;
	else
		wrap |= CFQ_RQ2_WRAP;

	/* Found required data */

	/*
	 * By doing switch() on the bit mask "wrap" we avoid having to
	 * check two variables for all permutations: --> faster!
	 */
	switch (wrap) {
	case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
		if (d1 < d2)
			return rq1;
		else if (d2 < d1)
			return rq2;
		else {
			if (s1 >= s2)
				return rq1;
			else
				return rq2;
		}

	case CFQ_RQ2_WRAP:
		return rq1;
	case CFQ_RQ1_WRAP:
		return rq2;
	case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
	default:
		/*
		 * Since both rqs are wrapped,
		 * start with the one that's further behind head
		 * (--> only *one* back seek required),
		 * since back seek takes more time than forward.
		 */
		if (s1 <= s2)
			return rq1;
		else
			return rq2;
	}
}

/*
 * would be nice to take fifo expire time into account as well
 */
static struct request *
cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		  struct request *last)
{
	struct rb_node *rbnext = rb_next(&last->rb_node);
	struct rb_node *rbprev = rb_prev(&last->rb_node);
	struct request *next = NULL, *prev = NULL;

	BUG_ON(RB_EMPTY_NODE(&last->rb_node));

	if (rbprev)
		prev = rb_entry_rq(rbprev);

	if (rbnext)
		next = rb_entry_rq(rbnext);
	else {
		rbnext = rb_first(&cfqq->sort_list);
		if (rbnext && rbnext != &last->rb_node)
			next = rb_entry_rq(rbnext);
	}

	return cfq_choose_req(cfqd, next, prev);
}

static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
{
	struct cfq_data *cfqd = cfqq->cfqd;
	struct list_head *list;

	BUG_ON(!cfq_cfqq_on_rr(cfqq));

	list_del(&cfqq->cfq_list);

	if (cfq_class_rt(cfqq))
		list = &cfqd->cur_rr;
	else if (cfq_class_idle(cfqq))
		list = &cfqd->idle_rr;
	else {
		/*
		 * if cfqq has requests in flight, don't allow it to be
		 * found in cfq_set_active_queue before it has finished them.
		 * this is done to increase fairness between a process that
		 * has lots of io pending vs one that only generates one
		 * sporadically or synchronously
		 */
		if (cfq_cfqq_dispatched(cfqq))
			list = &cfqd->busy_rr;
		else
			list = &cfqd->rr_list[cfqq->ioprio];
	}

	/*
	 * If this queue was preempted or is new (never been serviced), let
	 * it be added first for fairness but beind other new queues.
	 * Otherwise, just add to the back of the list.
	 */
	if (preempted || cfq_cfqq_queue_new(cfqq)) {
		struct list_head *n = list;
		struct cfq_queue *__cfqq;

		while (n->next != list) {
			__cfqq = list_entry_cfqq(n->next);
			if (!cfq_cfqq_queue_new(__cfqq))
				break;

			n = n->next;
		}

		list = n;
	}

	list_add_tail(&cfqq->cfq_list, list);
}

/*
 * add to busy list of queues for service, trying to be fair in ordering
 * the pending list according to last request service
 */
static inline void
cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	BUG_ON(cfq_cfqq_on_rr(cfqq));
	cfq_mark_cfqq_on_rr(cfqq);
	cfqd->busy_queues++;

	cfq_resort_rr_list(cfqq, 0);
}

static inline void
cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	BUG_ON(!cfq_cfqq_on_rr(cfqq));
	cfq_clear_cfqq_on_rr(cfqq);
	list_del_init(&cfqq->cfq_list);

	BUG_ON(!cfqd->busy_queues);
	cfqd->busy_queues--;
}

/*
 * rb tree support functions
 */
static inline void cfq_del_rq_rb(struct request *rq)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
	struct cfq_data *cfqd = cfqq->cfqd;
	const int sync = rq_is_sync(rq);

	BUG_ON(!cfqq->queued[sync]);
	cfqq->queued[sync]--;

	elv_rb_del(&cfqq->sort_list, rq);

	if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
		cfq_del_cfqq_rr(cfqd, cfqq);
}

static void cfq_add_rq_rb(struct request *rq)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
	struct cfq_data *cfqd = cfqq->cfqd;
	struct request *__alias;

	cfqq->queued[rq_is_sync(rq)]++;

	/*
	 * looks a little odd, but the first insert might return an alias.
	 * if that happens, put the alias on the dispatch list
	 */
	while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
		cfq_dispatch_insert(cfqd->queue, __alias);
}

static inline void
cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
{
	elv_rb_del(&cfqq->sort_list, rq);
	cfqq->queued[rq_is_sync(rq)]--;
	cfq_add_rq_rb(rq);
}

static struct request *
cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
{
	struct task_struct *tsk = current;
	pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio));
	struct cfq_queue *cfqq;

	cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio);
	if (cfqq) {
		sector_t sector = bio->bi_sector + bio_sectors(bio);

		return elv_rb_find(&cfqq->sort_list, sector);
	}

	return NULL;
}

static void cfq_activate_request(request_queue_t *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

	cfqd->rq_in_driver++;

	/*
	 * If the depth is larger 1, it really could be queueing. But lets
	 * make the mark a little higher - idling could still be good for
	 * low queueing, and a low queueing number could also just indicate
	 * a SCSI mid layer like behaviour where limit+1 is often seen.
	 */
	if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
		cfqd->hw_tag = 1;
}

static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

	WARN_ON(!cfqd->rq_in_driver);
	cfqd->rq_in_driver--;
}

static void cfq_remove_request(struct request *rq)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);

	if (cfqq->next_rq == rq)
		cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);

	list_del_init(&rq->queuelist);
	cfq_del_rq_rb(rq);

	if (rq_is_meta(rq)) {
		WARN_ON(!cfqq->meta_pending);
		cfqq->meta_pending--;
	}
}

static int
cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct request *__rq;

	__rq = cfq_find_rq_fmerge(cfqd, bio);
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_FRONT_MERGE;
	}

	return ELEVATOR_NO_MERGE;
}

static void cfq_merged_request(request_queue_t *q, struct request *req,
			       int type)
{
	if (type == ELEVATOR_FRONT_MERGE) {
		struct cfq_queue *cfqq = RQ_CFQQ(req);

		cfq_reposition_rq_rb(cfqq, req);
	}
}

static void
cfq_merged_requests(request_queue_t *q, struct request *rq,
		    struct request *next)
{
	/*
	 * reposition in fifo if next is older than rq
	 */
	if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
	    time_before(next->start_time, rq->start_time))
		list_move(&rq->queuelist, &next->queuelist);

	cfq_remove_request(next);
}

static inline void
__cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	if (cfqq) {
		/*
		 * stop potential idle class queues waiting service
		 */
		del_timer(&cfqd->idle_class_timer);

		cfqq->slice_start = jiffies;
		cfqq->slice_end = 0;
		cfqq->slice_left = 0;
		cfq_clear_cfqq_must_alloc_slice(cfqq);
		cfq_clear_cfqq_fifo_expire(cfqq);
	}

	cfqd->active_queue = cfqq;
}

/*
 * current cfqq expired its slice (or was too idle), select new one
 */
static void
__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		    int preempted)
{
	unsigned long now = jiffies;

	if (cfq_cfqq_wait_request(cfqq))
		del_timer(&cfqd->idle_slice_timer);

	if (!preempted && !cfq_cfqq_dispatched(cfqq))
		cfq_schedule_dispatch(cfqd);

	cfq_clear_cfqq_must_dispatch(cfqq);
	cfq_clear_cfqq_wait_request(cfqq);
	cfq_clear_cfqq_queue_new(cfqq);

	/*
	 * store what was left of this slice, if the queue idled out
	 * or was preempted
	 */
	if (time_after(cfqq->slice_end, now))
		cfqq->slice_left = cfqq->slice_end - now;
	else
		cfqq->slice_left = 0;

	if (cfq_cfqq_on_rr(cfqq))
		cfq_resort_rr_list(cfqq, preempted);

	if (cfqq == cfqd->active_queue)
		cfqd->active_queue = NULL;

	if (cfqd->active_cic) {
		put_io_context(cfqd->active_cic->ioc);
		cfqd->active_cic = NULL;
	}

	cfqd->dispatch_slice = 0;
}

static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted)
{
	struct cfq_queue *cfqq = cfqd->active_queue;

	if (cfqq)
		__cfq_slice_expired(cfqd, cfqq, preempted);
}

/*
 * 0
 * 0,1
 * 0,1,2
 * 0,1,2,3
 * 0,1,2,3,4
 * 0,1,2,3,4,5
 * 0,1,2,3,4,5,6
 * 0,1,2,3,4,5,6,7
 */
static int cfq_get_next_prio_level(struct cfq_data *cfqd)
{
	int prio, wrap;

	prio = -1;
	wrap = 0;
	do {
		int p;

		for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) {
			if (!list_empty(&cfqd->rr_list[p])) {
				prio = p;
				break;
			}
		}

		if (prio != -1)
			break;
		cfqd->cur_prio = 0;
		if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
			cfqd->cur_end_prio = 0;
			if (wrap)
				break;
			wrap = 1;
		}
	} while (1);

	if (unlikely(prio == -1))
		return -1;

	BUG_ON(prio >= CFQ_PRIO_LISTS);

	list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr);

	cfqd->cur_prio = prio + 1;
	if (cfqd->cur_prio > cfqd->cur_end_prio) {
		cfqd->cur_end_prio = cfqd->cur_prio;
		cfqd->cur_prio = 0;
	}
	if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
		cfqd->cur_prio = 0;
		cfqd->cur_end_prio = 0;
	}

	return prio;
}

static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq = NULL;

	if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) {
		/*
		 * if current list is non-empty, grab first entry. if it is
		 * empty, get next prio level and grab first entry then if any
		 * are spliced
		 */
		cfqq = list_entry_cfqq(cfqd->cur_rr.next);
	} else if (!list_empty(&cfqd->busy_rr)) {
		/*
		 * If no new queues are available, check if the busy list has
		 * some before falling back to idle io.
		 */
		cfqq = list_entry_cfqq(cfqd->busy_rr.next);
	} else if (!list_empty(&cfqd->idle_rr)) {
		/*
		 * if we have idle queues and no rt or be queues had pending
		 * requests, either allow immediate service if the grace period
		 * has passed or arm the idle grace timer
		 */
		unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;

		if (time_after_eq(jiffies, end))
			cfqq = list_entry_cfqq(cfqd->idle_rr.next);
		else
			mod_timer(&cfqd->idle_class_timer, end);
	}

	__cfq_set_active_queue(cfqd, cfqq);
	return cfqq;
}

#define CIC_SEEKY(cic) ((cic)->seek_mean > (128 * 1024))

static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)

{
	struct cfq_io_context *cic;
	unsigned long sl;

	WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
	WARN_ON(cfqq != cfqd->active_queue);

	/*
	 * idle is disabled, either manually or by past process history
	 */
	if (!cfqd->cfq_slice_idle)
		return 0;
	if (!cfq_cfqq_idle_window(cfqq))
		return 0;
	/*
	 * task has exited, don't wait
	 */
	cic = cfqd->active_cic;
	if (!cic || !cic->ioc->task)
		return 0;

	cfq_mark_cfqq_must_dispatch(cfqq);
	cfq_mark_cfqq_wait_request(cfqq);

	sl = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle);

	/*
	 * we don't want to idle for seeks, but we do want to allow
	 * fair distribution of slice time for a process doing back-to-back
	 * seeks. so allow a little bit of time for him to submit a new rq
	 */
	if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
		sl = min(sl, msecs_to_jiffies(2));

	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
	return 1;
}

static void cfq_dispatch_insert(request_queue_t *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct cfq_queue *cfqq = RQ_CFQQ(rq);

	cfq_remove_request(rq);
	cfqq->on_dispatch[rq_is_sync(rq)]++;
	elv_dispatch_sort(q, rq);

	rq = list_entry(q->queue_head.prev, struct request, queuelist);
	cfqd->last_sector = rq->sector + rq->nr_sectors;
}

/*
 * return expired entry, or NULL to just start from scratch in rbtree
 */
static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq)
{
	struct cfq_data *cfqd = cfqq->cfqd;
	struct request *rq;
	int fifo;

	if (cfq_cfqq_fifo_expire(cfqq))
		return NULL;
	if (list_empty(&cfqq->fifo))
		return NULL;

	fifo = cfq_cfqq_class_sync(cfqq);
	rq = rq_entry_fifo(cfqq->fifo.next);

	if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
		cfq_mark_cfqq_fifo_expire(cfqq);
		return rq;
	}

	return NULL;
}

/*
 * Scale schedule slice based on io priority. Use the sync time slice only
 * if a queue is marked sync and has sync io queued. A sync queue with async
 * io only, should not get full sync slice length.
 */
static inline int
cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)];

	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);

	return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio));
}

static inline void
cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
}

static inline int
cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	const int base_rq = cfqd->cfq_slice_async_rq;

	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);

	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
}

/*
 * get next queue for service
 */
static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
{
	unsigned long now = jiffies;
	struct cfq_queue *cfqq;

	cfqq = cfqd->active_queue;
	if (!cfqq)
		goto new_queue;

	/*
	 * slice has expired
	 */
	if (!cfq_cfqq_must_dispatch(cfqq) && time_after(now, cfqq->slice_end))
		goto expire;

	/*
	 * if queue has requests, dispatch one. if not, check if
	 * enough slice is left to wait for one
	 */
	if (!RB_EMPTY_ROOT(&cfqq->sort_list))
		goto keep_queue;
	else if (cfq_cfqq_dispatched(cfqq)) {
		cfqq = NULL;
		goto keep_queue;
	} else if (cfq_cfqq_class_sync(cfqq)) {
		if (cfq_arm_slice_timer(cfqd, cfqq))
			return NULL;
	}

expire:
	cfq_slice_expired(cfqd, 0);
new_queue:
	cfqq = cfq_set_active_queue(cfqd);
keep_queue:
	return cfqq;
}

static int
__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
			int max_dispatch)
{
	int dispatched = 0;

	BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));

	do {
		struct request *rq;

		/*
		 * follow expired path, else get first next available
		 */
		if ((rq = cfq_check_fifo(cfqq)) == NULL)
			rq = cfqq->next_rq;

		/*
		 * finally, insert request into driver dispatch list
		 */
		cfq_dispatch_insert(cfqd->queue, rq);

		cfqd->dispatch_slice++;
		dispatched++;

		if (!cfqd->active_cic) {
			atomic_inc(&RQ_CIC(rq)->ioc->refcount);
			cfqd->active_cic = RQ_CIC(rq);
		}

		if (RB_EMPTY_ROOT(&cfqq->sort_list))
			break;

	} while (dispatched < max_dispatch);

	/*
	 * if slice end isn't set yet, set it.
	 */
	if (!cfqq->slice_end)
		cfq_set_prio_slice(cfqd, cfqq);

	/*
	 * expire an async queue immediately if it has used up its slice. idle
	 * queue always expire after 1 dispatch round.
	 */
	if ((!cfq_cfqq_sync(cfqq) &&
	    cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
	    cfq_class_idle(cfqq) ||
	    !cfq_cfqq_idle_window(cfqq))
		cfq_slice_expired(cfqd, 0);

	return dispatched;
}

static int
cfq_forced_dispatch_cfqqs(struct list_head *list)
{
	struct cfq_queue *cfqq, *next;
	int dispatched;

	dispatched = 0;
	list_for_each_entry_safe(cfqq, next, list, cfq_list) {
		while (cfqq->next_rq) {
			cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
			dispatched++;
		}
		BUG_ON(!list_empty(&cfqq->fifo));
	}

	return dispatched;
}

static int
cfq_forced_dispatch(struct cfq_data *cfqd)
{
	int i, dispatched = 0;

	for (i = 0; i < CFQ_PRIO_LISTS; i++)
		dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]);

	dispatched += cfq_forced_dispatch_cfqqs(&cfqd->busy_rr);
	dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr);
	dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr);

	cfq_slice_expired(cfqd, 0);

	BUG_ON(cfqd->busy_queues);

	return dispatched;
}

static int
cfq_dispatch_requests(request_queue_t *q, int force)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct cfq_queue *cfqq, *prev_cfqq;
	int dispatched;

	if (!cfqd->busy_queues)
		return 0;

	if (unlikely(force))
		return cfq_forced_dispatch(cfqd);

	dispatched = 0;
	prev_cfqq = NULL;
	while ((cfqq = cfq_select_queue(cfqd)) != NULL) {
		int max_dispatch;

		/*
		 * Don't repeat dispatch from the previous queue.
		 */
		if (prev_cfqq == cfqq)
			break;

		cfq_clear_cfqq_must_dispatch(cfqq);
		cfq_clear_cfqq_wait_request(cfqq);
		del_timer(&cfqd->idle_slice_timer);

		max_dispatch = cfqd->cfq_quantum;
		if (cfq_class_idle(cfqq))
			max_dispatch = 1;

		dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);

		/*
		 * If the dispatch cfqq has idling enabled and is still
		 * the active queue, break out.
		 */
		if (cfq_cfqq_idle_window(cfqq) && cfqd->active_queue)
			break;

		prev_cfqq = cfqq;
	}

	return dispatched;
}

/*
 * task holds one reference to the queue, dropped when task exits. each rq
 * in-flight on this queue also holds a reference, dropped when rq is freed.
 *
 * queue lock must be held here.
 */
static void cfq_put_queue(struct cfq_queue *cfqq)
{
	struct cfq_data *cfqd = cfqq->cfqd;

	BUG_ON(atomic_read(&cfqq->ref) <= 0);

	if (!atomic_dec_and_test(&cfqq->ref))
		return;

	BUG_ON(rb_first(&cfqq->sort_list));
	BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
	BUG_ON(cfq_cfqq_on_rr(cfqq));

	if (unlikely(cfqd->active_queue == cfqq))
		__cfq_slice_expired(cfqd, cfqq, 0);

	/*
	 * it's on the empty list and still hashed
	 */
	list_del(&cfqq->cfq_list);
	hlist_del(&cfqq->cfq_hash);
	kmem_cache_free(cfq_pool, cfqq);
}

static struct cfq_queue *
__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
		    const int hashval)
{
	struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
	struct hlist_node *entry;
	struct cfq_queue *__cfqq;

	hlist_for_each_entry(__cfqq, entry, hash_list, cfq_hash) {
		const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->org_ioprio_class, __cfqq->org_ioprio);

		if (__cfqq->key == key && (__p == prio || !prio))
			return __cfqq;
	}

	return NULL;
}

static struct cfq_queue *
cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio)
{
	return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT));
}

static void cfq_free_io_context(struct io_context *ioc)
{
	struct cfq_io_context *__cic;
	struct rb_node *n;
	int freed = 0;

	while ((n = rb_first(&ioc->cic_root)) != NULL) {
		__cic = rb_entry(n, struct cfq_io_context, rb_node);
		rb_erase(&__cic->rb_node, &ioc->cic_root);
		kmem_cache_free(cfq_ioc_pool, __cic);
		freed++;
	}

	elv_ioc_count_mod(ioc_count, -freed);

	if (ioc_gone && !elv_ioc_count_read(ioc_count))
		complete(ioc_gone);
}

static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	if (unlikely(cfqq == cfqd->active_queue))
		__cfq_slice_expired(cfqd, cfqq, 0);

	cfq_put_queue(cfqq);
}

static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
					 struct cfq_io_context *cic)
{
	list_del_init(&cic->queue_list);
	smp_wmb();
	cic->key = NULL;

	if (cic->cfqq[ASYNC]) {
		cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]);
		cic->cfqq[ASYNC] = NULL;
	}

	if (cic->cfqq[SYNC]) {
		cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]);
		cic->cfqq[SYNC] = NULL;
	}
}


/*
 * Called with interrupts disabled
 */
static void cfq_exit_single_io_context(struct cfq_io_context *cic)
{
	struct cfq_data *cfqd = cic->key;

	if (cfqd) {
		request_queue_t *q = cfqd->queue;

		spin_lock_irq(q->queue_lock);
		__cfq_exit_single_io_context(cfqd, cic);
		spin_unlock_irq(q->queue_lock);
	}
}

static void cfq_exit_io_context(struct io_context *ioc)
{
	struct cfq_io_context *__cic;
	struct rb_node *n;

	/*
	 * put the reference this task is holding to the various queues
	 */

	n = rb_first(&ioc->cic_root);
	while (n != NULL) {
		__cic = rb_entry(n, struct cfq_io_context, rb_node);
1143 1143
1144 cfq_exit_single_io_context(__cic); 1144 cfq_exit_single_io_context(__cic);
1145 n = rb_next(n); 1145 n = rb_next(n);
1146 } 1146 }
1147 } 1147 }
1148 1148
1149 static struct cfq_io_context * 1149 static struct cfq_io_context *
1150 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 1150 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
1151 { 1151 {
1152 struct cfq_io_context *cic; 1152 struct cfq_io_context *cic;
1153 1153
1154 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask, cfqd->queue->node); 1154 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask, cfqd->queue->node);
1155 if (cic) { 1155 if (cic) {
1156 memset(cic, 0, sizeof(*cic)); 1156 memset(cic, 0, sizeof(*cic));
1157 cic->last_end_request = jiffies; 1157 cic->last_end_request = jiffies;
1158 INIT_LIST_HEAD(&cic->queue_list); 1158 INIT_LIST_HEAD(&cic->queue_list);
1159 cic->dtor = cfq_free_io_context; 1159 cic->dtor = cfq_free_io_context;
1160 cic->exit = cfq_exit_io_context; 1160 cic->exit = cfq_exit_io_context;
1161 elv_ioc_count_inc(ioc_count); 1161 elv_ioc_count_inc(ioc_count);
1162 } 1162 }
1163 1163
1164 return cic; 1164 return cic;
1165 } 1165 }
1166 1166
1167 static void cfq_init_prio_data(struct cfq_queue *cfqq) 1167 static void cfq_init_prio_data(struct cfq_queue *cfqq)
1168 { 1168 {
1169 struct task_struct *tsk = current; 1169 struct task_struct *tsk = current;
1170 int ioprio_class; 1170 int ioprio_class;
1171 1171
1172 if (!cfq_cfqq_prio_changed(cfqq)) 1172 if (!cfq_cfqq_prio_changed(cfqq))
1173 return; 1173 return;
1174 1174
1175 ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio); 1175 ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio);
1176 switch (ioprio_class) { 1176 switch (ioprio_class) {
1177 default: 1177 default:
1178 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); 1178 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
1179 case IOPRIO_CLASS_NONE: 1179 case IOPRIO_CLASS_NONE:
1180 /* 1180 /*
1181 * no prio set, place us in the middle of the BE classes 1181 * no prio set, place us in the middle of the BE classes
1182 */ 1182 */
1183 cfqq->ioprio = task_nice_ioprio(tsk); 1183 cfqq->ioprio = task_nice_ioprio(tsk);
1184 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1184 cfqq->ioprio_class = IOPRIO_CLASS_BE;
1185 break; 1185 break;
1186 case IOPRIO_CLASS_RT: 1186 case IOPRIO_CLASS_RT:
1187 cfqq->ioprio = task_ioprio(tsk); 1187 cfqq->ioprio = task_ioprio(tsk);
1188 cfqq->ioprio_class = IOPRIO_CLASS_RT; 1188 cfqq->ioprio_class = IOPRIO_CLASS_RT;
1189 break; 1189 break;
1190 case IOPRIO_CLASS_BE: 1190 case IOPRIO_CLASS_BE:
1191 cfqq->ioprio = task_ioprio(tsk); 1191 cfqq->ioprio = task_ioprio(tsk);
1192 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1192 cfqq->ioprio_class = IOPRIO_CLASS_BE;
1193 break; 1193 break;
1194 case IOPRIO_CLASS_IDLE: 1194 case IOPRIO_CLASS_IDLE:
1195 cfqq->ioprio_class = IOPRIO_CLASS_IDLE; 1195 cfqq->ioprio_class = IOPRIO_CLASS_IDLE;
1196 cfqq->ioprio = 7; 1196 cfqq->ioprio = 7;
1197 cfq_clear_cfqq_idle_window(cfqq); 1197 cfq_clear_cfqq_idle_window(cfqq);
1198 break; 1198 break;
1199 } 1199 }
1200 1200
1201 /* 1201 /*
1202 * keep track of original prio settings in case we have to temporarily 1202 * keep track of original prio settings in case we have to temporarily
1203 * elevate the priority of this queue 1203 * elevate the priority of this queue
1204 */ 1204 */
1205 cfqq->org_ioprio = cfqq->ioprio; 1205 cfqq->org_ioprio = cfqq->ioprio;
1206 cfqq->org_ioprio_class = cfqq->ioprio_class; 1206 cfqq->org_ioprio_class = cfqq->ioprio_class;
1207 1207
1208 if (cfq_cfqq_on_rr(cfqq)) 1208 if (cfq_cfqq_on_rr(cfqq))
1209 cfq_resort_rr_list(cfqq, 0); 1209 cfq_resort_rr_list(cfqq, 0);
1210 1210
1211 cfq_clear_cfqq_prio_changed(cfqq); 1211 cfq_clear_cfqq_prio_changed(cfqq);
1212 } 1212 }
1213 1213
1214 static inline void changed_ioprio(struct cfq_io_context *cic) 1214 static inline void changed_ioprio(struct cfq_io_context *cic)
1215 { 1215 {
1216 struct cfq_data *cfqd = cic->key; 1216 struct cfq_data *cfqd = cic->key;
1217 struct cfq_queue *cfqq; 1217 struct cfq_queue *cfqq;
1218 unsigned long flags;
1218 1219
1219 if (unlikely(!cfqd)) 1220 if (unlikely(!cfqd))
1220 return; 1221 return;
1221 1222
1222 spin_lock(cfqd->queue->queue_lock); 1223 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
1223 1224
1224 cfqq = cic->cfqq[ASYNC]; 1225 cfqq = cic->cfqq[ASYNC];
1225 if (cfqq) { 1226 if (cfqq) {
1226 struct cfq_queue *new_cfqq; 1227 struct cfq_queue *new_cfqq;
1227 new_cfqq = cfq_get_queue(cfqd, CFQ_KEY_ASYNC, cic->ioc->task, 1228 new_cfqq = cfq_get_queue(cfqd, CFQ_KEY_ASYNC, cic->ioc->task,
1228 GFP_ATOMIC); 1229 GFP_ATOMIC);
1229 if (new_cfqq) { 1230 if (new_cfqq) {
1230 cic->cfqq[ASYNC] = new_cfqq; 1231 cic->cfqq[ASYNC] = new_cfqq;
1231 cfq_put_queue(cfqq); 1232 cfq_put_queue(cfqq);
1232 } 1233 }
1233 } 1234 }
1234 1235
1235 cfqq = cic->cfqq[SYNC]; 1236 cfqq = cic->cfqq[SYNC];
1236 if (cfqq) 1237 if (cfqq)
1237 cfq_mark_cfqq_prio_changed(cfqq); 1238 cfq_mark_cfqq_prio_changed(cfqq);
1238 1239
1239 spin_unlock(cfqd->queue->queue_lock); 1240 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
1240 } 1241 }
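The changed_ioprio() helper above takes the queue_lock with the irqsave/irqrestore variants, which save and later restore the caller's interrupt state instead of assuming anything about it. A minimal sketch of that pattern, separate from this file (example_update(), lock and shared are illustrative names, not part of the scheduler):

    #include <linux/spinlock.h>

    /*
     * Illustrative only: when the caller's interrupt state is unknown, take
     * the lock with irqsave and release it with irqrestore.  Plain
     * spin_lock_irq()/spin_unlock_irq() would unconditionally re-enable
     * interrupts on unlock, and plain spin_lock() could deadlock against an
     * interrupt handler contending for the same lock.
     */
    static void example_update(spinlock_t *lock, int *shared, int val)
    {
    	unsigned long flags;

    	spin_lock_irqsave(lock, flags);		/* disable irqs, remember prior state */
    	*shared = val;				/* touch data also used under the lock elsewhere */
    	spin_unlock_irqrestore(lock, flags);	/* restore the saved irq state */
    }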
1241 1242
1242 static void cfq_ioc_set_ioprio(struct io_context *ioc) 1243 static void cfq_ioc_set_ioprio(struct io_context *ioc)
1243 { 1244 {
1244 struct cfq_io_context *cic; 1245 struct cfq_io_context *cic;
1245 struct rb_node *n; 1246 struct rb_node *n;
1246 1247
1247 ioc->ioprio_changed = 0; 1248 ioc->ioprio_changed = 0;
1248 1249
1249 n = rb_first(&ioc->cic_root); 1250 n = rb_first(&ioc->cic_root);
1250 while (n != NULL) { 1251 while (n != NULL) {
1251 cic = rb_entry(n, struct cfq_io_context, rb_node); 1252 cic = rb_entry(n, struct cfq_io_context, rb_node);
1252 1253
1253 changed_ioprio(cic); 1254 changed_ioprio(cic);
1254 n = rb_next(n); 1255 n = rb_next(n);
1255 } 1256 }
1256 } 1257 }
1257 1258
1258 static struct cfq_queue * 1259 static struct cfq_queue *
1259 cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, 1260 cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk,
1260 gfp_t gfp_mask) 1261 gfp_t gfp_mask)
1261 { 1262 {
1262 const int hashval = hash_long(key, CFQ_QHASH_SHIFT); 1263 const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
1263 struct cfq_queue *cfqq, *new_cfqq = NULL; 1264 struct cfq_queue *cfqq, *new_cfqq = NULL;
1264 unsigned short ioprio; 1265 unsigned short ioprio;
1265 1266
1266 retry: 1267 retry:
1267 ioprio = tsk->ioprio; 1268 ioprio = tsk->ioprio;
1268 cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval); 1269 cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval);
1269 1270
1270 if (!cfqq) { 1271 if (!cfqq) {
1271 if (new_cfqq) { 1272 if (new_cfqq) {
1272 cfqq = new_cfqq; 1273 cfqq = new_cfqq;
1273 new_cfqq = NULL; 1274 new_cfqq = NULL;
1274 } else if (gfp_mask & __GFP_WAIT) { 1275 } else if (gfp_mask & __GFP_WAIT) {
1275 /* 1276 /*
1276 * Inform the allocator of the fact that we will 1277 * Inform the allocator of the fact that we will
1277 * just repeat this allocation if it fails, to allow 1278 * just repeat this allocation if it fails, to allow
1278 * the allocator to do whatever it needs to attempt to 1279 * the allocator to do whatever it needs to attempt to
1279 * free memory. 1280 * free memory.
1280 */ 1281 */
1281 spin_unlock_irq(cfqd->queue->queue_lock); 1282 spin_unlock_irq(cfqd->queue->queue_lock);
1282 new_cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask|__GFP_NOFAIL, cfqd->queue->node); 1283 new_cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask|__GFP_NOFAIL, cfqd->queue->node);
1283 spin_lock_irq(cfqd->queue->queue_lock); 1284 spin_lock_irq(cfqd->queue->queue_lock);
1284 goto retry; 1285 goto retry;
1285 } else { 1286 } else {
1286 cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask, cfqd->queue->node); 1287 cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask, cfqd->queue->node);
1287 if (!cfqq) 1288 if (!cfqq)
1288 goto out; 1289 goto out;
1289 } 1290 }
1290 1291
1291 memset(cfqq, 0, sizeof(*cfqq)); 1292 memset(cfqq, 0, sizeof(*cfqq));
1292 1293
1293 INIT_HLIST_NODE(&cfqq->cfq_hash); 1294 INIT_HLIST_NODE(&cfqq->cfq_hash);
1294 INIT_LIST_HEAD(&cfqq->cfq_list); 1295 INIT_LIST_HEAD(&cfqq->cfq_list);
1295 INIT_LIST_HEAD(&cfqq->fifo); 1296 INIT_LIST_HEAD(&cfqq->fifo);
1296 1297
1297 cfqq->key = key; 1298 cfqq->key = key;
1298 hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); 1299 hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
1299 atomic_set(&cfqq->ref, 0); 1300 atomic_set(&cfqq->ref, 0);
1300 cfqq->cfqd = cfqd; 1301 cfqq->cfqd = cfqd;
1301 /* 1302 /*
1302 * set ->slice_left to allow preemption for a new process 1303 * set ->slice_left to allow preemption for a new process
1303 */ 1304 */
1304 cfqq->slice_left = 2 * cfqd->cfq_slice_idle; 1305 cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
1305 cfq_mark_cfqq_idle_window(cfqq); 1306 cfq_mark_cfqq_idle_window(cfqq);
1306 cfq_mark_cfqq_prio_changed(cfqq); 1307 cfq_mark_cfqq_prio_changed(cfqq);
1307 cfq_mark_cfqq_queue_new(cfqq); 1308 cfq_mark_cfqq_queue_new(cfqq);
1308 cfq_init_prio_data(cfqq); 1309 cfq_init_prio_data(cfqq);
1309 } 1310 }
1310 1311
1311 if (new_cfqq) 1312 if (new_cfqq)
1312 kmem_cache_free(cfq_pool, new_cfqq); 1313 kmem_cache_free(cfq_pool, new_cfqq);
1313 1314
1314 atomic_inc(&cfqq->ref); 1315 atomic_inc(&cfqq->ref);
1315 out: 1316 out:
1316 WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); 1317 WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
1317 return cfqq; 1318 return cfqq;
1318 } 1319 }
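cfq_get_queue() above drops the queue_lock around an allocation that may sleep and then repeats the lookup, since another context may have created and hashed the queue while the lock was not held. A simplified sketch of that unlock/allocate/relock/retry pattern (struct example, example_find() and example_insert() are hypothetical helpers, and the original's non-blocking allocation branch is omitted):

    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct example {
    	int key;
    };

    /* hypothetical lookup/insert into a structure protected by *lock */
    static struct example *example_find(int key);
    static void example_insert(struct example *obj);

    static struct example *example_get(spinlock_t *lock, int key, gfp_t gfp_mask)
    {
    	struct example *obj, *new_obj = NULL;

    retry:
    	obj = example_find(key);
    	if (!obj) {
    		if (new_obj) {
    			/* use the object allocated while the lock was dropped */
    			obj = new_obj;
    			new_obj = NULL;
    		} else if (gfp_mask & __GFP_WAIT) {
    			spin_unlock_irq(lock);	/* the allocation may sleep */
    			new_obj = kmalloc(sizeof(*new_obj), gfp_mask | __GFP_NOFAIL);
    			spin_lock_irq(lock);
    			goto retry;		/* state may have changed while unlocked */
    		} else {
    			return NULL;		/* atomic caller: give up instead of sleeping */
    		}

    		obj->key = key;
    		example_insert(obj);
    	}

    	if (new_obj)
    		kfree(new_obj);	/* someone else inserted first; discard the spare */

    	return obj;
    }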
1319 1320
1320 static void 1321 static void
1321 cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) 1322 cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)
1322 { 1323 {
1323 WARN_ON(!list_empty(&cic->queue_list)); 1324 WARN_ON(!list_empty(&cic->queue_list));
1324 rb_erase(&cic->rb_node, &ioc->cic_root); 1325 rb_erase(&cic->rb_node, &ioc->cic_root);
1325 kmem_cache_free(cfq_ioc_pool, cic); 1326 kmem_cache_free(cfq_ioc_pool, cic);
1326 elv_ioc_count_dec(ioc_count); 1327 elv_ioc_count_dec(ioc_count);
1327 } 1328 }
1328 1329
1329 static struct cfq_io_context * 1330 static struct cfq_io_context *
1330 cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) 1331 cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc)
1331 { 1332 {
1332 struct rb_node *n; 1333 struct rb_node *n;
1333 struct cfq_io_context *cic; 1334 struct cfq_io_context *cic;
1334 void *k, *key = cfqd; 1335 void *k, *key = cfqd;
1335 1336
1336 restart: 1337 restart:
1337 n = ioc->cic_root.rb_node; 1338 n = ioc->cic_root.rb_node;
1338 while (n) { 1339 while (n) {
1339 cic = rb_entry(n, struct cfq_io_context, rb_node); 1340 cic = rb_entry(n, struct cfq_io_context, rb_node);
1340 /* ->key must be copied to avoid race with cfq_exit_queue() */ 1341 /* ->key must be copied to avoid race with cfq_exit_queue() */
1341 k = cic->key; 1342 k = cic->key;
1342 if (unlikely(!k)) { 1343 if (unlikely(!k)) {
1343 cfq_drop_dead_cic(ioc, cic); 1344 cfq_drop_dead_cic(ioc, cic);
1344 goto restart; 1345 goto restart;
1345 } 1346 }
1346 1347
1347 if (key < k) 1348 if (key < k)
1348 n = n->rb_left; 1349 n = n->rb_left;
1349 else if (key > k) 1350 else if (key > k)
1350 n = n->rb_right; 1351 n = n->rb_right;
1351 else 1352 else
1352 return cic; 1353 return cic;
1353 } 1354 }
1354 1355
1355 return NULL; 1356 return NULL;
1356 } 1357 }
1357 1358
1358 static inline void 1359 static inline void
1359 cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, 1360 cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
1360 struct cfq_io_context *cic) 1361 struct cfq_io_context *cic)
1361 { 1362 {
1362 struct rb_node **p; 1363 struct rb_node **p;
1363 struct rb_node *parent; 1364 struct rb_node *parent;
1364 struct cfq_io_context *__cic; 1365 struct cfq_io_context *__cic;
1365 unsigned long flags; 1366 unsigned long flags;
1366 void *k; 1367 void *k;
1367 1368
1368 cic->ioc = ioc; 1369 cic->ioc = ioc;
1369 cic->key = cfqd; 1370 cic->key = cfqd;
1370 1371
1371 restart: 1372 restart:
1372 parent = NULL; 1373 parent = NULL;
1373 p = &ioc->cic_root.rb_node; 1374 p = &ioc->cic_root.rb_node;
1374 while (*p) { 1375 while (*p) {
1375 parent = *p; 1376 parent = *p;
1376 __cic = rb_entry(parent, struct cfq_io_context, rb_node); 1377 __cic = rb_entry(parent, struct cfq_io_context, rb_node);
1377 /* ->key must be copied to avoid race with cfq_exit_queue() */ 1378 /* ->key must be copied to avoid race with cfq_exit_queue() */
1378 k = __cic->key; 1379 k = __cic->key;
1379 if (unlikely(!k)) { 1380 if (unlikely(!k)) {
1380 cfq_drop_dead_cic(ioc, __cic); 1381 cfq_drop_dead_cic(ioc, __cic);
1381 goto restart; 1382 goto restart;
1382 } 1383 }
1383 1384
1384 if (cic->key < k) 1385 if (cic->key < k)
1385 p = &(*p)->rb_left; 1386 p = &(*p)->rb_left;
1386 else if (cic->key > k) 1387 else if (cic->key > k)
1387 p = &(*p)->rb_right; 1388 p = &(*p)->rb_right;
1388 else 1389 else
1389 BUG(); 1390 BUG();
1390 } 1391 }
1391 1392
1392 rb_link_node(&cic->rb_node, parent, p); 1393 rb_link_node(&cic->rb_node, parent, p);
1393 rb_insert_color(&cic->rb_node, &ioc->cic_root); 1394 rb_insert_color(&cic->rb_node, &ioc->cic_root);
1394 1395
1395 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 1396 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
1396 list_add(&cic->queue_list, &cfqd->cic_list); 1397 list_add(&cic->queue_list, &cfqd->cic_list);
1397 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 1398 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
1398 } 1399 }
1399 1400
1400 /* 1401 /*
1401 * Setup general io context and cfq io context. There can be several cfq 1402 * Setup general io context and cfq io context. There can be several cfq
1402 * io contexts per general io context, if this process is doing io to more 1403 * io contexts per general io context, if this process is doing io to more
1403 * than one device managed by cfq. 1404 * than one device managed by cfq.
1404 */ 1405 */
1405 static struct cfq_io_context * 1406 static struct cfq_io_context *
1406 cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 1407 cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
1407 { 1408 {
1408 struct io_context *ioc = NULL; 1409 struct io_context *ioc = NULL;
1409 struct cfq_io_context *cic; 1410 struct cfq_io_context *cic;
1410 1411
1411 might_sleep_if(gfp_mask & __GFP_WAIT); 1412 might_sleep_if(gfp_mask & __GFP_WAIT);
1412 1413
1413 ioc = get_io_context(gfp_mask, cfqd->queue->node); 1414 ioc = get_io_context(gfp_mask, cfqd->queue->node);
1414 if (!ioc) 1415 if (!ioc)
1415 return NULL; 1416 return NULL;
1416 1417
1417 cic = cfq_cic_rb_lookup(cfqd, ioc); 1418 cic = cfq_cic_rb_lookup(cfqd, ioc);
1418 if (cic) 1419 if (cic)
1419 goto out; 1420 goto out;
1420 1421
1421 cic = cfq_alloc_io_context(cfqd, gfp_mask); 1422 cic = cfq_alloc_io_context(cfqd, gfp_mask);
1422 if (cic == NULL) 1423 if (cic == NULL)
1423 goto err; 1424 goto err;
1424 1425
1425 cfq_cic_link(cfqd, ioc, cic); 1426 cfq_cic_link(cfqd, ioc, cic);
1426 out: 1427 out:
1427 smp_read_barrier_depends(); 1428 smp_read_barrier_depends();
1428 if (unlikely(ioc->ioprio_changed)) 1429 if (unlikely(ioc->ioprio_changed))
1429 cfq_ioc_set_ioprio(ioc); 1430 cfq_ioc_set_ioprio(ioc);
1430 1431
1431 return cic; 1432 return cic;
1432 err: 1433 err:
1433 put_io_context(ioc); 1434 put_io_context(ioc);
1434 return NULL; 1435 return NULL;
1435 } 1436 }
1436 1437
1437 static void 1438 static void
1438 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) 1439 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
1439 { 1440 {
1440 unsigned long elapsed, ttime; 1441 unsigned long elapsed, ttime;
1441 1442
1442 /* 1443 /*
1443 * if this context already has stuff queued, thinktime is from 1444 * if this context already has stuff queued, thinktime is from
1444 * last queue not last end 1445 * last queue not last end
1445 */ 1446 */
1446 #if 0 1447 #if 0
1447 if (time_after(cic->last_end_request, cic->last_queue)) 1448 if (time_after(cic->last_end_request, cic->last_queue))
1448 elapsed = jiffies - cic->last_end_request; 1449 elapsed = jiffies - cic->last_end_request;
1449 else 1450 else
1450 elapsed = jiffies - cic->last_queue; 1451 elapsed = jiffies - cic->last_queue;
1451 #else 1452 #else
1452 elapsed = jiffies - cic->last_end_request; 1453 elapsed = jiffies - cic->last_end_request;
1453 #endif 1454 #endif
1454 1455
1455 ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); 1456 ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
1456 1457
1457 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; 1458 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
1458 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; 1459 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
1459 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; 1460 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
1460 } 1461 }
1461 1462
1462 static void 1463 static void
1463 cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic, 1464 cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
1464 struct request *rq) 1465 struct request *rq)
1465 { 1466 {
1466 sector_t sdist; 1467 sector_t sdist;
1467 u64 total; 1468 u64 total;
1468 1469
1469 if (cic->last_request_pos < rq->sector) 1470 if (cic->last_request_pos < rq->sector)
1470 sdist = rq->sector - cic->last_request_pos; 1471 sdist = rq->sector - cic->last_request_pos;
1471 else 1472 else
1472 sdist = cic->last_request_pos - rq->sector; 1473 sdist = cic->last_request_pos - rq->sector;
1473 1474
1474 /* 1475 /*
1475 * Don't allow the seek distance to get too large from the 1476 * Don't allow the seek distance to get too large from the
1476 * odd fragment, pagein, etc 1477 * odd fragment, pagein, etc
1477 */ 1478 */
1478 if (cic->seek_samples <= 60) /* second&third seek */ 1479 if (cic->seek_samples <= 60) /* second&third seek */
1479 sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*1024); 1480 sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*1024);
1480 else 1481 else
1481 sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*64); 1482 sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*64);
1482 1483
1483 cic->seek_samples = (7*cic->seek_samples + 256) / 8; 1484 cic->seek_samples = (7*cic->seek_samples + 256) / 8;
1484 cic->seek_total = (7*cic->seek_total + (u64)256*sdist) / 8; 1485 cic->seek_total = (7*cic->seek_total + (u64)256*sdist) / 8;
1485 total = cic->seek_total + (cic->seek_samples/2); 1486 total = cic->seek_total + (cic->seek_samples/2);
1486 do_div(total, cic->seek_samples); 1487 do_div(total, cic->seek_samples);
1487 cic->seek_mean = (sector_t)total; 1488 cic->seek_mean = (sector_t)total;
1488 } 1489 }
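Both update helpers above keep their statistics as fixed-point running averages: totals and sample counts are scaled by 256 and decayed by 7/8 on every update, so recent requests dominate without any floating point. A small sketch of the arithmetic (the ewma_* names are illustrative, not from this file):

    /*
     * total and samples are kept scaled by 256; each update retains 7/8 of
     * the old value and mixes in 1/8 of the new, fully scaled, sample.
     */
    static inline unsigned long ewma_add(unsigned long total, unsigned long sample)
    {
    	return (7 * total + 256 * sample) / 8;
    }

    static inline unsigned long ewma_samples(unsigned long samples)
    {
    	return (7 * samples + 256) / 8;		/* converges towards 256 */
    }

    static inline unsigned long ewma_mean(unsigned long total, unsigned long samples)
    {
    	return (total + samples / 2) / samples;	/* rounded mean in original units */
    }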
1489 1490
1490 /* 1491 /*
1491 * Disable idle window if the process thinks too long or seeks so much that 1492 * Disable idle window if the process thinks too long or seeks so much that
1492 * it doesn't matter 1493 * it doesn't matter
1493 */ 1494 */
1494 static void 1495 static void
1495 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1496 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1496 struct cfq_io_context *cic) 1497 struct cfq_io_context *cic)
1497 { 1498 {
1498 int enable_idle = cfq_cfqq_idle_window(cfqq); 1499 int enable_idle = cfq_cfqq_idle_window(cfqq);
1499 1500
1500 if (!cic->ioc->task || !cfqd->cfq_slice_idle || 1501 if (!cic->ioc->task || !cfqd->cfq_slice_idle ||
1501 (cfqd->hw_tag && CIC_SEEKY(cic))) 1502 (cfqd->hw_tag && CIC_SEEKY(cic)))
1502 enable_idle = 0; 1503 enable_idle = 0;
1503 else if (sample_valid(cic->ttime_samples)) { 1504 else if (sample_valid(cic->ttime_samples)) {
1504 if (cic->ttime_mean > cfqd->cfq_slice_idle) 1505 if (cic->ttime_mean > cfqd->cfq_slice_idle)
1505 enable_idle = 0; 1506 enable_idle = 0;
1506 else 1507 else
1507 enable_idle = 1; 1508 enable_idle = 1;
1508 } 1509 }
1509 1510
1510 if (enable_idle) 1511 if (enable_idle)
1511 cfq_mark_cfqq_idle_window(cfqq); 1512 cfq_mark_cfqq_idle_window(cfqq);
1512 else 1513 else
1513 cfq_clear_cfqq_idle_window(cfqq); 1514 cfq_clear_cfqq_idle_window(cfqq);
1514 } 1515 }
1515 1516
1516 1517
1517 /* 1518 /*
1518 * Check if new_cfqq should preempt the currently active queue. Return 0 for 1519 * Check if new_cfqq should preempt the currently active queue. Return 0 for
1519 * no or if we aren't sure; returning 1 will cause a preempt. 1520 * no or if we aren't sure; returning 1 will cause a preempt.
1520 */ 1521 */
1521 static int 1522 static int
1522 cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, 1523 cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1523 struct request *rq) 1524 struct request *rq)
1524 { 1525 {
1525 struct cfq_queue *cfqq = cfqd->active_queue; 1526 struct cfq_queue *cfqq = cfqd->active_queue;
1526 1527
1527 if (cfq_class_idle(new_cfqq)) 1528 if (cfq_class_idle(new_cfqq))
1528 return 0; 1529 return 0;
1529 1530
1530 if (!cfqq) 1531 if (!cfqq)
1531 return 0; 1532 return 0;
1532 1533
1533 if (cfq_class_idle(cfqq)) 1534 if (cfq_class_idle(cfqq))
1534 return 1; 1535 return 1;
1535 if (!cfq_cfqq_wait_request(new_cfqq)) 1536 if (!cfq_cfqq_wait_request(new_cfqq))
1536 return 0; 1537 return 0;
1537 /* 1538 /*
1538 * if it doesn't have slice left, forget it 1539 * if it doesn't have slice left, forget it
1539 */ 1540 */
1540 if (new_cfqq->slice_left < cfqd->cfq_slice_idle) 1541 if (new_cfqq->slice_left < cfqd->cfq_slice_idle)
1541 return 0; 1542 return 0;
1542 /* 1543 /*
1543 * if the new request is sync, but the currently running queue is 1544 * if the new request is sync, but the currently running queue is
1544 * not, let the sync request have priority. 1545 * not, let the sync request have priority.
1545 */ 1546 */
1546 if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq)) 1547 if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
1547 return 1; 1548 return 1;
1548 /* 1549 /*
1549 * So both queues are sync. Let the new request get disk time if 1550 * So both queues are sync. Let the new request get disk time if
1550 * it's a metadata request and the current queue is doing regular IO. 1551 * it's a metadata request and the current queue is doing regular IO.
1551 */ 1552 */
1552 if (rq_is_meta(rq) && !cfqq->meta_pending) 1553 if (rq_is_meta(rq) && !cfqq->meta_pending)
1553 return 1; 1554 return 1;
1554 1555
1555 return 0; 1556 return 0;
1556 } 1557 }
1557 1558
1558 /* 1559 /*
1559 * cfqq preempts the active queue. if we allowed preempt with no slice left, 1560 * cfqq preempts the active queue. if we allowed preempt with no slice left,
1560 * let it have half of its nominal slice. 1561 * let it have half of its nominal slice.
1561 */ 1562 */
1562 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1563 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1563 { 1564 {
1564 cfq_slice_expired(cfqd, 1); 1565 cfq_slice_expired(cfqd, 1);
1565 1566
1566 if (!cfqq->slice_left) 1567 if (!cfqq->slice_left)
1567 cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2; 1568 cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2;
1568 1569
1569 /* 1570 /*
1570 * Put the new queue at the front of the current list, 1571 * Put the new queue at the front of the current list,
1571 * so we know that it will be selected next. 1572 * so we know that it will be selected next.
1572 */ 1573 */
1573 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 1574 BUG_ON(!cfq_cfqq_on_rr(cfqq));
1574 list_move(&cfqq->cfq_list, &cfqd->cur_rr); 1575 list_move(&cfqq->cfq_list, &cfqd->cur_rr);
1575 1576
1576 cfqq->slice_end = cfqq->slice_left + jiffies; 1577 cfqq->slice_end = cfqq->slice_left + jiffies;
1577 } 1578 }
1578 1579
1579 /* 1580 /*
1580 * Called when a new fs request (rq) is added (to cfqq). Check if there's 1581 * Called when a new fs request (rq) is added (to cfqq). Check if there's
1581 * something we should do about it 1582 * something we should do about it
1582 */ 1583 */
1583 static void 1584 static void
1584 cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1585 cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1585 struct request *rq) 1586 struct request *rq)
1586 { 1587 {
1587 struct cfq_io_context *cic = RQ_CIC(rq); 1588 struct cfq_io_context *cic = RQ_CIC(rq);
1588 1589
1589 if (rq_is_meta(rq)) 1590 if (rq_is_meta(rq))
1590 cfqq->meta_pending++; 1591 cfqq->meta_pending++;
1591 1592
1592 /* 1593 /*
1593 * check if this request is a better next-serve candidate 1594 * check if this request is a better next-serve candidate
1594 */ 1595 */
1595 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq); 1596 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
1596 BUG_ON(!cfqq->next_rq); 1597 BUG_ON(!cfqq->next_rq);
1597 1598
1598 /* 1599 /*
1599 * we never wait for an async request and we don't allow preemption 1600 * we never wait for an async request and we don't allow preemption
1600 * of an async request. so just return early 1601 * of an async request. so just return early
1601 */ 1602 */
1602 if (!rq_is_sync(rq)) { 1603 if (!rq_is_sync(rq)) {
1603 /* 1604 /*
1604 * sync process issued an async request, if it's waiting 1605 * sync process issued an async request, if it's waiting
1605 * then expire it and kick rq handling. 1606 * then expire it and kick rq handling.
1606 */ 1607 */
1607 if (cic == cfqd->active_cic && 1608 if (cic == cfqd->active_cic &&
1608 del_timer(&cfqd->idle_slice_timer)) { 1609 del_timer(&cfqd->idle_slice_timer)) {
1609 cfq_slice_expired(cfqd, 0); 1610 cfq_slice_expired(cfqd, 0);
1610 blk_start_queueing(cfqd->queue); 1611 blk_start_queueing(cfqd->queue);
1611 } 1612 }
1612 return; 1613 return;
1613 } 1614 }
1614 1615
1615 cfq_update_io_thinktime(cfqd, cic); 1616 cfq_update_io_thinktime(cfqd, cic);
1616 cfq_update_io_seektime(cfqd, cic, rq); 1617 cfq_update_io_seektime(cfqd, cic, rq);
1617 cfq_update_idle_window(cfqd, cfqq, cic); 1618 cfq_update_idle_window(cfqd, cfqq, cic);
1618 1619
1619 cic->last_queue = jiffies; 1620 cic->last_queue = jiffies;
1620 cic->last_request_pos = rq->sector + rq->nr_sectors; 1621 cic->last_request_pos = rq->sector + rq->nr_sectors;
1621 1622
1622 if (cfqq == cfqd->active_queue) { 1623 if (cfqq == cfqd->active_queue) {
1623 /* 1624 /*
1624 * if we are waiting for a request for this queue, let it rip 1625 * if we are waiting for a request for this queue, let it rip
1625 * immediately and flag that we must not expire this queue 1626 * immediately and flag that we must not expire this queue
1626 * just now 1627 * just now
1627 */ 1628 */
1628 if (cfq_cfqq_wait_request(cfqq)) { 1629 if (cfq_cfqq_wait_request(cfqq)) {
1629 cfq_mark_cfqq_must_dispatch(cfqq); 1630 cfq_mark_cfqq_must_dispatch(cfqq);
1630 del_timer(&cfqd->idle_slice_timer); 1631 del_timer(&cfqd->idle_slice_timer);
1631 blk_start_queueing(cfqd->queue); 1632 blk_start_queueing(cfqd->queue);
1632 } 1633 }
1633 } else if (cfq_should_preempt(cfqd, cfqq, rq)) { 1634 } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
1634 /* 1635 /*
1635 * not the active queue - expire current slice if it is 1636 * not the active queue - expire current slice if it is
1636 * idle and has expired its mean thinktime or this new queue 1637 * idle and has expired its mean thinktime or this new queue
1637 * has some old slice time left and is of higher priority 1638 * has some old slice time left and is of higher priority
1638 */ 1639 */
1639 cfq_preempt_queue(cfqd, cfqq); 1640 cfq_preempt_queue(cfqd, cfqq);
1640 cfq_mark_cfqq_must_dispatch(cfqq); 1641 cfq_mark_cfqq_must_dispatch(cfqq);
1641 blk_start_queueing(cfqd->queue); 1642 blk_start_queueing(cfqd->queue);
1642 } 1643 }
1643 } 1644 }
1644 1645
1645 static void cfq_insert_request(request_queue_t *q, struct request *rq) 1646 static void cfq_insert_request(request_queue_t *q, struct request *rq)
1646 { 1647 {
1647 struct cfq_data *cfqd = q->elevator->elevator_data; 1648 struct cfq_data *cfqd = q->elevator->elevator_data;
1648 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1649 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1649 1650
1650 cfq_init_prio_data(cfqq); 1651 cfq_init_prio_data(cfqq);
1651 1652
1652 cfq_add_rq_rb(rq); 1653 cfq_add_rq_rb(rq);
1653 1654
1654 if (!cfq_cfqq_on_rr(cfqq)) 1655 if (!cfq_cfqq_on_rr(cfqq))
1655 cfq_add_cfqq_rr(cfqd, cfqq); 1656 cfq_add_cfqq_rr(cfqd, cfqq);
1656 1657
1657 list_add_tail(&rq->queuelist, &cfqq->fifo); 1658 list_add_tail(&rq->queuelist, &cfqq->fifo);
1658 1659
1659 cfq_rq_enqueued(cfqd, cfqq, rq); 1660 cfq_rq_enqueued(cfqd, cfqq, rq);
1660 } 1661 }
1661 1662
1662 static void cfq_completed_request(request_queue_t *q, struct request *rq) 1663 static void cfq_completed_request(request_queue_t *q, struct request *rq)
1663 { 1664 {
1664 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1665 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1665 struct cfq_data *cfqd = cfqq->cfqd; 1666 struct cfq_data *cfqd = cfqq->cfqd;
1666 const int sync = rq_is_sync(rq); 1667 const int sync = rq_is_sync(rq);
1667 unsigned long now; 1668 unsigned long now;
1668 1669
1669 now = jiffies; 1670 now = jiffies;
1670 1671
1671 WARN_ON(!cfqd->rq_in_driver); 1672 WARN_ON(!cfqd->rq_in_driver);
1672 WARN_ON(!cfqq->on_dispatch[sync]); 1673 WARN_ON(!cfqq->on_dispatch[sync]);
1673 cfqd->rq_in_driver--; 1674 cfqd->rq_in_driver--;
1674 cfqq->on_dispatch[sync]--; 1675 cfqq->on_dispatch[sync]--;
1675 1676
1676 if (!cfq_class_idle(cfqq)) 1677 if (!cfq_class_idle(cfqq))
1677 cfqd->last_end_request = now; 1678 cfqd->last_end_request = now;
1678 1679
1679 if (!cfq_cfqq_dispatched(cfqq) && cfq_cfqq_on_rr(cfqq)) 1680 if (!cfq_cfqq_dispatched(cfqq) && cfq_cfqq_on_rr(cfqq))
1680 cfq_resort_rr_list(cfqq, 0); 1681 cfq_resort_rr_list(cfqq, 0);
1681 1682
1682 if (sync) 1683 if (sync)
1683 RQ_CIC(rq)->last_end_request = now; 1684 RQ_CIC(rq)->last_end_request = now;
1684 1685
1685 /* 1686 /*
1686 * If this is the active queue, check if it needs to be expired, 1687 * If this is the active queue, check if it needs to be expired,
1687 * or if we want to idle in case it has no pending requests. 1688 * or if we want to idle in case it has no pending requests.
1688 */ 1689 */
1689 if (cfqd->active_queue == cfqq) { 1690 if (cfqd->active_queue == cfqq) {
1690 if (time_after(now, cfqq->slice_end)) 1691 if (time_after(now, cfqq->slice_end))
1691 cfq_slice_expired(cfqd, 0); 1692 cfq_slice_expired(cfqd, 0);
1692 else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) { 1693 else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) {
1693 if (!cfq_arm_slice_timer(cfqd, cfqq)) 1694 if (!cfq_arm_slice_timer(cfqd, cfqq))
1694 cfq_schedule_dispatch(cfqd); 1695 cfq_schedule_dispatch(cfqd);
1695 } 1696 }
1696 } 1697 }
1697 } 1698 }
1698 1699
1699 /* 1700 /*
1700 * we temporarily boost lower priority queues if they are holding fs exclusive 1701 * we temporarily boost lower priority queues if they are holding fs exclusive
1701 * resources. they are boosted to normal prio (CLASS_BE/4) 1702 * resources. they are boosted to normal prio (CLASS_BE/4)
1702 */ 1703 */
1703 static void cfq_prio_boost(struct cfq_queue *cfqq) 1704 static void cfq_prio_boost(struct cfq_queue *cfqq)
1704 { 1705 {
1705 const int ioprio_class = cfqq->ioprio_class; 1706 const int ioprio_class = cfqq->ioprio_class;
1706 const int ioprio = cfqq->ioprio; 1707 const int ioprio = cfqq->ioprio;
1707 1708
1708 if (has_fs_excl()) { 1709 if (has_fs_excl()) {
1709 /* 1710 /*
1710 * boost idle prio on transactions that would lock out other 1711 * boost idle prio on transactions that would lock out other
1711 * users of the filesystem 1712 * users of the filesystem
1712 */ 1713 */
1713 if (cfq_class_idle(cfqq)) 1714 if (cfq_class_idle(cfqq))
1714 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1715 cfqq->ioprio_class = IOPRIO_CLASS_BE;
1715 if (cfqq->ioprio > IOPRIO_NORM) 1716 if (cfqq->ioprio > IOPRIO_NORM)
1716 cfqq->ioprio = IOPRIO_NORM; 1717 cfqq->ioprio = IOPRIO_NORM;
1717 } else { 1718 } else {
1718 /* 1719 /*
1719 * check if we need to unboost the queue 1720 * check if we need to unboost the queue
1720 */ 1721 */
1721 if (cfqq->ioprio_class != cfqq->org_ioprio_class) 1722 if (cfqq->ioprio_class != cfqq->org_ioprio_class)
1722 cfqq->ioprio_class = cfqq->org_ioprio_class; 1723 cfqq->ioprio_class = cfqq->org_ioprio_class;
1723 if (cfqq->ioprio != cfqq->org_ioprio) 1724 if (cfqq->ioprio != cfqq->org_ioprio)
1724 cfqq->ioprio = cfqq->org_ioprio; 1725 cfqq->ioprio = cfqq->org_ioprio;
1725 } 1726 }
1726 1727
1727 /* 1728 /*
1728 * refile between round-robin lists if we moved the priority class 1729 * refile between round-robin lists if we moved the priority class
1729 */ 1730 */
1730 if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) && 1731 if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) &&
1731 cfq_cfqq_on_rr(cfqq)) 1732 cfq_cfqq_on_rr(cfqq))
1732 cfq_resort_rr_list(cfqq, 0); 1733 cfq_resort_rr_list(cfqq, 0);
1733 } 1734 }
1734 1735
1735 static inline int __cfq_may_queue(struct cfq_queue *cfqq) 1736 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
1736 { 1737 {
1737 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && 1738 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
1738 !cfq_cfqq_must_alloc_slice(cfqq)) { 1739 !cfq_cfqq_must_alloc_slice(cfqq)) {
1739 cfq_mark_cfqq_must_alloc_slice(cfqq); 1740 cfq_mark_cfqq_must_alloc_slice(cfqq);
1740 return ELV_MQUEUE_MUST; 1741 return ELV_MQUEUE_MUST;
1741 } 1742 }
1742 1743
1743 return ELV_MQUEUE_MAY; 1744 return ELV_MQUEUE_MAY;
1744 } 1745 }
1745 1746
1746 static int cfq_may_queue(request_queue_t *q, int rw) 1747 static int cfq_may_queue(request_queue_t *q, int rw)
1747 { 1748 {
1748 struct cfq_data *cfqd = q->elevator->elevator_data; 1749 struct cfq_data *cfqd = q->elevator->elevator_data;
1749 struct task_struct *tsk = current; 1750 struct task_struct *tsk = current;
1750 struct cfq_queue *cfqq; 1751 struct cfq_queue *cfqq;
1751 1752
1752 /* 1753 /*
1753 * don't force setup of a queue from here, as a call to may_queue 1754 * don't force setup of a queue from here, as a call to may_queue
1754 * does not necessarily imply that a request actually will be queued. 1755 * does not necessarily imply that a request actually will be queued.
1755 * so just lookup a possibly existing queue, or return 'may queue' 1756 * so just lookup a possibly existing queue, or return 'may queue'
1756 * if that fails 1757 * if that fails
1757 */ 1758 */
1758 cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio); 1759 cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio);
1759 if (cfqq) { 1760 if (cfqq) {
1760 cfq_init_prio_data(cfqq); 1761 cfq_init_prio_data(cfqq);
1761 cfq_prio_boost(cfqq); 1762 cfq_prio_boost(cfqq);
1762 1763
1763 return __cfq_may_queue(cfqq); 1764 return __cfq_may_queue(cfqq);
1764 } 1765 }
1765 1766
1766 return ELV_MQUEUE_MAY; 1767 return ELV_MQUEUE_MAY;
1767 } 1768 }
1768 1769
1769 /* 1770 /*
1770 * queue lock held here 1771 * queue lock held here
1771 */ 1772 */
1772 static void cfq_put_request(request_queue_t *q, struct request *rq) 1773 static void cfq_put_request(request_queue_t *q, struct request *rq)
1773 { 1774 {
1774 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1775 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1775 1776
1776 if (cfqq) { 1777 if (cfqq) {
1777 const int rw = rq_data_dir(rq); 1778 const int rw = rq_data_dir(rq);
1778 1779
1779 BUG_ON(!cfqq->allocated[rw]); 1780 BUG_ON(!cfqq->allocated[rw]);
1780 cfqq->allocated[rw]--; 1781 cfqq->allocated[rw]--;
1781 1782
1782 put_io_context(RQ_CIC(rq)->ioc); 1783 put_io_context(RQ_CIC(rq)->ioc);
1783 1784
1784 rq->elevator_private = NULL; 1785 rq->elevator_private = NULL;
1785 rq->elevator_private2 = NULL; 1786 rq->elevator_private2 = NULL;
1786 1787
1787 cfq_put_queue(cfqq); 1788 cfq_put_queue(cfqq);
1788 } 1789 }
1789 } 1790 }
1790 1791
1791 /* 1792 /*
1792 * Allocate cfq data structures associated with this request. 1793 * Allocate cfq data structures associated with this request.
1793 */ 1794 */
1794 static int 1795 static int
1795 cfq_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask) 1796 cfq_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
1796 { 1797 {
1797 struct cfq_data *cfqd = q->elevator->elevator_data; 1798 struct cfq_data *cfqd = q->elevator->elevator_data;
1798 struct task_struct *tsk = current; 1799 struct task_struct *tsk = current;
1799 struct cfq_io_context *cic; 1800 struct cfq_io_context *cic;
1800 const int rw = rq_data_dir(rq); 1801 const int rw = rq_data_dir(rq);
1801 pid_t key = cfq_queue_pid(tsk, rw); 1802 pid_t key = cfq_queue_pid(tsk, rw);
1802 struct cfq_queue *cfqq; 1803 struct cfq_queue *cfqq;
1803 unsigned long flags; 1804 unsigned long flags;
1804 int is_sync = key != CFQ_KEY_ASYNC; 1805 int is_sync = key != CFQ_KEY_ASYNC;
1805 1806
1806 might_sleep_if(gfp_mask & __GFP_WAIT); 1807 might_sleep_if(gfp_mask & __GFP_WAIT);
1807 1808
1808 cic = cfq_get_io_context(cfqd, gfp_mask); 1809 cic = cfq_get_io_context(cfqd, gfp_mask);
1809 1810
1810 spin_lock_irqsave(q->queue_lock, flags); 1811 spin_lock_irqsave(q->queue_lock, flags);
1811 1812
1812 if (!cic) 1813 if (!cic)
1813 goto queue_fail; 1814 goto queue_fail;
1814 1815
1815 if (!cic->cfqq[is_sync]) { 1816 if (!cic->cfqq[is_sync]) {
1816 cfqq = cfq_get_queue(cfqd, key, tsk, gfp_mask); 1817 cfqq = cfq_get_queue(cfqd, key, tsk, gfp_mask);
1817 if (!cfqq) 1818 if (!cfqq)
1818 goto queue_fail; 1819 goto queue_fail;
1819 1820
1820 cic->cfqq[is_sync] = cfqq; 1821 cic->cfqq[is_sync] = cfqq;
1821 } else 1822 } else
1822 cfqq = cic->cfqq[is_sync]; 1823 cfqq = cic->cfqq[is_sync];
1823 1824
1824 cfqq->allocated[rw]++; 1825 cfqq->allocated[rw]++;
1825 cfq_clear_cfqq_must_alloc(cfqq); 1826 cfq_clear_cfqq_must_alloc(cfqq);
1826 atomic_inc(&cfqq->ref); 1827 atomic_inc(&cfqq->ref);
1827 1828
1828 spin_unlock_irqrestore(q->queue_lock, flags); 1829 spin_unlock_irqrestore(q->queue_lock, flags);
1829 1830
1830 rq->elevator_private = cic; 1831 rq->elevator_private = cic;
1831 rq->elevator_private2 = cfqq; 1832 rq->elevator_private2 = cfqq;
1832 return 0; 1833 return 0;
1833 1834
1834 queue_fail: 1835 queue_fail:
1835 if (cic) 1836 if (cic)
1836 put_io_context(cic->ioc); 1837 put_io_context(cic->ioc);
1837 1838
1838 cfq_schedule_dispatch(cfqd); 1839 cfq_schedule_dispatch(cfqd);
1839 spin_unlock_irqrestore(q->queue_lock, flags); 1840 spin_unlock_irqrestore(q->queue_lock, flags);
1840 return 1; 1841 return 1;
1841 } 1842 }
1842 1843
1843 static void cfq_kick_queue(void *data) 1844 static void cfq_kick_queue(void *data)
1844 { 1845 {
1845 request_queue_t *q = data; 1846 request_queue_t *q = data;
1846 unsigned long flags; 1847 unsigned long flags;
1847 1848
1848 spin_lock_irqsave(q->queue_lock, flags); 1849 spin_lock_irqsave(q->queue_lock, flags);
1849 blk_start_queueing(q); 1850 blk_start_queueing(q);
1850 spin_unlock_irqrestore(q->queue_lock, flags); 1851 spin_unlock_irqrestore(q->queue_lock, flags);
1851 } 1852 }
1852 1853
1853 /* 1854 /*
1854 * Timer running if the active_queue is currently idling inside its time slice 1855 * Timer running if the active_queue is currently idling inside its time slice
1855 */ 1856 */
1856 static void cfq_idle_slice_timer(unsigned long data) 1857 static void cfq_idle_slice_timer(unsigned long data)
1857 { 1858 {
1858 struct cfq_data *cfqd = (struct cfq_data *) data; 1859 struct cfq_data *cfqd = (struct cfq_data *) data;
1859 struct cfq_queue *cfqq; 1860 struct cfq_queue *cfqq;
1860 unsigned long flags; 1861 unsigned long flags;
1861 1862
1862 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 1863 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
1863 1864
1864 if ((cfqq = cfqd->active_queue) != NULL) { 1865 if ((cfqq = cfqd->active_queue) != NULL) {
1865 unsigned long now = jiffies; 1866 unsigned long now = jiffies;
1866 1867
1867 /* 1868 /*
1868 * expired 1869 * expired
1869 */ 1870 */
1870 if (time_after(now, cfqq->slice_end)) 1871 if (time_after(now, cfqq->slice_end))
1871 goto expire; 1872 goto expire;
1872 1873
1873 /* 1874 /*
1874 * only expire and reinvoke the request handler if there are 1875 * only expire and reinvoke the request handler if there are
1875 * other queues with pending requests 1876 * other queues with pending requests
1876 */ 1877 */
1877 if (!cfqd->busy_queues) 1878 if (!cfqd->busy_queues)
1878 goto out_cont; 1879 goto out_cont;
1879 1880
1880 /* 1881 /*
1881 * not expired and it has a request pending, let it dispatch 1882 * not expired and it has a request pending, let it dispatch
1882 */ 1883 */
1883 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) { 1884 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) {
1884 cfq_mark_cfqq_must_dispatch(cfqq); 1885 cfq_mark_cfqq_must_dispatch(cfqq);
1885 goto out_kick; 1886 goto out_kick;
1886 } 1887 }
1887 } 1888 }
1888 expire: 1889 expire:
1889 cfq_slice_expired(cfqd, 0); 1890 cfq_slice_expired(cfqd, 0);
1890 out_kick: 1891 out_kick:
1891 cfq_schedule_dispatch(cfqd); 1892 cfq_schedule_dispatch(cfqd);
1892 out_cont: 1893 out_cont:
1893 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 1894 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
1894 } 1895 }
1895 1896
1896 /* 1897 /*
1897 * Timer running if an idle class queue is waiting for service 1898 * Timer running if an idle class queue is waiting for service
1898 */ 1899 */
1899 static void cfq_idle_class_timer(unsigned long data) 1900 static void cfq_idle_class_timer(unsigned long data)
1900 { 1901 {
1901 struct cfq_data *cfqd = (struct cfq_data *) data; 1902 struct cfq_data *cfqd = (struct cfq_data *) data;
1902 unsigned long flags, end; 1903 unsigned long flags, end;
1903 1904
1904 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 1905 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
1905 1906
1906 /* 1907 /*
1907 * race with a non-idle queue, reset timer 1908 * race with a non-idle queue, reset timer
1908 */ 1909 */
1909 end = cfqd->last_end_request + CFQ_IDLE_GRACE; 1910 end = cfqd->last_end_request + CFQ_IDLE_GRACE;
1910 if (!time_after_eq(jiffies, end)) 1911 if (!time_after_eq(jiffies, end))
1911 mod_timer(&cfqd->idle_class_timer, end); 1912 mod_timer(&cfqd->idle_class_timer, end);
1912 else 1913 else
1913 cfq_schedule_dispatch(cfqd); 1914 cfq_schedule_dispatch(cfqd);
1914 1915
1915 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 1916 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
1916 } 1917 }
1917 1918
1918 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) 1919 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
1919 { 1920 {
1920 del_timer_sync(&cfqd->idle_slice_timer); 1921 del_timer_sync(&cfqd->idle_slice_timer);
1921 del_timer_sync(&cfqd->idle_class_timer); 1922 del_timer_sync(&cfqd->idle_class_timer);
1922 blk_sync_queue(cfqd->queue); 1923 blk_sync_queue(cfqd->queue);
1923 } 1924 }
1924 1925
1925 static void cfq_exit_queue(elevator_t *e) 1926 static void cfq_exit_queue(elevator_t *e)
1926 { 1927 {
1927 struct cfq_data *cfqd = e->elevator_data; 1928 struct cfq_data *cfqd = e->elevator_data;
1928 request_queue_t *q = cfqd->queue; 1929 request_queue_t *q = cfqd->queue;
1929 1930
1930 cfq_shutdown_timer_wq(cfqd); 1931 cfq_shutdown_timer_wq(cfqd);
1931 1932
1932 spin_lock_irq(q->queue_lock); 1933 spin_lock_irq(q->queue_lock);
1933 1934
1934 if (cfqd->active_queue) 1935 if (cfqd->active_queue)
1935 __cfq_slice_expired(cfqd, cfqd->active_queue, 0); 1936 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
1936 1937
1937 while (!list_empty(&cfqd->cic_list)) { 1938 while (!list_empty(&cfqd->cic_list)) {
1938 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, 1939 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
1939 struct cfq_io_context, 1940 struct cfq_io_context,
1940 queue_list); 1941 queue_list);
1941 1942
1942 __cfq_exit_single_io_context(cfqd, cic); 1943 __cfq_exit_single_io_context(cfqd, cic);
1943 } 1944 }
1944 1945
1945 spin_unlock_irq(q->queue_lock); 1946 spin_unlock_irq(q->queue_lock);
1946 1947
1947 cfq_shutdown_timer_wq(cfqd); 1948 cfq_shutdown_timer_wq(cfqd);
1948 1949
1949 kfree(cfqd->cfq_hash); 1950 kfree(cfqd->cfq_hash);
1950 kfree(cfqd); 1951 kfree(cfqd);
1951 } 1952 }
1952 1953
1953 static void *cfq_init_queue(request_queue_t *q, elevator_t *e) 1954 static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
1954 { 1955 {
1955 struct cfq_data *cfqd; 1956 struct cfq_data *cfqd;
1956 int i; 1957 int i;
1957 1958
1958 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node); 1959 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node);
1959 if (!cfqd) 1960 if (!cfqd)
1960 return NULL; 1961 return NULL;
1961 1962
1962 memset(cfqd, 0, sizeof(*cfqd)); 1963 memset(cfqd, 0, sizeof(*cfqd));
1963 1964
1964 for (i = 0; i < CFQ_PRIO_LISTS; i++) 1965 for (i = 0; i < CFQ_PRIO_LISTS; i++)
1965 INIT_LIST_HEAD(&cfqd->rr_list[i]); 1966 INIT_LIST_HEAD(&cfqd->rr_list[i]);
1966 1967
1967 INIT_LIST_HEAD(&cfqd->busy_rr); 1968 INIT_LIST_HEAD(&cfqd->busy_rr);
1968 INIT_LIST_HEAD(&cfqd->cur_rr); 1969 INIT_LIST_HEAD(&cfqd->cur_rr);
1969 INIT_LIST_HEAD(&cfqd->idle_rr); 1970 INIT_LIST_HEAD(&cfqd->idle_rr);
1970 INIT_LIST_HEAD(&cfqd->cic_list); 1971 INIT_LIST_HEAD(&cfqd->cic_list);
1971 1972
1972 cfqd->cfq_hash = kmalloc_node(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL, q->node); 1973 cfqd->cfq_hash = kmalloc_node(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL, q->node);
1973 if (!cfqd->cfq_hash) 1974 if (!cfqd->cfq_hash)
1974 goto out_free; 1975 goto out_free;
1975 1976
1976 for (i = 0; i < CFQ_QHASH_ENTRIES; i++) 1977 for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
1977 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); 1978 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
1978 1979
1979 cfqd->queue = q; 1980 cfqd->queue = q;
1980 1981
1981 init_timer(&cfqd->idle_slice_timer); 1982 init_timer(&cfqd->idle_slice_timer);
1982 cfqd->idle_slice_timer.function = cfq_idle_slice_timer; 1983 cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
1983 cfqd->idle_slice_timer.data = (unsigned long) cfqd; 1984 cfqd->idle_slice_timer.data = (unsigned long) cfqd;
1984 1985
1985 init_timer(&cfqd->idle_class_timer); 1986 init_timer(&cfqd->idle_class_timer);
1986 cfqd->idle_class_timer.function = cfq_idle_class_timer; 1987 cfqd->idle_class_timer.function = cfq_idle_class_timer;
1987 cfqd->idle_class_timer.data = (unsigned long) cfqd; 1988 cfqd->idle_class_timer.data = (unsigned long) cfqd;
1988 1989
1989 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q); 1990 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);
1990 1991
1991 cfqd->cfq_quantum = cfq_quantum; 1992 cfqd->cfq_quantum = cfq_quantum;
1992 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; 1993 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
1993 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; 1994 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
1994 cfqd->cfq_back_max = cfq_back_max; 1995 cfqd->cfq_back_max = cfq_back_max;
1995 cfqd->cfq_back_penalty = cfq_back_penalty; 1996 cfqd->cfq_back_penalty = cfq_back_penalty;
1996 cfqd->cfq_slice[0] = cfq_slice_async; 1997 cfqd->cfq_slice[0] = cfq_slice_async;
1997 cfqd->cfq_slice[1] = cfq_slice_sync; 1998 cfqd->cfq_slice[1] = cfq_slice_sync;
1998 cfqd->cfq_slice_async_rq = cfq_slice_async_rq; 1999 cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
1999 cfqd->cfq_slice_idle = cfq_slice_idle; 2000 cfqd->cfq_slice_idle = cfq_slice_idle;
2000 2001
2001 return cfqd; 2002 return cfqd;
2002 out_free: 2003 out_free:
2003 kfree(cfqd); 2004 kfree(cfqd);
2004 return NULL; 2005 return NULL;
2005 } 2006 }
2006 2007
2007 static void cfq_slab_kill(void) 2008 static void cfq_slab_kill(void)
2008 { 2009 {
2009 if (cfq_pool) 2010 if (cfq_pool)
2010 kmem_cache_destroy(cfq_pool); 2011 kmem_cache_destroy(cfq_pool);
2011 if (cfq_ioc_pool) 2012 if (cfq_ioc_pool)
2012 kmem_cache_destroy(cfq_ioc_pool); 2013 kmem_cache_destroy(cfq_ioc_pool);
2013 } 2014 }
2014 2015
2015 static int __init cfq_slab_setup(void) 2016 static int __init cfq_slab_setup(void)
2016 { 2017 {
2017 cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0, 2018 cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
2018 NULL, NULL); 2019 NULL, NULL);
2019 if (!cfq_pool) 2020 if (!cfq_pool)
2020 goto fail; 2021 goto fail;
2021 2022
2022 cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool", 2023 cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool",
2023 sizeof(struct cfq_io_context), 0, 0, NULL, NULL); 2024 sizeof(struct cfq_io_context), 0, 0, NULL, NULL);
2024 if (!cfq_ioc_pool) 2025 if (!cfq_ioc_pool)
2025 goto fail; 2026 goto fail;
2026 2027
2027 return 0; 2028 return 0;
2028 fail: 2029 fail:
2029 cfq_slab_kill(); 2030 cfq_slab_kill();
2030 return -ENOMEM; 2031 return -ENOMEM;
2031 } 2032 }
2032 2033
2033 /* 2034 /*
2034 * sysfs parts below --> 2035 * sysfs parts below -->
2035 */ 2036 */
2036 2037
2037 static ssize_t 2038 static ssize_t
2038 cfq_var_show(unsigned int var, char *page) 2039 cfq_var_show(unsigned int var, char *page)
2039 { 2040 {
2040 return sprintf(page, "%d\n", var); 2041 return sprintf(page, "%d\n", var);
2041 } 2042 }
2042 2043
2043 static ssize_t 2044 static ssize_t
2044 cfq_var_store(unsigned int *var, const char *page, size_t count) 2045 cfq_var_store(unsigned int *var, const char *page, size_t count)
2045 { 2046 {
2046 char *p = (char *) page; 2047 char *p = (char *) page;
2047 2048
2048 *var = simple_strtoul(p, &p, 10); 2049 *var = simple_strtoul(p, &p, 10);
2049 return count; 2050 return count;
2050 } 2051 }
2051 2052
2052 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ 2053 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
2053 static ssize_t __FUNC(elevator_t *e, char *page) \ 2054 static ssize_t __FUNC(elevator_t *e, char *page) \
2054 { \ 2055 { \
2055 struct cfq_data *cfqd = e->elevator_data; \ 2056 struct cfq_data *cfqd = e->elevator_data; \
2056 unsigned int __data = __VAR; \ 2057 unsigned int __data = __VAR; \
2057 if (__CONV) \ 2058 if (__CONV) \
2058 __data = jiffies_to_msecs(__data); \ 2059 __data = jiffies_to_msecs(__data); \
2059 return cfq_var_show(__data, (page)); \ 2060 return cfq_var_show(__data, (page)); \
2060 } 2061 }
2061 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); 2062 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
2062 SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); 2063 SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
2063 SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); 2064 SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
2064 SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0); 2065 SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
2065 SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0); 2066 SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0);
2066 SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); 2067 SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
2067 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); 2068 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
2068 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); 2069 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
2069 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); 2070 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
2070 #undef SHOW_FUNCTION 2071 #undef SHOW_FUNCTION
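For illustration, expanding one instance of the macro above, SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1) generates roughly the following (the value is converted from jiffies to milliseconds because __CONV is 1 for this attribute):

    static ssize_t cfq_slice_idle_show(elevator_t *e, char *page)
    {
    	struct cfq_data *cfqd = e->elevator_data;
    	unsigned int __data = cfqd->cfq_slice_idle;

    	if (1)					/* __CONV was 1 for this attribute */
    		__data = jiffies_to_msecs(__data);
    	return cfq_var_show(__data, (page));
    }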
2071 2072
2072 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ 2073 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
2073 static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ 2074 static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
2074 { \ 2075 { \
2075 struct cfq_data *cfqd = e->elevator_data; \ 2076 struct cfq_data *cfqd = e->elevator_data; \
2076 unsigned int __data; \ 2077 unsigned int __data; \
2077 int ret = cfq_var_store(&__data, (page), count); \ 2078 int ret = cfq_var_store(&__data, (page), count); \
2078 if (__data < (MIN)) \ 2079 if (__data < (MIN)) \
2079 __data = (MIN); \ 2080 __data = (MIN); \
2080 else if (__data > (MAX)) \ 2081 else if (__data > (MAX)) \
2081 __data = (MAX); \ 2082 __data = (MAX); \
2082 if (__CONV) \ 2083 if (__CONV) \
2083 *(__PTR) = msecs_to_jiffies(__data); \ 2084 *(__PTR) = msecs_to_jiffies(__data); \
2084 else \ 2085 else \
2085 *(__PTR) = __data; \ 2086 *(__PTR) = __data; \
2086 return ret; \ 2087 return ret; \
2087 } 2088 }
2088 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); 2089 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
2089 STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1); 2090 STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1);
2090 STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1); 2091 STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1);
2091 STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); 2092 STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
2092 STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0); 2093 STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0);
2093 STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); 2094 STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
2094 STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); 2095 STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
2095 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); 2096 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
2096 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); 2097 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0);
2097 #undef STORE_FUNCTION 2098 #undef STORE_FUNCTION
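As a reading aid only: below is a minimal sketch of roughly what the cfq_quantum_store() invocation of the STORE_FUNCTION() macro above expands to after preprocessing. Whitespace has been added and the constant-folded __CONV branch and redundant parentheses simplified for readability; it is not the literal preprocessor output.

static ssize_t cfq_quantum_store(elevator_t *e, const char *page, size_t count)
{
        struct cfq_data *cfqd = e->elevator_data;
        unsigned int __data;
        int ret = cfq_var_store(&__data, page, count);

        /* clamp the parsed value to the [1, UINT_MAX] range given in the invocation */
        if (__data < 1)
                __data = 1;
        else if (__data > UINT_MAX)
                __data = UINT_MAX;

        /* __CONV is 0 for quantum: store the raw value, no ms-to-jiffies conversion */
        cfqd->cfq_quantum = __data;
        return ret;
}

The tunables passed with __CONV set to 1 (the fifo_expire, slice_sync/async and slice_idle stores) differ only in that the final assignment goes through msecs_to_jiffies().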

#define CFQ_ATTR(name) \
        __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)

static struct elv_fs_entry cfq_attrs[] = {
        CFQ_ATTR(quantum),
        CFQ_ATTR(fifo_expire_sync),
        CFQ_ATTR(fifo_expire_async),
        CFQ_ATTR(back_seek_max),
        CFQ_ATTR(back_seek_penalty),
        CFQ_ATTR(slice_sync),
        CFQ_ATTR(slice_async),
        CFQ_ATTR(slice_async_rq),
        CFQ_ATTR(slice_idle),
        __ATTR_NULL
};
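When a request queue is using CFQ, each entry in this table appears as a file under /sys/block/<dev>/queue/iosched/, wired to the _show and _store functions generated above. As a hedged illustration only, a small userspace helper for reading one of these tunables might look like the following; the device name "sda" and the helper name read_cfq_tunable are assumptions for the example, not part of the kernel sources.

/* Hypothetical userspace helper: read a CFQ tunable via sysfs.
 * Assumes the disk "sda" is currently using the cfq I/O scheduler. */
#include <stdio.h>

static int read_cfq_tunable(const char *name, unsigned int *val)
{
        char path[128];
        FILE *f;
        int ok;

        snprintf(path, sizeof(path), "/sys/block/sda/queue/iosched/%s", name);
        f = fopen(path, "r");
        if (!f)
                return -1;
        ok = (fscanf(f, "%u", val) == 1);
        fclose(f);
        return ok ? 0 : -1;
}

For example, read_cfq_tunable("quantum", &q) would return the same value that cfq_quantum_show() formats for that queue.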

static struct elevator_type iosched_cfq = {
        .ops = {
                .elevator_merge_fn = cfq_merge,
                .elevator_merged_fn = cfq_merged_request,
                .elevator_merge_req_fn = cfq_merged_requests,
                .elevator_dispatch_fn = cfq_dispatch_requests,
                .elevator_add_req_fn = cfq_insert_request,
                .elevator_activate_req_fn = cfq_activate_request,
                .elevator_deactivate_req_fn = cfq_deactivate_request,
                .elevator_queue_empty_fn = cfq_queue_empty,
                .elevator_completed_req_fn = cfq_completed_request,
                .elevator_former_req_fn = elv_rb_former_request,
                .elevator_latter_req_fn = elv_rb_latter_request,
                .elevator_set_req_fn = cfq_set_request,
                .elevator_put_req_fn = cfq_put_request,
                .elevator_may_queue_fn = cfq_may_queue,
                .elevator_init_fn = cfq_init_queue,
                .elevator_exit_fn = cfq_exit_queue,
                .trim = cfq_free_io_context,
        },
        .elevator_attrs = cfq_attrs,
        .elevator_name = "cfq",
        .elevator_owner = THIS_MODULE,
};

static int __init cfq_init(void)
{
        int ret;

        /*
         * could be 0 on HZ < 1000 setups
         */
        if (!cfq_slice_async)
                cfq_slice_async = 1;
        if (!cfq_slice_idle)
                cfq_slice_idle = 1;

        if (cfq_slab_setup())
                return -ENOMEM;

        ret = elv_register(&iosched_cfq);
        if (ret)
                cfq_slab_kill();

        return ret;
}
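A quick worked example of the "could be 0 on HZ < 1000 setups" case handled above: the idle-slice default is computed as HZ / 125, so on a HZ=100 kernel integer division yields 0 jiffies, which would effectively disable idling; cfq_init() therefore bumps such values to 1 jiffy (10 ms at HZ=100). With HZ=1000 the same expression gives 8 jiffies, i.e. 8 ms, and no adjustment is needed.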

static void __exit cfq_exit(void)
{
        DECLARE_COMPLETION_ONSTACK(all_gone);
        elv_unregister(&iosched_cfq);
        ioc_gone = &all_gone;
        /* ioc_gone's update must be visible before reading ioc_count */
        smp_wmb();
        if (elv_ioc_count_read(ioc_count))
                wait_for_completion(ioc_gone);
        synchronize_rcu();
        cfq_slab_kill();
}

module_init(cfq_init);
module_exit(cfq_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");