Commit 0871714e08fed7ba66cadad11b2e4f85a9dc9b96

Authored by Jens Axboe
1 parent fadad878cc

cfq-iosched: relax IOPRIO_CLASS_IDLE restrictions

Currently you must be root to set the idle io prio class on a process. This
is because the idle class is implemented as a true idle class, meaning
that it will not make progress if someone else is requesting disk access.
Unfortunately this opens up DoS opportunities by locking down file system
resources, hence it is root only at the moment.

This patch relaxes the idle class a little by removing the truly idle
part (which entails a grace period with an associated timer). The
modifications make the idle class as close to zero impact as can be done
while still guaranteeing progress. This means we can relax the root-only
criterion as well.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

Showing 2 changed files with 34 additions and 85 deletions
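
The core of the change shows up in cfq_service_tree_add() below: instead of holding
idle-class queues behind a grace-period timer, the patch keys them to the far end of
the service tree, CFQ_IDLE_DELAY past the last queued key, so they always sort behind
RT and BE queues but are still guaranteed to be reached. The following is a minimal
user-space sketch of that keying, not the kernel code itself; the HZ value and the
idle_rb_key() helper are assumptions made for illustration.

/*
 * Minimal sketch of how the patch orders an idle-class queue: it simply
 * gets a service-tree key CFQ_IDLE_DELAY past the last queue in the tree,
 * so it sorts behind every RT/BE queue while still being served eventually.
 */
#include <stdio.h>

#define HZ 250                          /* assumed tick rate for the sketch */
#define CFQ_IDLE_DELAY (HZ / 5)         /* offset from end of service tree */

/* key an idle-class queue relative to the last key already in the tree */
static unsigned long idle_rb_key(unsigned long last_key, unsigned long jiffies,
                                 int tree_empty)
{
        unsigned long rb_key = CFQ_IDLE_DELAY;

        if (!tree_empty)
                rb_key += last_key;     /* sort just behind the last cfqq */
        else
                rb_key += jiffies;      /* empty tree: serve after the delay */

        return rb_key;
}

int main(void)
{
        unsigned long jiffies = 100000;         /* pretend current time */
        unsigned long be_key = jiffies + 40;    /* some best-effort queue's key */

        printf("BE   key: %lu\n", be_key);
        printf("IDLE key: %lu\n", idle_rb_key(be_key, jiffies, 0));
        return 0;
}

With that ordering in place, the idle_class_timer and its grace-period bookkeeping
become unnecessary, which is what the bulk of the diff below removes.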

1 /* 1 /*
2 * CFQ, or complete fairness queueing, disk scheduler. 2 * CFQ, or complete fairness queueing, disk scheduler.
3 * 3 *
4 * Based on ideas from a previously unfinished io 4 * Based on ideas from a previously unfinished io
5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. 5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
6 * 6 *
7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> 7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
8 */ 8 */
9 #include <linux/module.h> 9 #include <linux/module.h>
10 #include <linux/blkdev.h> 10 #include <linux/blkdev.h>
11 #include <linux/elevator.h> 11 #include <linux/elevator.h>
12 #include <linux/rbtree.h> 12 #include <linux/rbtree.h>
13 #include <linux/ioprio.h> 13 #include <linux/ioprio.h>
14 14
15 /* 15 /*
16 * tunables 16 * tunables
17 */ 17 */
18 static const int cfq_quantum = 4; /* max queue in one round of service */ 18 static const int cfq_quantum = 4; /* max queue in one round of service */
19 static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 19 static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
20 static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ 20 static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */
21 static const int cfq_back_penalty = 2; /* penalty of a backwards seek */ 21 static const int cfq_back_penalty = 2; /* penalty of a backwards seek */
22 22
23 static const int cfq_slice_sync = HZ / 10; 23 static const int cfq_slice_sync = HZ / 10;
24 static int cfq_slice_async = HZ / 25; 24 static int cfq_slice_async = HZ / 25;
25 static const int cfq_slice_async_rq = 2; 25 static const int cfq_slice_async_rq = 2;
26 static int cfq_slice_idle = HZ / 125; 26 static int cfq_slice_idle = HZ / 125;
27 27
28 /* 28 /*
29 * grace period before allowing idle class to get disk access 29 * offset from end of service tree
30 */ 30 */
31 #define CFQ_IDLE_GRACE (HZ / 10) 31 #define CFQ_IDLE_DELAY (HZ / 5)
32 32
33 /* 33 /*
34 * below this threshold, we consider thinktime immediate 34 * below this threshold, we consider thinktime immediate
35 */ 35 */
36 #define CFQ_MIN_TT (2) 36 #define CFQ_MIN_TT (2)
37 37
38 #define CFQ_SLICE_SCALE (5) 38 #define CFQ_SLICE_SCALE (5)
39 39
40 #define RQ_CIC(rq) ((struct cfq_io_context*)(rq)->elevator_private) 40 #define RQ_CIC(rq) ((struct cfq_io_context*)(rq)->elevator_private)
41 #define RQ_CFQQ(rq) ((rq)->elevator_private2) 41 #define RQ_CFQQ(rq) ((rq)->elevator_private2)
42 42
43 static struct kmem_cache *cfq_pool; 43 static struct kmem_cache *cfq_pool;
44 static struct kmem_cache *cfq_ioc_pool; 44 static struct kmem_cache *cfq_ioc_pool;
45 45
46 static DEFINE_PER_CPU(unsigned long, ioc_count); 46 static DEFINE_PER_CPU(unsigned long, ioc_count);
47 static struct completion *ioc_gone; 47 static struct completion *ioc_gone;
48 48
49 #define CFQ_PRIO_LISTS IOPRIO_BE_NR 49 #define CFQ_PRIO_LISTS IOPRIO_BE_NR
50 #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 50 #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
51 #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) 51 #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
52 52
53 #define ASYNC (0) 53 #define ASYNC (0)
54 #define SYNC (1) 54 #define SYNC (1)
55 55
56 #define sample_valid(samples) ((samples) > 80) 56 #define sample_valid(samples) ((samples) > 80)
57 57
58 /* 58 /*
59 * Most of our rbtree usage is for sorting with min extraction, so 59 * Most of our rbtree usage is for sorting with min extraction, so
60 * if we cache the leftmost node we don't have to walk down the tree 60 * if we cache the leftmost node we don't have to walk down the tree
61 * to find it. Idea borrowed from Ingo Molnars CFS scheduler. We should 61 * to find it. Idea borrowed from Ingo Molnars CFS scheduler. We should
62 * move this into the elevator for the rq sorting as well. 62 * move this into the elevator for the rq sorting as well.
63 */ 63 */
64 struct cfq_rb_root { 64 struct cfq_rb_root {
65 struct rb_root rb; 65 struct rb_root rb;
66 struct rb_node *left; 66 struct rb_node *left;
67 }; 67 };
68 #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, } 68 #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, }
69 69
70 /* 70 /*
71 * Per block device queue structure 71 * Per block device queue structure
72 */ 72 */
73 struct cfq_data { 73 struct cfq_data {
74 struct request_queue *queue; 74 struct request_queue *queue;
75 75
76 /* 76 /*
77 * rr list of queues with requests and the count of them 77 * rr list of queues with requests and the count of them
78 */ 78 */
79 struct cfq_rb_root service_tree; 79 struct cfq_rb_root service_tree;
80 unsigned int busy_queues; 80 unsigned int busy_queues;
81 81
82 int rq_in_driver; 82 int rq_in_driver;
83 int sync_flight; 83 int sync_flight;
84 int hw_tag; 84 int hw_tag;
85 85
86 /* 86 /*
87 * idle window management 87 * idle window management
88 */ 88 */
89 struct timer_list idle_slice_timer; 89 struct timer_list idle_slice_timer;
90 struct work_struct unplug_work; 90 struct work_struct unplug_work;
91 91
92 struct cfq_queue *active_queue; 92 struct cfq_queue *active_queue;
93 struct cfq_io_context *active_cic; 93 struct cfq_io_context *active_cic;
94 94
95 /* 95 /*
96 * async queue for each priority case 96 * async queue for each priority case
97 */ 97 */
98 struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR]; 98 struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
99 struct cfq_queue *async_idle_cfqq; 99 struct cfq_queue *async_idle_cfqq;
100 100
101 struct timer_list idle_class_timer;
102
103 sector_t last_position; 101 sector_t last_position;
104 unsigned long last_end_request; 102 unsigned long last_end_request;
105 103
106 /* 104 /*
107 * tunables, see top of file 105 * tunables, see top of file
108 */ 106 */
109 unsigned int cfq_quantum; 107 unsigned int cfq_quantum;
110 unsigned int cfq_fifo_expire[2]; 108 unsigned int cfq_fifo_expire[2];
111 unsigned int cfq_back_penalty; 109 unsigned int cfq_back_penalty;
112 unsigned int cfq_back_max; 110 unsigned int cfq_back_max;
113 unsigned int cfq_slice[2]; 111 unsigned int cfq_slice[2];
114 unsigned int cfq_slice_async_rq; 112 unsigned int cfq_slice_async_rq;
115 unsigned int cfq_slice_idle; 113 unsigned int cfq_slice_idle;
116 114
117 struct list_head cic_list; 115 struct list_head cic_list;
118 }; 116 };
119 117
120 /* 118 /*
121 * Per process-grouping structure 119 * Per process-grouping structure
122 */ 120 */
123 struct cfq_queue { 121 struct cfq_queue {
124 /* reference count */ 122 /* reference count */
125 atomic_t ref; 123 atomic_t ref;
126 /* parent cfq_data */ 124 /* parent cfq_data */
127 struct cfq_data *cfqd; 125 struct cfq_data *cfqd;
128 /* service_tree member */ 126 /* service_tree member */
129 struct rb_node rb_node; 127 struct rb_node rb_node;
130 /* service_tree key */ 128 /* service_tree key */
131 unsigned long rb_key; 129 unsigned long rb_key;
132 /* sorted list of pending requests */ 130 /* sorted list of pending requests */
133 struct rb_root sort_list; 131 struct rb_root sort_list;
134 /* if fifo isn't expired, next request to serve */ 132 /* if fifo isn't expired, next request to serve */
135 struct request *next_rq; 133 struct request *next_rq;
136 /* requests queued in sort_list */ 134 /* requests queued in sort_list */
137 int queued[2]; 135 int queued[2];
138 /* currently allocated requests */ 136 /* currently allocated requests */
139 int allocated[2]; 137 int allocated[2];
140 /* pending metadata requests */ 138 /* pending metadata requests */
141 int meta_pending; 139 int meta_pending;
142 /* fifo list of requests in sort_list */ 140 /* fifo list of requests in sort_list */
143 struct list_head fifo; 141 struct list_head fifo;
144 142
145 unsigned long slice_end; 143 unsigned long slice_end;
146 long slice_resid; 144 long slice_resid;
147 145
148 /* number of requests that are on the dispatch list or inside driver */ 146 /* number of requests that are on the dispatch list or inside driver */
149 int dispatched; 147 int dispatched;
150 148
151 /* io prio of this group */ 149 /* io prio of this group */
152 unsigned short ioprio, org_ioprio; 150 unsigned short ioprio, org_ioprio;
153 unsigned short ioprio_class, org_ioprio_class; 151 unsigned short ioprio_class, org_ioprio_class;
154 152
155 /* various state flags, see below */ 153 /* various state flags, see below */
156 unsigned int flags; 154 unsigned int flags;
157 }; 155 };
158 156
159 enum cfqq_state_flags { 157 enum cfqq_state_flags {
160 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ 158 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */
161 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ 159 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */
162 CFQ_CFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ 160 CFQ_CFQQ_FLAG_must_alloc, /* must be allowed rq alloc */
163 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */ 161 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */
164 CFQ_CFQQ_FLAG_must_dispatch, /* must dispatch, even if expired */ 162 CFQ_CFQQ_FLAG_must_dispatch, /* must dispatch, even if expired */
165 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ 163 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
166 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */ 164 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */
167 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ 165 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */
168 CFQ_CFQQ_FLAG_queue_new, /* queue never been serviced */ 166 CFQ_CFQQ_FLAG_queue_new, /* queue never been serviced */
169 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ 167 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */
170 CFQ_CFQQ_FLAG_sync, /* synchronous queue */ 168 CFQ_CFQQ_FLAG_sync, /* synchronous queue */
171 }; 169 };
172 170
173 #define CFQ_CFQQ_FNS(name) \ 171 #define CFQ_CFQQ_FNS(name) \
174 static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \ 172 static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \
175 { \ 173 { \
176 cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name); \ 174 cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name); \
177 } \ 175 } \
178 static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \ 176 static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \
179 { \ 177 { \
180 cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \ 178 cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \
181 } \ 179 } \
182 static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ 180 static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
183 { \ 181 { \
184 return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \ 182 return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \
185 } 183 }
186 184
187 CFQ_CFQQ_FNS(on_rr); 185 CFQ_CFQQ_FNS(on_rr);
188 CFQ_CFQQ_FNS(wait_request); 186 CFQ_CFQQ_FNS(wait_request);
189 CFQ_CFQQ_FNS(must_alloc); 187 CFQ_CFQQ_FNS(must_alloc);
190 CFQ_CFQQ_FNS(must_alloc_slice); 188 CFQ_CFQQ_FNS(must_alloc_slice);
191 CFQ_CFQQ_FNS(must_dispatch); 189 CFQ_CFQQ_FNS(must_dispatch);
192 CFQ_CFQQ_FNS(fifo_expire); 190 CFQ_CFQQ_FNS(fifo_expire);
193 CFQ_CFQQ_FNS(idle_window); 191 CFQ_CFQQ_FNS(idle_window);
194 CFQ_CFQQ_FNS(prio_changed); 192 CFQ_CFQQ_FNS(prio_changed);
195 CFQ_CFQQ_FNS(queue_new); 193 CFQ_CFQQ_FNS(queue_new);
196 CFQ_CFQQ_FNS(slice_new); 194 CFQ_CFQQ_FNS(slice_new);
197 CFQ_CFQQ_FNS(sync); 195 CFQ_CFQQ_FNS(sync);
198 #undef CFQ_CFQQ_FNS 196 #undef CFQ_CFQQ_FNS
199 197
200 static void cfq_dispatch_insert(struct request_queue *, struct request *); 198 static void cfq_dispatch_insert(struct request_queue *, struct request *);
201 static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, 199 static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
202 struct io_context *, gfp_t); 200 struct io_context *, gfp_t);
203 static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, 201 static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
204 struct io_context *); 202 struct io_context *);
205 203
206 static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, 204 static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
207 int is_sync) 205 int is_sync)
208 { 206 {
209 return cic->cfqq[!!is_sync]; 207 return cic->cfqq[!!is_sync];
210 } 208 }
211 209
212 static inline void cic_set_cfqq(struct cfq_io_context *cic, 210 static inline void cic_set_cfqq(struct cfq_io_context *cic,
213 struct cfq_queue *cfqq, int is_sync) 211 struct cfq_queue *cfqq, int is_sync)
214 { 212 {
215 cic->cfqq[!!is_sync] = cfqq; 213 cic->cfqq[!!is_sync] = cfqq;
216 } 214 }
217 215
218 /* 216 /*
219 * We regard a request as SYNC, if it's either a read or has the SYNC bit 217 * We regard a request as SYNC, if it's either a read or has the SYNC bit
220 * set (in which case it could also be direct WRITE). 218 * set (in which case it could also be direct WRITE).
221 */ 219 */
222 static inline int cfq_bio_sync(struct bio *bio) 220 static inline int cfq_bio_sync(struct bio *bio)
223 { 221 {
224 if (bio_data_dir(bio) == READ || bio_sync(bio)) 222 if (bio_data_dir(bio) == READ || bio_sync(bio))
225 return 1; 223 return 1;
226 224
227 return 0; 225 return 0;
228 } 226 }
229 227
230 /* 228 /*
231 * scheduler run of queue, if there are requests pending and no one in the 229 * scheduler run of queue, if there are requests pending and no one in the
232 * driver that will restart queueing 230 * driver that will restart queueing
233 */ 231 */
234 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) 232 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
235 { 233 {
236 if (cfqd->busy_queues) 234 if (cfqd->busy_queues)
237 kblockd_schedule_work(&cfqd->unplug_work); 235 kblockd_schedule_work(&cfqd->unplug_work);
238 } 236 }
239 237
240 static int cfq_queue_empty(struct request_queue *q) 238 static int cfq_queue_empty(struct request_queue *q)
241 { 239 {
242 struct cfq_data *cfqd = q->elevator->elevator_data; 240 struct cfq_data *cfqd = q->elevator->elevator_data;
243 241
244 return !cfqd->busy_queues; 242 return !cfqd->busy_queues;
245 } 243 }
246 244
247 /* 245 /*
248 * Scale schedule slice based on io priority. Use the sync time slice only 246 * Scale schedule slice based on io priority. Use the sync time slice only
249 * if a queue is marked sync and has sync io queued. A sync queue with async 247 * if a queue is marked sync and has sync io queued. A sync queue with async
250 * io only, should not get full sync slice length. 248 * io only, should not get full sync slice length.
251 */ 249 */
252 static inline int cfq_prio_slice(struct cfq_data *cfqd, int sync, 250 static inline int cfq_prio_slice(struct cfq_data *cfqd, int sync,
253 unsigned short prio) 251 unsigned short prio)
254 { 252 {
255 const int base_slice = cfqd->cfq_slice[sync]; 253 const int base_slice = cfqd->cfq_slice[sync];
256 254
257 WARN_ON(prio >= IOPRIO_BE_NR); 255 WARN_ON(prio >= IOPRIO_BE_NR);
258 256
259 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio)); 257 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio));
260 } 258 }
261 259
262 static inline int 260 static inline int
263 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 261 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
264 { 262 {
265 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); 263 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
266 } 264 }
267 265
268 static inline void 266 static inline void
269 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 267 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
270 { 268 {
271 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; 269 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
272 } 270 }
273 271
274 /* 272 /*
275 * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end 273 * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end
276 * isn't valid until the first request from the dispatch is activated 274 * isn't valid until the first request from the dispatch is activated
277 * and the slice time set. 275 * and the slice time set.
278 */ 276 */
279 static inline int cfq_slice_used(struct cfq_queue *cfqq) 277 static inline int cfq_slice_used(struct cfq_queue *cfqq)
280 { 278 {
281 if (cfq_cfqq_slice_new(cfqq)) 279 if (cfq_cfqq_slice_new(cfqq))
282 return 0; 280 return 0;
283 if (time_before(jiffies, cfqq->slice_end)) 281 if (time_before(jiffies, cfqq->slice_end))
284 return 0; 282 return 0;
285 283
286 return 1; 284 return 1;
287 } 285 }
288 286
289 /* 287 /*
290 * Lifted from AS - choose which of rq1 and rq2 that is best served now. 288 * Lifted from AS - choose which of rq1 and rq2 that is best served now.
291 * We choose the request that is closest to the head right now. Distance 289 * We choose the request that is closest to the head right now. Distance
292 * behind the head is penalized and only allowed to a certain extent. 290 * behind the head is penalized and only allowed to a certain extent.
293 */ 291 */
294 static struct request * 292 static struct request *
295 cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2) 293 cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
296 { 294 {
297 sector_t last, s1, s2, d1 = 0, d2 = 0; 295 sector_t last, s1, s2, d1 = 0, d2 = 0;
298 unsigned long back_max; 296 unsigned long back_max;
299 #define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */ 297 #define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */
300 #define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */ 298 #define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */
301 unsigned wrap = 0; /* bit mask: requests behind the disk head? */ 299 unsigned wrap = 0; /* bit mask: requests behind the disk head? */
302 300
303 if (rq1 == NULL || rq1 == rq2) 301 if (rq1 == NULL || rq1 == rq2)
304 return rq2; 302 return rq2;
305 if (rq2 == NULL) 303 if (rq2 == NULL)
306 return rq1; 304 return rq1;
307 305
308 if (rq_is_sync(rq1) && !rq_is_sync(rq2)) 306 if (rq_is_sync(rq1) && !rq_is_sync(rq2))
309 return rq1; 307 return rq1;
310 else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) 308 else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
311 return rq2; 309 return rq2;
312 if (rq_is_meta(rq1) && !rq_is_meta(rq2)) 310 if (rq_is_meta(rq1) && !rq_is_meta(rq2))
313 return rq1; 311 return rq1;
314 else if (rq_is_meta(rq2) && !rq_is_meta(rq1)) 312 else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
315 return rq2; 313 return rq2;
316 314
317 s1 = rq1->sector; 315 s1 = rq1->sector;
318 s2 = rq2->sector; 316 s2 = rq2->sector;
319 317
320 last = cfqd->last_position; 318 last = cfqd->last_position;
321 319
322 /* 320 /*
323 * by definition, 1KiB is 2 sectors 321 * by definition, 1KiB is 2 sectors
324 */ 322 */
325 back_max = cfqd->cfq_back_max * 2; 323 back_max = cfqd->cfq_back_max * 2;
326 324
327 /* 325 /*
328 * Strict one way elevator _except_ in the case where we allow 326 * Strict one way elevator _except_ in the case where we allow
329 * short backward seeks which are biased as twice the cost of a 327 * short backward seeks which are biased as twice the cost of a
330 * similar forward seek. 328 * similar forward seek.
331 */ 329 */
332 if (s1 >= last) 330 if (s1 >= last)
333 d1 = s1 - last; 331 d1 = s1 - last;
334 else if (s1 + back_max >= last) 332 else if (s1 + back_max >= last)
335 d1 = (last - s1) * cfqd->cfq_back_penalty; 333 d1 = (last - s1) * cfqd->cfq_back_penalty;
336 else 334 else
337 wrap |= CFQ_RQ1_WRAP; 335 wrap |= CFQ_RQ1_WRAP;
338 336
339 if (s2 >= last) 337 if (s2 >= last)
340 d2 = s2 - last; 338 d2 = s2 - last;
341 else if (s2 + back_max >= last) 339 else if (s2 + back_max >= last)
342 d2 = (last - s2) * cfqd->cfq_back_penalty; 340 d2 = (last - s2) * cfqd->cfq_back_penalty;
343 else 341 else
344 wrap |= CFQ_RQ2_WRAP; 342 wrap |= CFQ_RQ2_WRAP;
345 343
346 /* Found required data */ 344 /* Found required data */
347 345
348 /* 346 /*
349 * By doing switch() on the bit mask "wrap" we avoid having to 347 * By doing switch() on the bit mask "wrap" we avoid having to
350 * check two variables for all permutations: --> faster! 348 * check two variables for all permutations: --> faster!
351 */ 349 */
352 switch (wrap) { 350 switch (wrap) {
353 case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ 351 case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
354 if (d1 < d2) 352 if (d1 < d2)
355 return rq1; 353 return rq1;
356 else if (d2 < d1) 354 else if (d2 < d1)
357 return rq2; 355 return rq2;
358 else { 356 else {
359 if (s1 >= s2) 357 if (s1 >= s2)
360 return rq1; 358 return rq1;
361 else 359 else
362 return rq2; 360 return rq2;
363 } 361 }
364 362
365 case CFQ_RQ2_WRAP: 363 case CFQ_RQ2_WRAP:
366 return rq1; 364 return rq1;
367 case CFQ_RQ1_WRAP: 365 case CFQ_RQ1_WRAP:
368 return rq2; 366 return rq2;
369 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */ 367 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
370 default: 368 default:
371 /* 369 /*
372 * Since both rqs are wrapped, 370 * Since both rqs are wrapped,
373 * start with the one that's further behind head 371 * start with the one that's further behind head
374 * (--> only *one* back seek required), 372 * (--> only *one* back seek required),
375 * since back seek takes more time than forward. 373 * since back seek takes more time than forward.
376 */ 374 */
377 if (s1 <= s2) 375 if (s1 <= s2)
378 return rq1; 376 return rq1;
379 else 377 else
380 return rq2; 378 return rq2;
381 } 379 }
382 } 380 }
383 381
384 /* 382 /*
385 * The below is leftmost cache rbtree addon 383 * The below is leftmost cache rbtree addon
386 */ 384 */
387 static struct rb_node *cfq_rb_first(struct cfq_rb_root *root) 385 static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
388 { 386 {
389 if (!root->left) 387 if (!root->left)
390 root->left = rb_first(&root->rb); 388 root->left = rb_first(&root->rb);
391 389
392 return root->left; 390 if (root->left)
391 return rb_entry(root->left, struct cfq_queue, rb_node);
392
393 return NULL;
393 } 394 }
394 395
395 static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) 396 static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
396 { 397 {
397 if (root->left == n) 398 if (root->left == n)
398 root->left = NULL; 399 root->left = NULL;
399 400
400 rb_erase(n, &root->rb); 401 rb_erase(n, &root->rb);
401 RB_CLEAR_NODE(n); 402 RB_CLEAR_NODE(n);
402 } 403 }
403 404
404 /* 405 /*
405 * would be nice to take fifo expire time into account as well 406 * would be nice to take fifo expire time into account as well
406 */ 407 */
407 static struct request * 408 static struct request *
408 cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq, 409 cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
409 struct request *last) 410 struct request *last)
410 { 411 {
411 struct rb_node *rbnext = rb_next(&last->rb_node); 412 struct rb_node *rbnext = rb_next(&last->rb_node);
412 struct rb_node *rbprev = rb_prev(&last->rb_node); 413 struct rb_node *rbprev = rb_prev(&last->rb_node);
413 struct request *next = NULL, *prev = NULL; 414 struct request *next = NULL, *prev = NULL;
414 415
415 BUG_ON(RB_EMPTY_NODE(&last->rb_node)); 416 BUG_ON(RB_EMPTY_NODE(&last->rb_node));
416 417
417 if (rbprev) 418 if (rbprev)
418 prev = rb_entry_rq(rbprev); 419 prev = rb_entry_rq(rbprev);
419 420
420 if (rbnext) 421 if (rbnext)
421 next = rb_entry_rq(rbnext); 422 next = rb_entry_rq(rbnext);
422 else { 423 else {
423 rbnext = rb_first(&cfqq->sort_list); 424 rbnext = rb_first(&cfqq->sort_list);
424 if (rbnext && rbnext != &last->rb_node) 425 if (rbnext && rbnext != &last->rb_node)
425 next = rb_entry_rq(rbnext); 426 next = rb_entry_rq(rbnext);
426 } 427 }
427 428
428 return cfq_choose_req(cfqd, next, prev); 429 return cfq_choose_req(cfqd, next, prev);
429 } 430 }
430 431
431 static unsigned long cfq_slice_offset(struct cfq_data *cfqd, 432 static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
432 struct cfq_queue *cfqq) 433 struct cfq_queue *cfqq)
433 { 434 {
434 /* 435 /*
435 * just an approximation, should be ok. 436 * just an approximation, should be ok.
436 */ 437 */
437 return (cfqd->busy_queues - 1) * (cfq_prio_slice(cfqd, 1, 0) - 438 return (cfqd->busy_queues - 1) * (cfq_prio_slice(cfqd, 1, 0) -
438 cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio)); 439 cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
439 } 440 }
440 441
441 /* 442 /*
442 * The cfqd->service_tree holds all pending cfq_queue's that have 443 * The cfqd->service_tree holds all pending cfq_queue's that have
443 * requests waiting to be processed. It is sorted in the order that 444 * requests waiting to be processed. It is sorted in the order that
444 * we will service the queues. 445 * we will service the queues.
445 */ 446 */
446 static void cfq_service_tree_add(struct cfq_data *cfqd, 447 static void cfq_service_tree_add(struct cfq_data *cfqd,
447 struct cfq_queue *cfqq, int add_front) 448 struct cfq_queue *cfqq, int add_front)
448 { 449 {
449 struct rb_node **p = &cfqd->service_tree.rb.rb_node; 450 struct rb_node **p, *parent;
450 struct rb_node *parent = NULL; 451 struct cfq_queue *__cfqq;
451 unsigned long rb_key; 452 unsigned long rb_key;
452 int left; 453 int left;
453 454
454 if (!add_front) { 455 if (cfq_class_idle(cfqq)) {
456 rb_key = CFQ_IDLE_DELAY;
457 parent = rb_last(&cfqd->service_tree.rb);
458 if (parent && parent != &cfqq->rb_node) {
459 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
460 rb_key += __cfqq->rb_key;
461 } else
462 rb_key += jiffies;
463 } else if (!add_front) {
455 rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; 464 rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
456 rb_key += cfqq->slice_resid; 465 rb_key += cfqq->slice_resid;
457 cfqq->slice_resid = 0; 466 cfqq->slice_resid = 0;
458 } else 467 } else
459 rb_key = 0; 468 rb_key = 0;
460 469
461 if (!RB_EMPTY_NODE(&cfqq->rb_node)) { 470 if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
462 /* 471 /*
463 * same position, nothing more to do 472 * same position, nothing more to do
464 */ 473 */
465 if (rb_key == cfqq->rb_key) 474 if (rb_key == cfqq->rb_key)
466 return; 475 return;
467 476
468 cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); 477 cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
469 } 478 }
470 479
471 left = 1; 480 left = 1;
481 parent = NULL;
482 p = &cfqd->service_tree.rb.rb_node;
472 while (*p) { 483 while (*p) {
473 struct cfq_queue *__cfqq;
474 struct rb_node **n; 484 struct rb_node **n;
475 485
476 parent = *p; 486 parent = *p;
477 __cfqq = rb_entry(parent, struct cfq_queue, rb_node); 487 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
478 488
479 /* 489 /*
480 * sort RT queues first, we always want to give 490 * sort RT queues first, we always want to give
481 * preference to them. IDLE queues goes to the back. 491 * preference to them. IDLE queues goes to the back.
482 * after that, sort on the next service time. 492 * after that, sort on the next service time.
483 */ 493 */
484 if (cfq_class_rt(cfqq) > cfq_class_rt(__cfqq)) 494 if (cfq_class_rt(cfqq) > cfq_class_rt(__cfqq))
485 n = &(*p)->rb_left; 495 n = &(*p)->rb_left;
486 else if (cfq_class_rt(cfqq) < cfq_class_rt(__cfqq)) 496 else if (cfq_class_rt(cfqq) < cfq_class_rt(__cfqq))
487 n = &(*p)->rb_right; 497 n = &(*p)->rb_right;
488 else if (cfq_class_idle(cfqq) < cfq_class_idle(__cfqq)) 498 else if (cfq_class_idle(cfqq) < cfq_class_idle(__cfqq))
489 n = &(*p)->rb_left; 499 n = &(*p)->rb_left;
490 else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq)) 500 else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq))
491 n = &(*p)->rb_right; 501 n = &(*p)->rb_right;
492 else if (rb_key < __cfqq->rb_key) 502 else if (rb_key < __cfqq->rb_key)
493 n = &(*p)->rb_left; 503 n = &(*p)->rb_left;
494 else 504 else
495 n = &(*p)->rb_right; 505 n = &(*p)->rb_right;
496 506
497 if (n == &(*p)->rb_right) 507 if (n == &(*p)->rb_right)
498 left = 0; 508 left = 0;
499 509
500 p = n; 510 p = n;
501 } 511 }
502 512
503 if (left) 513 if (left)
504 cfqd->service_tree.left = &cfqq->rb_node; 514 cfqd->service_tree.left = &cfqq->rb_node;
505 515
506 cfqq->rb_key = rb_key; 516 cfqq->rb_key = rb_key;
507 rb_link_node(&cfqq->rb_node, parent, p); 517 rb_link_node(&cfqq->rb_node, parent, p);
508 rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb); 518 rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb);
509 } 519 }
510 520
511 /* 521 /*
512 * Update cfqq's position in the service tree. 522 * Update cfqq's position in the service tree.
513 */ 523 */
514 static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq) 524 static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
515 { 525 {
516 /* 526 /*
517 * Resorting requires the cfqq to be on the RR list already. 527 * Resorting requires the cfqq to be on the RR list already.
518 */ 528 */
519 if (cfq_cfqq_on_rr(cfqq)) 529 if (cfq_cfqq_on_rr(cfqq))
520 cfq_service_tree_add(cfqd, cfqq, 0); 530 cfq_service_tree_add(cfqd, cfqq, 0);
521 } 531 }
522 532
523 /* 533 /*
524 * add to busy list of queues for service, trying to be fair in ordering 534 * add to busy list of queues for service, trying to be fair in ordering
525 * the pending list according to last request service 535 * the pending list according to last request service
526 */ 536 */
527 static inline void 537 static inline void
528 cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 538 cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
529 { 539 {
530 BUG_ON(cfq_cfqq_on_rr(cfqq)); 540 BUG_ON(cfq_cfqq_on_rr(cfqq));
531 cfq_mark_cfqq_on_rr(cfqq); 541 cfq_mark_cfqq_on_rr(cfqq);
532 cfqd->busy_queues++; 542 cfqd->busy_queues++;
533 543
534 cfq_resort_rr_list(cfqd, cfqq); 544 cfq_resort_rr_list(cfqd, cfqq);
535 } 545 }
536 546
537 /* 547 /*
538 * Called when the cfqq no longer has requests pending, remove it from 548 * Called when the cfqq no longer has requests pending, remove it from
539 * the service tree. 549 * the service tree.
540 */ 550 */
541 static inline void 551 static inline void
542 cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 552 cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
543 { 553 {
544 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 554 BUG_ON(!cfq_cfqq_on_rr(cfqq));
545 cfq_clear_cfqq_on_rr(cfqq); 555 cfq_clear_cfqq_on_rr(cfqq);
546 556
547 if (!RB_EMPTY_NODE(&cfqq->rb_node)) 557 if (!RB_EMPTY_NODE(&cfqq->rb_node))
548 cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); 558 cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
549 559
550 BUG_ON(!cfqd->busy_queues); 560 BUG_ON(!cfqd->busy_queues);
551 cfqd->busy_queues--; 561 cfqd->busy_queues--;
552 } 562 }
553 563
554 /* 564 /*
555 * rb tree support functions 565 * rb tree support functions
556 */ 566 */
557 static inline void cfq_del_rq_rb(struct request *rq) 567 static inline void cfq_del_rq_rb(struct request *rq)
558 { 568 {
559 struct cfq_queue *cfqq = RQ_CFQQ(rq); 569 struct cfq_queue *cfqq = RQ_CFQQ(rq);
560 struct cfq_data *cfqd = cfqq->cfqd; 570 struct cfq_data *cfqd = cfqq->cfqd;
561 const int sync = rq_is_sync(rq); 571 const int sync = rq_is_sync(rq);
562 572
563 BUG_ON(!cfqq->queued[sync]); 573 BUG_ON(!cfqq->queued[sync]);
564 cfqq->queued[sync]--; 574 cfqq->queued[sync]--;
565 575
566 elv_rb_del(&cfqq->sort_list, rq); 576 elv_rb_del(&cfqq->sort_list, rq);
567 577
568 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) 578 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
569 cfq_del_cfqq_rr(cfqd, cfqq); 579 cfq_del_cfqq_rr(cfqd, cfqq);
570 } 580 }
571 581
572 static void cfq_add_rq_rb(struct request *rq) 582 static void cfq_add_rq_rb(struct request *rq)
573 { 583 {
574 struct cfq_queue *cfqq = RQ_CFQQ(rq); 584 struct cfq_queue *cfqq = RQ_CFQQ(rq);
575 struct cfq_data *cfqd = cfqq->cfqd; 585 struct cfq_data *cfqd = cfqq->cfqd;
576 struct request *__alias; 586 struct request *__alias;
577 587
578 cfqq->queued[rq_is_sync(rq)]++; 588 cfqq->queued[rq_is_sync(rq)]++;
579 589
580 /* 590 /*
581 * looks a little odd, but the first insert might return an alias. 591 * looks a little odd, but the first insert might return an alias.
582 * if that happens, put the alias on the dispatch list 592 * if that happens, put the alias on the dispatch list
583 */ 593 */
584 while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL) 594 while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
585 cfq_dispatch_insert(cfqd->queue, __alias); 595 cfq_dispatch_insert(cfqd->queue, __alias);
586 596
587 if (!cfq_cfqq_on_rr(cfqq)) 597 if (!cfq_cfqq_on_rr(cfqq))
588 cfq_add_cfqq_rr(cfqd, cfqq); 598 cfq_add_cfqq_rr(cfqd, cfqq);
589 599
590 /* 600 /*
591 * check if this request is a better next-serve candidate 601 * check if this request is a better next-serve candidate
592 */ 602 */
593 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq); 603 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
594 BUG_ON(!cfqq->next_rq); 604 BUG_ON(!cfqq->next_rq);
595 } 605 }
596 606
597 static inline void 607 static inline void
598 cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) 608 cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
599 { 609 {
600 elv_rb_del(&cfqq->sort_list, rq); 610 elv_rb_del(&cfqq->sort_list, rq);
601 cfqq->queued[rq_is_sync(rq)]--; 611 cfqq->queued[rq_is_sync(rq)]--;
602 cfq_add_rq_rb(rq); 612 cfq_add_rq_rb(rq);
603 } 613 }
604 614
605 static struct request * 615 static struct request *
606 cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) 616 cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
607 { 617 {
608 struct task_struct *tsk = current; 618 struct task_struct *tsk = current;
609 struct cfq_io_context *cic; 619 struct cfq_io_context *cic;
610 struct cfq_queue *cfqq; 620 struct cfq_queue *cfqq;
611 621
612 cic = cfq_cic_lookup(cfqd, tsk->io_context); 622 cic = cfq_cic_lookup(cfqd, tsk->io_context);
613 if (!cic) 623 if (!cic)
614 return NULL; 624 return NULL;
615 625
616 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); 626 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
617 if (cfqq) { 627 if (cfqq) {
618 sector_t sector = bio->bi_sector + bio_sectors(bio); 628 sector_t sector = bio->bi_sector + bio_sectors(bio);
619 629
620 return elv_rb_find(&cfqq->sort_list, sector); 630 return elv_rb_find(&cfqq->sort_list, sector);
621 } 631 }
622 632
623 return NULL; 633 return NULL;
624 } 634 }
625 635
626 static void cfq_activate_request(struct request_queue *q, struct request *rq) 636 static void cfq_activate_request(struct request_queue *q, struct request *rq)
627 { 637 {
628 struct cfq_data *cfqd = q->elevator->elevator_data; 638 struct cfq_data *cfqd = q->elevator->elevator_data;
629 639
630 cfqd->rq_in_driver++; 640 cfqd->rq_in_driver++;
631 641
632 /* 642 /*
633 * If the depth is larger 1, it really could be queueing. But lets 643 * If the depth is larger 1, it really could be queueing. But lets
634 * make the mark a little higher - idling could still be good for 644 * make the mark a little higher - idling could still be good for
635 * low queueing, and a low queueing number could also just indicate 645 * low queueing, and a low queueing number could also just indicate
636 * a SCSI mid layer like behaviour where limit+1 is often seen. 646 * a SCSI mid layer like behaviour where limit+1 is often seen.
637 */ 647 */
638 if (!cfqd->hw_tag && cfqd->rq_in_driver > 4) 648 if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
639 cfqd->hw_tag = 1; 649 cfqd->hw_tag = 1;
640 650
641 cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors; 651 cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
642 } 652 }
643 653
644 static void cfq_deactivate_request(struct request_queue *q, struct request *rq) 654 static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
645 { 655 {
646 struct cfq_data *cfqd = q->elevator->elevator_data; 656 struct cfq_data *cfqd = q->elevator->elevator_data;
647 657
648 WARN_ON(!cfqd->rq_in_driver); 658 WARN_ON(!cfqd->rq_in_driver);
649 cfqd->rq_in_driver--; 659 cfqd->rq_in_driver--;
650 } 660 }
651 661
652 static void cfq_remove_request(struct request *rq) 662 static void cfq_remove_request(struct request *rq)
653 { 663 {
654 struct cfq_queue *cfqq = RQ_CFQQ(rq); 664 struct cfq_queue *cfqq = RQ_CFQQ(rq);
655 665
656 if (cfqq->next_rq == rq) 666 if (cfqq->next_rq == rq)
657 cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq); 667 cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);
658 668
659 list_del_init(&rq->queuelist); 669 list_del_init(&rq->queuelist);
660 cfq_del_rq_rb(rq); 670 cfq_del_rq_rb(rq);
661 671
662 if (rq_is_meta(rq)) { 672 if (rq_is_meta(rq)) {
663 WARN_ON(!cfqq->meta_pending); 673 WARN_ON(!cfqq->meta_pending);
664 cfqq->meta_pending--; 674 cfqq->meta_pending--;
665 } 675 }
666 } 676 }
667 677
668 static int cfq_merge(struct request_queue *q, struct request **req, 678 static int cfq_merge(struct request_queue *q, struct request **req,
669 struct bio *bio) 679 struct bio *bio)
670 { 680 {
671 struct cfq_data *cfqd = q->elevator->elevator_data; 681 struct cfq_data *cfqd = q->elevator->elevator_data;
672 struct request *__rq; 682 struct request *__rq;
673 683
674 __rq = cfq_find_rq_fmerge(cfqd, bio); 684 __rq = cfq_find_rq_fmerge(cfqd, bio);
675 if (__rq && elv_rq_merge_ok(__rq, bio)) { 685 if (__rq && elv_rq_merge_ok(__rq, bio)) {
676 *req = __rq; 686 *req = __rq;
677 return ELEVATOR_FRONT_MERGE; 687 return ELEVATOR_FRONT_MERGE;
678 } 688 }
679 689
680 return ELEVATOR_NO_MERGE; 690 return ELEVATOR_NO_MERGE;
681 } 691 }
682 692
683 static void cfq_merged_request(struct request_queue *q, struct request *req, 693 static void cfq_merged_request(struct request_queue *q, struct request *req,
684 int type) 694 int type)
685 { 695 {
686 if (type == ELEVATOR_FRONT_MERGE) { 696 if (type == ELEVATOR_FRONT_MERGE) {
687 struct cfq_queue *cfqq = RQ_CFQQ(req); 697 struct cfq_queue *cfqq = RQ_CFQQ(req);
688 698
689 cfq_reposition_rq_rb(cfqq, req); 699 cfq_reposition_rq_rb(cfqq, req);
690 } 700 }
691 } 701 }
692 702
693 static void 703 static void
694 cfq_merged_requests(struct request_queue *q, struct request *rq, 704 cfq_merged_requests(struct request_queue *q, struct request *rq,
695 struct request *next) 705 struct request *next)
696 { 706 {
697 /* 707 /*
698 * reposition in fifo if next is older than rq 708 * reposition in fifo if next is older than rq
699 */ 709 */
700 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && 710 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
701 time_before(next->start_time, rq->start_time)) 711 time_before(next->start_time, rq->start_time))
702 list_move(&rq->queuelist, &next->queuelist); 712 list_move(&rq->queuelist, &next->queuelist);
703 713
704 cfq_remove_request(next); 714 cfq_remove_request(next);
705 } 715 }
706 716
707 static int cfq_allow_merge(struct request_queue *q, struct request *rq, 717 static int cfq_allow_merge(struct request_queue *q, struct request *rq,
708 struct bio *bio) 718 struct bio *bio)
709 { 719 {
710 struct cfq_data *cfqd = q->elevator->elevator_data; 720 struct cfq_data *cfqd = q->elevator->elevator_data;
711 struct cfq_io_context *cic; 721 struct cfq_io_context *cic;
712 struct cfq_queue *cfqq; 722 struct cfq_queue *cfqq;
713 723
714 /* 724 /*
715 * Disallow merge of a sync bio into an async request. 725 * Disallow merge of a sync bio into an async request.
716 */ 726 */
717 if (cfq_bio_sync(bio) && !rq_is_sync(rq)) 727 if (cfq_bio_sync(bio) && !rq_is_sync(rq))
718 return 0; 728 return 0;
719 729
720 /* 730 /*
721 * Lookup the cfqq that this bio will be queued with. Allow 731 * Lookup the cfqq that this bio will be queued with. Allow
722 * merge only if rq is queued there. 732 * merge only if rq is queued there.
723 */ 733 */
724 cic = cfq_cic_lookup(cfqd, current->io_context); 734 cic = cfq_cic_lookup(cfqd, current->io_context);
725 if (!cic) 735 if (!cic)
726 return 0; 736 return 0;
727 737
728 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); 738 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
729 if (cfqq == RQ_CFQQ(rq)) 739 if (cfqq == RQ_CFQQ(rq))
730 return 1; 740 return 1;
731 741
732 return 0; 742 return 0;
733 } 743 }
734 744
735 static inline void 745 static inline void
736 __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 746 __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
737 { 747 {
738 if (cfqq) { 748 if (cfqq) {
739 /*
740 * stop potential idle class queues waiting service
741 */
742 del_timer(&cfqd->idle_class_timer);
743
744 cfqq->slice_end = 0; 749 cfqq->slice_end = 0;
745 cfq_clear_cfqq_must_alloc_slice(cfqq); 750 cfq_clear_cfqq_must_alloc_slice(cfqq);
746 cfq_clear_cfqq_fifo_expire(cfqq); 751 cfq_clear_cfqq_fifo_expire(cfqq);
747 cfq_mark_cfqq_slice_new(cfqq); 752 cfq_mark_cfqq_slice_new(cfqq);
748 cfq_clear_cfqq_queue_new(cfqq); 753 cfq_clear_cfqq_queue_new(cfqq);
749 } 754 }
750 755
751 cfqd->active_queue = cfqq; 756 cfqd->active_queue = cfqq;
752 } 757 }
753 758
754 /* 759 /*
755 * current cfqq expired its slice (or was too idle), select new one 760 * current cfqq expired its slice (or was too idle), select new one
756 */ 761 */
757 static void 762 static void
758 __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, 763 __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
759 int timed_out) 764 int timed_out)
760 { 765 {
761 if (cfq_cfqq_wait_request(cfqq)) 766 if (cfq_cfqq_wait_request(cfqq))
762 del_timer(&cfqd->idle_slice_timer); 767 del_timer(&cfqd->idle_slice_timer);
763 768
764 cfq_clear_cfqq_must_dispatch(cfqq); 769 cfq_clear_cfqq_must_dispatch(cfqq);
765 cfq_clear_cfqq_wait_request(cfqq); 770 cfq_clear_cfqq_wait_request(cfqq);
766 771
767 /* 772 /*
768 * store what was left of this slice, if the queue idled/timed out 773 * store what was left of this slice, if the queue idled/timed out
769 */ 774 */
770 if (timed_out && !cfq_cfqq_slice_new(cfqq)) 775 if (timed_out && !cfq_cfqq_slice_new(cfqq))
771 cfqq->slice_resid = cfqq->slice_end - jiffies; 776 cfqq->slice_resid = cfqq->slice_end - jiffies;
772 777
773 cfq_resort_rr_list(cfqd, cfqq); 778 cfq_resort_rr_list(cfqd, cfqq);
774 779
775 if (cfqq == cfqd->active_queue) 780 if (cfqq == cfqd->active_queue)
776 cfqd->active_queue = NULL; 781 cfqd->active_queue = NULL;
777 782
778 if (cfqd->active_cic) { 783 if (cfqd->active_cic) {
779 put_io_context(cfqd->active_cic->ioc); 784 put_io_context(cfqd->active_cic->ioc);
780 cfqd->active_cic = NULL; 785 cfqd->active_cic = NULL;
781 } 786 }
782 } 787 }
783 788
784 static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out) 789 static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out)
785 { 790 {
786 struct cfq_queue *cfqq = cfqd->active_queue; 791 struct cfq_queue *cfqq = cfqd->active_queue;
787 792
788 if (cfqq) 793 if (cfqq)
789 __cfq_slice_expired(cfqd, cfqq, timed_out); 794 __cfq_slice_expired(cfqd, cfqq, timed_out);
790 } 795 }
791 796
792 static int start_idle_class_timer(struct cfq_data *cfqd)
793 {
794 unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
795 unsigned long now = jiffies;
796
797 if (time_before(now, end) &&
798 time_after_eq(now, cfqd->last_end_request)) {
799 mod_timer(&cfqd->idle_class_timer, end);
800 return 1;
801 }
802
803 return 0;
804 }
805
806 /* 797 /*
807 * Get next queue for service. Unless we have a queue preemption, 798 * Get next queue for service. Unless we have a queue preemption,
808 * we'll simply select the first cfqq in the service tree. 799 * we'll simply select the first cfqq in the service tree.
809 */ 800 */
810 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) 801 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
811 { 802 {
812 struct cfq_queue *cfqq;
813 struct rb_node *n;
814
815 if (RB_EMPTY_ROOT(&cfqd->service_tree.rb)) 803 if (RB_EMPTY_ROOT(&cfqd->service_tree.rb))
816 return NULL; 804 return NULL;
817 805
818 n = cfq_rb_first(&cfqd->service_tree); 806 return cfq_rb_first(&cfqd->service_tree);
819 cfqq = rb_entry(n, struct cfq_queue, rb_node);
820
821 if (cfq_class_idle(cfqq)) {
822 /*
823 * if we have idle queues and no rt or be queues had
824 * pending requests, either allow immediate service if
825 * the grace period has passed or arm the idle grace
826 * timer
827 */
828 if (start_idle_class_timer(cfqd))
829 cfqq = NULL;
830 }
831
832 return cfqq;
833 } 807 }
834 808
835 /* 809 /*
836 * Get and set a new active queue for service. 810 * Get and set a new active queue for service.
837 */ 811 */
838 static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) 812 static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
839 { 813 {
840 struct cfq_queue *cfqq; 814 struct cfq_queue *cfqq;
841 815
842 cfqq = cfq_get_next_queue(cfqd); 816 cfqq = cfq_get_next_queue(cfqd);
843 __cfq_set_active_queue(cfqd, cfqq); 817 __cfq_set_active_queue(cfqd, cfqq);
844 return cfqq; 818 return cfqq;
845 } 819 }
846 820
847 static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, 821 static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
848 struct request *rq) 822 struct request *rq)
849 { 823 {
850 if (rq->sector >= cfqd->last_position) 824 if (rq->sector >= cfqd->last_position)
851 return rq->sector - cfqd->last_position; 825 return rq->sector - cfqd->last_position;
852 else 826 else
853 return cfqd->last_position - rq->sector; 827 return cfqd->last_position - rq->sector;
854 } 828 }
855 829
856 static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq) 830 static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
857 { 831 {
858 struct cfq_io_context *cic = cfqd->active_cic; 832 struct cfq_io_context *cic = cfqd->active_cic;
859 833
860 if (!sample_valid(cic->seek_samples)) 834 if (!sample_valid(cic->seek_samples))
861 return 0; 835 return 0;
862 836
863 return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean; 837 return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean;
864 } 838 }
865 839
866 static int cfq_close_cooperator(struct cfq_data *cfq_data, 840 static int cfq_close_cooperator(struct cfq_data *cfq_data,
867 struct cfq_queue *cfqq) 841 struct cfq_queue *cfqq)
868 { 842 {
869 /* 843 /*
870 * We should notice if some of the queues are cooperating, eg 844 * We should notice if some of the queues are cooperating, eg
871 * working closely on the same area of the disk. In that case, 845 * working closely on the same area of the disk. In that case,
872 * we can group them together and don't waste time idling. 846 * we can group them together and don't waste time idling.
873 */ 847 */
874 return 0; 848 return 0;
875 } 849 }
876 850
877 #define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024)) 851 #define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024))
878 852
879 static void cfq_arm_slice_timer(struct cfq_data *cfqd) 853 static void cfq_arm_slice_timer(struct cfq_data *cfqd)
880 { 854 {
881 struct cfq_queue *cfqq = cfqd->active_queue; 855 struct cfq_queue *cfqq = cfqd->active_queue;
882 struct cfq_io_context *cic; 856 struct cfq_io_context *cic;
883 unsigned long sl; 857 unsigned long sl;
884 858
885 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); 859 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
886 WARN_ON(cfq_cfqq_slice_new(cfqq)); 860 WARN_ON(cfq_cfqq_slice_new(cfqq));
887 861
888 /* 862 /*
889 * idle is disabled, either manually or by past process history 863 * idle is disabled, either manually or by past process history
890 */ 864 */
891 if (!cfqd->cfq_slice_idle || !cfq_cfqq_idle_window(cfqq)) 865 if (!cfqd->cfq_slice_idle || !cfq_cfqq_idle_window(cfqq))
892 return; 866 return;
893 867
894 /* 868 /*
895 * task has exited, don't wait 869 * task has exited, don't wait
896 */ 870 */
897 cic = cfqd->active_cic; 871 cic = cfqd->active_cic;
898 if (!cic || !atomic_read(&cic->ioc->nr_tasks)) 872 if (!cic || !atomic_read(&cic->ioc->nr_tasks))
899 return; 873 return;
900 874
901 /* 875 /*
902 * See if this prio level has a good candidate 876 * See if this prio level has a good candidate
903 */ 877 */
904 if (cfq_close_cooperator(cfqd, cfqq) && 878 if (cfq_close_cooperator(cfqd, cfqq) &&
905 (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2)) 879 (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2))
906 return; 880 return;
907 881
908 cfq_mark_cfqq_must_dispatch(cfqq); 882 cfq_mark_cfqq_must_dispatch(cfqq);
909 cfq_mark_cfqq_wait_request(cfqq); 883 cfq_mark_cfqq_wait_request(cfqq);
910 884
911 /* 885 /*
912 * we don't want to idle for seeks, but we do want to allow 886 * we don't want to idle for seeks, but we do want to allow
913 * fair distribution of slice time for a process doing back-to-back 887 * fair distribution of slice time for a process doing back-to-back
914 * seeks. so allow a little bit of time for him to submit a new rq 888 * seeks. so allow a little bit of time for him to submit a new rq
915 */ 889 */
916 sl = cfqd->cfq_slice_idle; 890 sl = cfqd->cfq_slice_idle;
917 if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic)) 891 if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
918 sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT)); 892 sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
919 893
920 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 894 mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
921 } 895 }
922 896
923 /* 897 /*
924 * Move request from internal lists to the request queue dispatch list. 898 * Move request from internal lists to the request queue dispatch list.
925 */ 899 */
926 static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) 900 static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
927 { 901 {
928 struct cfq_data *cfqd = q->elevator->elevator_data; 902 struct cfq_data *cfqd = q->elevator->elevator_data;
929 struct cfq_queue *cfqq = RQ_CFQQ(rq); 903 struct cfq_queue *cfqq = RQ_CFQQ(rq);
930 904
931 cfq_remove_request(rq); 905 cfq_remove_request(rq);
932 cfqq->dispatched++; 906 cfqq->dispatched++;
933 elv_dispatch_sort(q, rq); 907 elv_dispatch_sort(q, rq);
934 908
935 if (cfq_cfqq_sync(cfqq)) 909 if (cfq_cfqq_sync(cfqq))
936 cfqd->sync_flight++; 910 cfqd->sync_flight++;
937 } 911 }
938 912
939 /* 913 /*
940 * return expired entry, or NULL to just start from scratch in rbtree 914 * return expired entry, or NULL to just start from scratch in rbtree
941 */ 915 */
942 static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq) 916 static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq)
943 { 917 {
944 struct cfq_data *cfqd = cfqq->cfqd; 918 struct cfq_data *cfqd = cfqq->cfqd;
945 struct request *rq; 919 struct request *rq;
946 int fifo; 920 int fifo;
947 921
948 if (cfq_cfqq_fifo_expire(cfqq)) 922 if (cfq_cfqq_fifo_expire(cfqq))
949 return NULL; 923 return NULL;
950 924
951 cfq_mark_cfqq_fifo_expire(cfqq); 925 cfq_mark_cfqq_fifo_expire(cfqq);
952 926
953 if (list_empty(&cfqq->fifo)) 927 if (list_empty(&cfqq->fifo))
954 return NULL; 928 return NULL;
955 929
956 fifo = cfq_cfqq_sync(cfqq); 930 fifo = cfq_cfqq_sync(cfqq);
957 rq = rq_entry_fifo(cfqq->fifo.next); 931 rq = rq_entry_fifo(cfqq->fifo.next);
958 932
959 if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) 933 if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo]))
960 return NULL; 934 return NULL;
961 935
962 return rq; 936 return rq;
963 } 937 }
964 938
965 static inline int 939 static inline int
966 cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) 940 cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
967 { 941 {
968 const int base_rq = cfqd->cfq_slice_async_rq; 942 const int base_rq = cfqd->cfq_slice_async_rq;
969 943
970 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); 944 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
971 945
972 return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio)); 946 return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
973 } 947 }
974 948
975 /* 949 /*
976 * Select a queue for service. If we have a current active queue, 950 * Select a queue for service. If we have a current active queue,
977 * check whether to continue servicing it, or retrieve and set a new one. 951 * check whether to continue servicing it, or retrieve and set a new one.
978 */ 952 */
979 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) 953 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
980 { 954 {
981 struct cfq_queue *cfqq; 955 struct cfq_queue *cfqq;
982 956
983 cfqq = cfqd->active_queue; 957 cfqq = cfqd->active_queue;
984 if (!cfqq) 958 if (!cfqq)
985 goto new_queue; 959 goto new_queue;
986 960
987 /* 961 /*
988 * The active queue has run out of time, expire it and select new. 962 * The active queue has run out of time, expire it and select new.
989 */ 963 */
990 if (cfq_slice_used(cfqq)) 964 if (cfq_slice_used(cfqq))
991 goto expire; 965 goto expire;
992 966
993 /* 967 /*
994 * The active queue has requests and isn't expired, allow it to 968 * The active queue has requests and isn't expired, allow it to
995 * dispatch. 969 * dispatch.
996 */ 970 */
997 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) 971 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
998 goto keep_queue; 972 goto keep_queue;
999 973
1000 /* 974 /*
1001 * No requests pending. If the active queue still has requests in 975 * No requests pending. If the active queue still has requests in
1002 * flight or is idling for a new request, allow either of these 976 * flight or is idling for a new request, allow either of these
1003 * conditions to happen (or time out) before selecting a new queue. 977 * conditions to happen (or time out) before selecting a new queue.
1004 */ 978 */
1005 if (timer_pending(&cfqd->idle_slice_timer) || 979 if (timer_pending(&cfqd->idle_slice_timer) ||
1006 (cfqq->dispatched && cfq_cfqq_idle_window(cfqq))) { 980 (cfqq->dispatched && cfq_cfqq_idle_window(cfqq))) {
1007 cfqq = NULL; 981 cfqq = NULL;
1008 goto keep_queue; 982 goto keep_queue;
1009 } 983 }
1010 984
1011 expire: 985 expire:
1012 cfq_slice_expired(cfqd, 0); 986 cfq_slice_expired(cfqd, 0);
1013 new_queue: 987 new_queue:
1014 cfqq = cfq_set_active_queue(cfqd); 988 cfqq = cfq_set_active_queue(cfqd);
1015 keep_queue: 989 keep_queue:
1016 return cfqq; 990 return cfqq;
1017 } 991 }
1018 992
1019 /* 993 /*
1020 * Dispatch some requests from cfqq, moving them to the request queue 994 * Dispatch some requests from cfqq, moving them to the request queue
1021 * dispatch list. 995 * dispatch list.
1022 */ 996 */
1023 static int 997 static int
1024 __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, 998 __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1025 int max_dispatch) 999 int max_dispatch)
1026 { 1000 {
1027 int dispatched = 0; 1001 int dispatched = 0;
1028 1002
1029 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); 1003 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
1030 1004
1031 do { 1005 do {
1032 struct request *rq; 1006 struct request *rq;
1033 1007
1034 /* 1008 /*
1035 * follow expired path, else get first next available 1009 * follow expired path, else get first next available
1036 */ 1010 */
1037 if ((rq = cfq_check_fifo(cfqq)) == NULL) 1011 if ((rq = cfq_check_fifo(cfqq)) == NULL)
1038 rq = cfqq->next_rq; 1012 rq = cfqq->next_rq;
1039 1013
1040 /* 1014 /*
1041 * finally, insert request into driver dispatch list 1015 * finally, insert request into driver dispatch list
1042 */ 1016 */
1043 cfq_dispatch_insert(cfqd->queue, rq); 1017 cfq_dispatch_insert(cfqd->queue, rq);
1044 1018
1045 dispatched++; 1019 dispatched++;
1046 1020
1047 if (!cfqd->active_cic) { 1021 if (!cfqd->active_cic) {
1048 atomic_inc(&RQ_CIC(rq)->ioc->refcount); 1022 atomic_inc(&RQ_CIC(rq)->ioc->refcount);
1049 cfqd->active_cic = RQ_CIC(rq); 1023 cfqd->active_cic = RQ_CIC(rq);
1050 } 1024 }
1051 1025
1052 if (RB_EMPTY_ROOT(&cfqq->sort_list)) 1026 if (RB_EMPTY_ROOT(&cfqq->sort_list))
1053 break; 1027 break;
1054 1028
1055 } while (dispatched < max_dispatch); 1029 } while (dispatched < max_dispatch);
1056 1030
1057 /* 1031 /*
1058 * expire an async queue immediately if it has used up its slice. idle 1032 * expire an async queue immediately if it has used up its slice. idle
1059 * queues always expire after 1 dispatch round. 1033 * queues always expire after 1 dispatch round.
1060 */ 1034 */
1061 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && 1035 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
1062 dispatched >= cfq_prio_to_maxrq(cfqd, cfqq)) || 1036 dispatched >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
1063 cfq_class_idle(cfqq))) { 1037 cfq_class_idle(cfqq))) {
1064 cfqq->slice_end = jiffies + 1; 1038 cfqq->slice_end = jiffies + 1;
1065 cfq_slice_expired(cfqd, 0); 1039 cfq_slice_expired(cfqd, 0);
1066 } 1040 }
1067 1041
1068 return dispatched; 1042 return dispatched;
1069 } 1043 }
1070 1044
1071 static inline int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) 1045 static inline int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
1072 { 1046 {
1073 int dispatched = 0; 1047 int dispatched = 0;
1074 1048
1075 while (cfqq->next_rq) { 1049 while (cfqq->next_rq) {
1076 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq); 1050 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
1077 dispatched++; 1051 dispatched++;
1078 } 1052 }
1079 1053
1080 BUG_ON(!list_empty(&cfqq->fifo)); 1054 BUG_ON(!list_empty(&cfqq->fifo));
1081 return dispatched; 1055 return dispatched;
1082 } 1056 }
1083 1057
1084 /* 1058 /*
1085 * Drain our current requests. Used for barriers and when switching 1059 * Drain our current requests. Used for barriers and when switching
1086 * io schedulers on-the-fly. 1060 * io schedulers on-the-fly.
1087 */ 1061 */
1088 static int cfq_forced_dispatch(struct cfq_data *cfqd) 1062 static int cfq_forced_dispatch(struct cfq_data *cfqd)
1089 { 1063 {
1064 struct cfq_queue *cfqq;
1090 int dispatched = 0; 1065 int dispatched = 0;
1091 struct rb_node *n;
1092 1066
1093 while ((n = cfq_rb_first(&cfqd->service_tree)) != NULL) { 1067 while ((cfqq = cfq_rb_first(&cfqd->service_tree)) != NULL)
1094 struct cfq_queue *cfqq = rb_entry(n, struct cfq_queue, rb_node);
1095
1096 dispatched += __cfq_forced_dispatch_cfqq(cfqq); 1068 dispatched += __cfq_forced_dispatch_cfqq(cfqq);
1097 }
1098 1069
1099 cfq_slice_expired(cfqd, 0); 1070 cfq_slice_expired(cfqd, 0);
1100 1071
1101 BUG_ON(cfqd->busy_queues); 1072 BUG_ON(cfqd->busy_queues);
1102 1073
1103 return dispatched; 1074 return dispatched;
1104 } 1075 }
1105 1076
1106 static int cfq_dispatch_requests(struct request_queue *q, int force) 1077 static int cfq_dispatch_requests(struct request_queue *q, int force)
1107 { 1078 {
1108 struct cfq_data *cfqd = q->elevator->elevator_data; 1079 struct cfq_data *cfqd = q->elevator->elevator_data;
1109 struct cfq_queue *cfqq; 1080 struct cfq_queue *cfqq;
1110 int dispatched; 1081 int dispatched;
1111 1082
1112 if (!cfqd->busy_queues) 1083 if (!cfqd->busy_queues)
1113 return 0; 1084 return 0;
1114 1085
1115 if (unlikely(force)) 1086 if (unlikely(force))
1116 return cfq_forced_dispatch(cfqd); 1087 return cfq_forced_dispatch(cfqd);
1117 1088
1118 dispatched = 0; 1089 dispatched = 0;
1119 while ((cfqq = cfq_select_queue(cfqd)) != NULL) { 1090 while ((cfqq = cfq_select_queue(cfqd)) != NULL) {
1120 int max_dispatch; 1091 int max_dispatch;
1121 1092
1122 max_dispatch = cfqd->cfq_quantum; 1093 max_dispatch = cfqd->cfq_quantum;
1123 if (cfq_class_idle(cfqq)) 1094 if (cfq_class_idle(cfqq))
1124 max_dispatch = 1; 1095 max_dispatch = 1;
1125 1096
1126 if (cfqq->dispatched >= max_dispatch) { 1097 if (cfqq->dispatched >= max_dispatch) {
1127 if (cfqd->busy_queues > 1) 1098 if (cfqd->busy_queues > 1)
1128 break; 1099 break;
1129 if (cfqq->dispatched >= 4 * max_dispatch) 1100 if (cfqq->dispatched >= 4 * max_dispatch)
1130 break; 1101 break;
1131 } 1102 }
1132 1103
1133 if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) 1104 if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
1134 break; 1105 break;
1135 1106
1136 cfq_clear_cfqq_must_dispatch(cfqq); 1107 cfq_clear_cfqq_must_dispatch(cfqq);
1137 cfq_clear_cfqq_wait_request(cfqq); 1108 cfq_clear_cfqq_wait_request(cfqq);
1138 del_timer(&cfqd->idle_slice_timer); 1109 del_timer(&cfqd->idle_slice_timer);
1139 1110
1140 dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); 1111 dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
1141 } 1112 }
1142 1113
1143 return dispatched; 1114 return dispatched;
1144 } 1115 }
1145 1116
1146 /* 1117 /*
1147 * task holds one reference to the queue, dropped when task exits. each rq 1118 * task holds one reference to the queue, dropped when task exits. each rq
1148 * in-flight on this queue also holds a reference, dropped when rq is freed. 1119 * in-flight on this queue also holds a reference, dropped when rq is freed.
1149 * 1120 *
1150 * queue lock must be held here. 1121 * queue lock must be held here.
1151 */ 1122 */
1152 static void cfq_put_queue(struct cfq_queue *cfqq) 1123 static void cfq_put_queue(struct cfq_queue *cfqq)
1153 { 1124 {
1154 struct cfq_data *cfqd = cfqq->cfqd; 1125 struct cfq_data *cfqd = cfqq->cfqd;
1155 1126
1156 BUG_ON(atomic_read(&cfqq->ref) <= 0); 1127 BUG_ON(atomic_read(&cfqq->ref) <= 0);
1157 1128
1158 if (!atomic_dec_and_test(&cfqq->ref)) 1129 if (!atomic_dec_and_test(&cfqq->ref))
1159 return; 1130 return;
1160 1131
1161 BUG_ON(rb_first(&cfqq->sort_list)); 1132 BUG_ON(rb_first(&cfqq->sort_list));
1162 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); 1133 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
1163 BUG_ON(cfq_cfqq_on_rr(cfqq)); 1134 BUG_ON(cfq_cfqq_on_rr(cfqq));
1164 1135
1165 if (unlikely(cfqd->active_queue == cfqq)) { 1136 if (unlikely(cfqd->active_queue == cfqq)) {
1166 __cfq_slice_expired(cfqd, cfqq, 0); 1137 __cfq_slice_expired(cfqd, cfqq, 0);
1167 cfq_schedule_dispatch(cfqd); 1138 cfq_schedule_dispatch(cfqd);
1168 } 1139 }
1169 1140
1170 kmem_cache_free(cfq_pool, cfqq); 1141 kmem_cache_free(cfq_pool, cfqq);
1171 } 1142 }
1172 1143
1173 /* 1144 /*
1174 * Call func for each cic attached to this ioc. Returns number of cic's seen. 1145 * Call func for each cic attached to this ioc. Returns number of cic's seen.
1175 */ 1146 */
1176 #define CIC_GANG_NR 16 1147 #define CIC_GANG_NR 16
1177 static unsigned int 1148 static unsigned int
1178 call_for_each_cic(struct io_context *ioc, 1149 call_for_each_cic(struct io_context *ioc,
1179 void (*func)(struct io_context *, struct cfq_io_context *)) 1150 void (*func)(struct io_context *, struct cfq_io_context *))
1180 { 1151 {
1181 struct cfq_io_context *cics[CIC_GANG_NR]; 1152 struct cfq_io_context *cics[CIC_GANG_NR];
1182 unsigned long index = 0; 1153 unsigned long index = 0;
1183 unsigned int called = 0; 1154 unsigned int called = 0;
1184 int nr; 1155 int nr;
1185 1156
1186 rcu_read_lock(); 1157 rcu_read_lock();
1187 1158
1188 do { 1159 do {
1189 int i; 1160 int i;
1190 1161
1191 /* 1162 /*
1192 * Perhaps there's a better way - this just does gang lookups from 1163 * Perhaps there's a better way - this just does gang lookups from
1193 * 0 to the end, restarting after each CIC_GANG_NR from the 1164 * 0 to the end, restarting after each CIC_GANG_NR from the
1194 * last key + 1. 1165 * last key + 1.
1195 */ 1166 */
1196 nr = radix_tree_gang_lookup(&ioc->radix_root, (void **) cics, 1167 nr = radix_tree_gang_lookup(&ioc->radix_root, (void **) cics,
1197 index, CIC_GANG_NR); 1168 index, CIC_GANG_NR);
1198 if (!nr) 1169 if (!nr)
1199 break; 1170 break;
1200 1171
1201 called += nr; 1172 called += nr;
1202 index = 1 + (unsigned long) cics[nr - 1]->key; 1173 index = 1 + (unsigned long) cics[nr - 1]->key;
1203 1174
1204 for (i = 0; i < nr; i++) 1175 for (i = 0; i < nr; i++)
1205 func(ioc, cics[i]); 1176 func(ioc, cics[i]);
1206 } while (nr == CIC_GANG_NR); 1177 } while (nr == CIC_GANG_NR);
1207 1178
1208 rcu_read_unlock(); 1179 rcu_read_unlock();
1209 1180
1210 return called; 1181 return called;
1211 } 1182 }
1212 1183
1213 static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) 1184 static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
1214 { 1185 {
1215 unsigned long flags; 1186 unsigned long flags;
1216 1187
1217 BUG_ON(!cic->dead_key); 1188 BUG_ON(!cic->dead_key);
1218 1189
1219 spin_lock_irqsave(&ioc->lock, flags); 1190 spin_lock_irqsave(&ioc->lock, flags);
1220 radix_tree_delete(&ioc->radix_root, cic->dead_key); 1191 radix_tree_delete(&ioc->radix_root, cic->dead_key);
1221 spin_unlock_irqrestore(&ioc->lock, flags); 1192 spin_unlock_irqrestore(&ioc->lock, flags);
1222 1193
1223 kmem_cache_free(cfq_ioc_pool, cic); 1194 kmem_cache_free(cfq_ioc_pool, cic);
1224 } 1195 }
1225 1196
1226 static void cfq_free_io_context(struct io_context *ioc) 1197 static void cfq_free_io_context(struct io_context *ioc)
1227 { 1198 {
1228 int freed; 1199 int freed;
1229 1200
1230 /* 1201 /*
1231 * ioc->refcount is zero here, so no more cic's are allowed to be 1202 * ioc->refcount is zero here, so no more cic's are allowed to be
1232 * linked into this ioc. So it should be ok to iterate over the known 1203 * linked into this ioc. So it should be ok to iterate over the known
1233 * list, we will see all cic's since no new ones are added. 1204 * list, we will see all cic's since no new ones are added.
1234 */ 1205 */
1235 freed = call_for_each_cic(ioc, cic_free_func); 1206 freed = call_for_each_cic(ioc, cic_free_func);
1236 1207
1237 elv_ioc_count_mod(ioc_count, -freed); 1208 elv_ioc_count_mod(ioc_count, -freed);
1238 1209
1239 if (ioc_gone && !elv_ioc_count_read(ioc_count)) 1210 if (ioc_gone && !elv_ioc_count_read(ioc_count))
1240 complete(ioc_gone); 1211 complete(ioc_gone);
1241 } 1212 }
1242 1213
1243 static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1214 static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1244 { 1215 {
1245 if (unlikely(cfqq == cfqd->active_queue)) { 1216 if (unlikely(cfqq == cfqd->active_queue)) {
1246 __cfq_slice_expired(cfqd, cfqq, 0); 1217 __cfq_slice_expired(cfqd, cfqq, 0);
1247 cfq_schedule_dispatch(cfqd); 1218 cfq_schedule_dispatch(cfqd);
1248 } 1219 }
1249 1220
1250 cfq_put_queue(cfqq); 1221 cfq_put_queue(cfqq);
1251 } 1222 }
1252 1223
1253 static void __cfq_exit_single_io_context(struct cfq_data *cfqd, 1224 static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
1254 struct cfq_io_context *cic) 1225 struct cfq_io_context *cic)
1255 { 1226 {
1256 list_del_init(&cic->queue_list); 1227 list_del_init(&cic->queue_list);
1257 1228
1258 /* 1229 /*
1259 * Make sure key == NULL is seen for dead queues 1230 * Make sure key == NULL is seen for dead queues
1260 */ 1231 */
1261 smp_wmb(); 1232 smp_wmb();
1262 cic->dead_key = (unsigned long) cic->key; 1233 cic->dead_key = (unsigned long) cic->key;
1263 cic->key = NULL; 1234 cic->key = NULL;
1264 1235
1265 if (cic->cfqq[ASYNC]) { 1236 if (cic->cfqq[ASYNC]) {
1266 cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]); 1237 cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]);
1267 cic->cfqq[ASYNC] = NULL; 1238 cic->cfqq[ASYNC] = NULL;
1268 } 1239 }
1269 1240
1270 if (cic->cfqq[SYNC]) { 1241 if (cic->cfqq[SYNC]) {
1271 cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]); 1242 cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]);
1272 cic->cfqq[SYNC] = NULL; 1243 cic->cfqq[SYNC] = NULL;
1273 } 1244 }
1274 } 1245 }
1275 1246
1276 static void cfq_exit_single_io_context(struct io_context *ioc, 1247 static void cfq_exit_single_io_context(struct io_context *ioc,
1277 struct cfq_io_context *cic) 1248 struct cfq_io_context *cic)
1278 { 1249 {
1279 struct cfq_data *cfqd = cic->key; 1250 struct cfq_data *cfqd = cic->key;
1280 1251
1281 if (cfqd) { 1252 if (cfqd) {
1282 struct request_queue *q = cfqd->queue; 1253 struct request_queue *q = cfqd->queue;
1283 unsigned long flags; 1254 unsigned long flags;
1284 1255
1285 spin_lock_irqsave(q->queue_lock, flags); 1256 spin_lock_irqsave(q->queue_lock, flags);
1286 __cfq_exit_single_io_context(cfqd, cic); 1257 __cfq_exit_single_io_context(cfqd, cic);
1287 spin_unlock_irqrestore(q->queue_lock, flags); 1258 spin_unlock_irqrestore(q->queue_lock, flags);
1288 } 1259 }
1289 } 1260 }
1290 1261
1291 /* 1262 /*
1292 * The process that ioc belongs to has exited; we need to clean up 1263 * The process that ioc belongs to has exited; we need to clean up
1293 * and put the internal structures we have that belong to that process. 1264 * and put the internal structures we have that belong to that process.
1294 */ 1265 */
1295 static void cfq_exit_io_context(struct io_context *ioc) 1266 static void cfq_exit_io_context(struct io_context *ioc)
1296 { 1267 {
1297 rcu_assign_pointer(ioc->ioc_data, NULL); 1268 rcu_assign_pointer(ioc->ioc_data, NULL);
1298 call_for_each_cic(ioc, cfq_exit_single_io_context); 1269 call_for_each_cic(ioc, cfq_exit_single_io_context);
1299 } 1270 }
1300 1271
1301 static struct cfq_io_context * 1272 static struct cfq_io_context *
1302 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 1273 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
1303 { 1274 {
1304 struct cfq_io_context *cic; 1275 struct cfq_io_context *cic;
1305 1276
1306 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO, 1277 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
1307 cfqd->queue->node); 1278 cfqd->queue->node);
1308 if (cic) { 1279 if (cic) {
1309 cic->last_end_request = jiffies; 1280 cic->last_end_request = jiffies;
1310 INIT_LIST_HEAD(&cic->queue_list); 1281 INIT_LIST_HEAD(&cic->queue_list);
1311 cic->dtor = cfq_free_io_context; 1282 cic->dtor = cfq_free_io_context;
1312 cic->exit = cfq_exit_io_context; 1283 cic->exit = cfq_exit_io_context;
1313 elv_ioc_count_inc(ioc_count); 1284 elv_ioc_count_inc(ioc_count);
1314 } 1285 }
1315 1286
1316 return cic; 1287 return cic;
1317 } 1288 }
1318 1289
1319 static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) 1290 static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
1320 { 1291 {
1321 struct task_struct *tsk = current; 1292 struct task_struct *tsk = current;
1322 int ioprio_class; 1293 int ioprio_class;
1323 1294
1324 if (!cfq_cfqq_prio_changed(cfqq)) 1295 if (!cfq_cfqq_prio_changed(cfqq))
1325 return; 1296 return;
1326 1297
1327 ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio); 1298 ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio);
1328 switch (ioprio_class) { 1299 switch (ioprio_class) {
1329 default: 1300 default:
1330 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); 1301 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
1331 case IOPRIO_CLASS_NONE: 1302 case IOPRIO_CLASS_NONE:
1332 /* 1303 /*
1333 * no prio set, place us in the middle of the BE classes 1304 * no prio set, place us in the middle of the BE classes
1334 */ 1305 */
1335 cfqq->ioprio = task_nice_ioprio(tsk); 1306 cfqq->ioprio = task_nice_ioprio(tsk);
1336 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1307 cfqq->ioprio_class = IOPRIO_CLASS_BE;
1337 break; 1308 break;
1338 case IOPRIO_CLASS_RT: 1309 case IOPRIO_CLASS_RT:
1339 cfqq->ioprio = task_ioprio(ioc); 1310 cfqq->ioprio = task_ioprio(ioc);
1340 cfqq->ioprio_class = IOPRIO_CLASS_RT; 1311 cfqq->ioprio_class = IOPRIO_CLASS_RT;
1341 break; 1312 break;
1342 case IOPRIO_CLASS_BE: 1313 case IOPRIO_CLASS_BE:
1343 cfqq->ioprio = task_ioprio(ioc); 1314 cfqq->ioprio = task_ioprio(ioc);
1344 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1315 cfqq->ioprio_class = IOPRIO_CLASS_BE;
1345 break; 1316 break;
1346 case IOPRIO_CLASS_IDLE: 1317 case IOPRIO_CLASS_IDLE:
1347 cfqq->ioprio_class = IOPRIO_CLASS_IDLE; 1318 cfqq->ioprio_class = IOPRIO_CLASS_IDLE;
1348 cfqq->ioprio = 7; 1319 cfqq->ioprio = 7;
1349 cfq_clear_cfqq_idle_window(cfqq); 1320 cfq_clear_cfqq_idle_window(cfqq);
1350 break; 1321 break;
1351 } 1322 }
1352 1323
1353 /* 1324 /*
1354 * keep track of original prio settings in case we have to temporarily 1325 * keep track of original prio settings in case we have to temporarily
1355 * elevate the priority of this queue 1326 * elevate the priority of this queue
1356 */ 1327 */
1357 cfqq->org_ioprio = cfqq->ioprio; 1328 cfqq->org_ioprio = cfqq->ioprio;
1358 cfqq->org_ioprio_class = cfqq->ioprio_class; 1329 cfqq->org_ioprio_class = cfqq->ioprio_class;
1359 cfq_clear_cfqq_prio_changed(cfqq); 1330 cfq_clear_cfqq_prio_changed(cfqq);
1360 } 1331 }
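As an illustrative aside, the IOPRIO_CLASS_IDLE branch above is where a task lands after requesting the idle class from userspace, the request this commit stops restricting to root. Below is a minimal userspace sketch of that request; the constants are assumptions copied from the kernel's ioprio.h (which was not exported to userspace at the time), and the raw ioprio_set() syscall is used since glibc provides no wrapper.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Assumed values, mirrored from include/linux/ioprio.h: the class sits in
 * the bits above IOPRIO_CLASS_SHIFT. */
#define IOPRIO_CLASS_SHIFT      13
#define IOPRIO_PRIO_VALUE(class, data)  (((class) << IOPRIO_CLASS_SHIFT) | (data))
#define IOPRIO_CLASS_IDLE       3
#define IOPRIO_WHO_PROCESS      1

int main(void)
{
        /* pid 0 means "the calling process" */
        int prio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);

        if (syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0, prio) < 0) {
                perror("ioprio_set");
                return 1;
        }
        /* io issued from now on is classed idle; cfq_init_prio_data() above
         * picks IOPRIO_CLASS_IDLE and clears the idle window for the queue */
        return 0;
}

ionice -c3 issues the same call; before this patch the kernel refused it for unprivileged tasks.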
1361 1332
1362 static inline void changed_ioprio(struct io_context *ioc, 1333 static inline void changed_ioprio(struct io_context *ioc,
1363 struct cfq_io_context *cic) 1334 struct cfq_io_context *cic)
1364 { 1335 {
1365 struct cfq_data *cfqd = cic->key; 1336 struct cfq_data *cfqd = cic->key;
1366 struct cfq_queue *cfqq; 1337 struct cfq_queue *cfqq;
1367 unsigned long flags; 1338 unsigned long flags;
1368 1339
1369 if (unlikely(!cfqd)) 1340 if (unlikely(!cfqd))
1370 return; 1341 return;
1371 1342
1372 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 1343 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
1373 1344
1374 cfqq = cic->cfqq[ASYNC]; 1345 cfqq = cic->cfqq[ASYNC];
1375 if (cfqq) { 1346 if (cfqq) {
1376 struct cfq_queue *new_cfqq; 1347 struct cfq_queue *new_cfqq;
1377 new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc, GFP_ATOMIC); 1348 new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc, GFP_ATOMIC);
1378 if (new_cfqq) { 1349 if (new_cfqq) {
1379 cic->cfqq[ASYNC] = new_cfqq; 1350 cic->cfqq[ASYNC] = new_cfqq;
1380 cfq_put_queue(cfqq); 1351 cfq_put_queue(cfqq);
1381 } 1352 }
1382 } 1353 }
1383 1354
1384 cfqq = cic->cfqq[SYNC]; 1355 cfqq = cic->cfqq[SYNC];
1385 if (cfqq) 1356 if (cfqq)
1386 cfq_mark_cfqq_prio_changed(cfqq); 1357 cfq_mark_cfqq_prio_changed(cfqq);
1387 1358
1388 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 1359 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
1389 } 1360 }
1390 1361
1391 static void cfq_ioc_set_ioprio(struct io_context *ioc) 1362 static void cfq_ioc_set_ioprio(struct io_context *ioc)
1392 { 1363 {
1393 call_for_each_cic(ioc, changed_ioprio); 1364 call_for_each_cic(ioc, changed_ioprio);
1394 ioc->ioprio_changed = 0; 1365 ioc->ioprio_changed = 0;
1395 } 1366 }
1396 1367
1397 static struct cfq_queue * 1368 static struct cfq_queue *
1398 cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync, 1369 cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
1399 struct io_context *ioc, gfp_t gfp_mask) 1370 struct io_context *ioc, gfp_t gfp_mask)
1400 { 1371 {
1401 struct cfq_queue *cfqq, *new_cfqq = NULL; 1372 struct cfq_queue *cfqq, *new_cfqq = NULL;
1402 struct cfq_io_context *cic; 1373 struct cfq_io_context *cic;
1403 1374
1404 retry: 1375 retry:
1405 cic = cfq_cic_lookup(cfqd, ioc); 1376 cic = cfq_cic_lookup(cfqd, ioc);
1406 /* cic always exists here */ 1377 /* cic always exists here */
1407 cfqq = cic_to_cfqq(cic, is_sync); 1378 cfqq = cic_to_cfqq(cic, is_sync);
1408 1379
1409 if (!cfqq) { 1380 if (!cfqq) {
1410 if (new_cfqq) { 1381 if (new_cfqq) {
1411 cfqq = new_cfqq; 1382 cfqq = new_cfqq;
1412 new_cfqq = NULL; 1383 new_cfqq = NULL;
1413 } else if (gfp_mask & __GFP_WAIT) { 1384 } else if (gfp_mask & __GFP_WAIT) {
1414 /* 1385 /*
1415 * Inform the allocator of the fact that we will 1386 * Inform the allocator of the fact that we will
1416 * just repeat this allocation if it fails, to allow 1387 * just repeat this allocation if it fails, to allow
1417 * the allocator to do whatever it needs to attempt to 1388 * the allocator to do whatever it needs to attempt to
1418 * free memory. 1389 * free memory.
1419 */ 1390 */
1420 spin_unlock_irq(cfqd->queue->queue_lock); 1391 spin_unlock_irq(cfqd->queue->queue_lock);
1421 new_cfqq = kmem_cache_alloc_node(cfq_pool, 1392 new_cfqq = kmem_cache_alloc_node(cfq_pool,
1422 gfp_mask | __GFP_NOFAIL | __GFP_ZERO, 1393 gfp_mask | __GFP_NOFAIL | __GFP_ZERO,
1423 cfqd->queue->node); 1394 cfqd->queue->node);
1424 spin_lock_irq(cfqd->queue->queue_lock); 1395 spin_lock_irq(cfqd->queue->queue_lock);
1425 goto retry; 1396 goto retry;
1426 } else { 1397 } else {
1427 cfqq = kmem_cache_alloc_node(cfq_pool, 1398 cfqq = kmem_cache_alloc_node(cfq_pool,
1428 gfp_mask | __GFP_ZERO, 1399 gfp_mask | __GFP_ZERO,
1429 cfqd->queue->node); 1400 cfqd->queue->node);
1430 if (!cfqq) 1401 if (!cfqq)
1431 goto out; 1402 goto out;
1432 } 1403 }
1433 1404
1434 RB_CLEAR_NODE(&cfqq->rb_node); 1405 RB_CLEAR_NODE(&cfqq->rb_node);
1435 INIT_LIST_HEAD(&cfqq->fifo); 1406 INIT_LIST_HEAD(&cfqq->fifo);
1436 1407
1437 atomic_set(&cfqq->ref, 0); 1408 atomic_set(&cfqq->ref, 0);
1438 cfqq->cfqd = cfqd; 1409 cfqq->cfqd = cfqd;
1439 1410
1440 if (is_sync) {
1441 cfq_mark_cfqq_idle_window(cfqq);
1442 cfq_mark_cfqq_sync(cfqq);
1443 }
1444
1445 cfq_mark_cfqq_prio_changed(cfqq); 1411 cfq_mark_cfqq_prio_changed(cfqq);
1446 cfq_mark_cfqq_queue_new(cfqq); 1412 cfq_mark_cfqq_queue_new(cfqq);
1447 1413
1448 cfq_init_prio_data(cfqq, ioc); 1414 cfq_init_prio_data(cfqq, ioc);
1415
1416 if (is_sync) {
1417 if (!cfq_class_idle(cfqq))
1418 cfq_mark_cfqq_idle_window(cfqq);
1419 cfq_mark_cfqq_sync(cfqq);
1420 }
1449 } 1421 }
1450 1422
1451 if (new_cfqq) 1423 if (new_cfqq)
1452 kmem_cache_free(cfq_pool, new_cfqq); 1424 kmem_cache_free(cfq_pool, new_cfqq);
1453 1425
1454 out: 1426 out:
1455 WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); 1427 WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
1456 return cfqq; 1428 return cfqq;
1457 } 1429 }
1458 1430
1459 static struct cfq_queue ** 1431 static struct cfq_queue **
1460 cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio) 1432 cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
1461 { 1433 {
1462 switch(ioprio_class) { 1434 switch(ioprio_class) {
1463 case IOPRIO_CLASS_RT: 1435 case IOPRIO_CLASS_RT:
1464 return &cfqd->async_cfqq[0][ioprio]; 1436 return &cfqd->async_cfqq[0][ioprio];
1465 case IOPRIO_CLASS_BE: 1437 case IOPRIO_CLASS_BE:
1466 return &cfqd->async_cfqq[1][ioprio]; 1438 return &cfqd->async_cfqq[1][ioprio];
1467 case IOPRIO_CLASS_IDLE: 1439 case IOPRIO_CLASS_IDLE:
1468 return &cfqd->async_idle_cfqq; 1440 return &cfqd->async_idle_cfqq;
1469 default: 1441 default:
1470 BUG(); 1442 BUG();
1471 } 1443 }
1472 } 1444 }
1473 1445
1474 static struct cfq_queue * 1446 static struct cfq_queue *
1475 cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, 1447 cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
1476 gfp_t gfp_mask) 1448 gfp_t gfp_mask)
1477 { 1449 {
1478 const int ioprio = task_ioprio(ioc); 1450 const int ioprio = task_ioprio(ioc);
1479 const int ioprio_class = task_ioprio_class(ioc); 1451 const int ioprio_class = task_ioprio_class(ioc);
1480 struct cfq_queue **async_cfqq = NULL; 1452 struct cfq_queue **async_cfqq = NULL;
1481 struct cfq_queue *cfqq = NULL; 1453 struct cfq_queue *cfqq = NULL;
1482 1454
1483 if (!is_sync) { 1455 if (!is_sync) {
1484 async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio); 1456 async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
1485 cfqq = *async_cfqq; 1457 cfqq = *async_cfqq;
1486 } 1458 }
1487 1459
1488 if (!cfqq) { 1460 if (!cfqq) {
1489 cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask); 1461 cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
1490 if (!cfqq) 1462 if (!cfqq)
1491 return NULL; 1463 return NULL;
1492 } 1464 }
1493 1465
1494 /* 1466 /*
1495 * pin the queue now that it's allocated, scheduler exit will prune it 1467 * pin the queue now that it's allocated, scheduler exit will prune it
1496 */ 1468 */
1497 if (!is_sync && !(*async_cfqq)) { 1469 if (!is_sync && !(*async_cfqq)) {
1498 atomic_inc(&cfqq->ref); 1470 atomic_inc(&cfqq->ref);
1499 *async_cfqq = cfqq; 1471 *async_cfqq = cfqq;
1500 } 1472 }
1501 1473
1502 atomic_inc(&cfqq->ref); 1474 atomic_inc(&cfqq->ref);
1503 return cfqq; 1475 return cfqq;
1504 } 1476 }
1505 1477
1506 static void cfq_cic_free(struct cfq_io_context *cic) 1478 static void cfq_cic_free(struct cfq_io_context *cic)
1507 { 1479 {
1508 kmem_cache_free(cfq_ioc_pool, cic); 1480 kmem_cache_free(cfq_ioc_pool, cic);
1509 elv_ioc_count_dec(ioc_count); 1481 elv_ioc_count_dec(ioc_count);
1510 1482
1511 if (ioc_gone && !elv_ioc_count_read(ioc_count)) 1483 if (ioc_gone && !elv_ioc_count_read(ioc_count))
1512 complete(ioc_gone); 1484 complete(ioc_gone);
1513 } 1485 }
1514 1486
1515 /* 1487 /*
1516 * We drop cfq io contexts lazily, so we may find a dead one. 1488 * We drop cfq io contexts lazily, so we may find a dead one.
1517 */ 1489 */
1518 static void 1490 static void
1519 cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, 1491 cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
1520 struct cfq_io_context *cic) 1492 struct cfq_io_context *cic)
1521 { 1493 {
1522 unsigned long flags; 1494 unsigned long flags;
1523 1495
1524 WARN_ON(!list_empty(&cic->queue_list)); 1496 WARN_ON(!list_empty(&cic->queue_list));
1525 1497
1526 spin_lock_irqsave(&ioc->lock, flags); 1498 spin_lock_irqsave(&ioc->lock, flags);
1527 1499
1528 if (ioc->ioc_data == cic) 1500 if (ioc->ioc_data == cic)
1529 rcu_assign_pointer(ioc->ioc_data, NULL); 1501 rcu_assign_pointer(ioc->ioc_data, NULL);
1530 1502
1531 radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd); 1503 radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd);
1532 spin_unlock_irqrestore(&ioc->lock, flags); 1504 spin_unlock_irqrestore(&ioc->lock, flags);
1533 1505
1534 cfq_cic_free(cic); 1506 cfq_cic_free(cic);
1535 } 1507 }
1536 1508
1537 static struct cfq_io_context * 1509 static struct cfq_io_context *
1538 cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) 1510 cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
1539 { 1511 {
1540 struct cfq_io_context *cic; 1512 struct cfq_io_context *cic;
1541 void *k; 1513 void *k;
1542 1514
1543 if (unlikely(!ioc)) 1515 if (unlikely(!ioc))
1544 return NULL; 1516 return NULL;
1545 1517
1546 /* 1518 /*
1547 * we maintain a last-hit cache, to avoid browsing over the tree 1519 * we maintain a last-hit cache, to avoid browsing over the tree
1548 */ 1520 */
1549 cic = rcu_dereference(ioc->ioc_data); 1521 cic = rcu_dereference(ioc->ioc_data);
1550 if (cic && cic->key == cfqd) 1522 if (cic && cic->key == cfqd)
1551 return cic; 1523 return cic;
1552 1524
1553 do { 1525 do {
1554 rcu_read_lock(); 1526 rcu_read_lock();
1555 cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd); 1527 cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd);
1556 rcu_read_unlock(); 1528 rcu_read_unlock();
1557 if (!cic) 1529 if (!cic)
1558 break; 1530 break;
1559 /* ->key must be copied to avoid race with cfq_exit_queue() */ 1531 /* ->key must be copied to avoid race with cfq_exit_queue() */
1560 k = cic->key; 1532 k = cic->key;
1561 if (unlikely(!k)) { 1533 if (unlikely(!k)) {
1562 cfq_drop_dead_cic(cfqd, ioc, cic); 1534 cfq_drop_dead_cic(cfqd, ioc, cic);
1563 continue; 1535 continue;
1564 } 1536 }
1565 1537
1566 rcu_assign_pointer(ioc->ioc_data, cic); 1538 rcu_assign_pointer(ioc->ioc_data, cic);
1567 break; 1539 break;
1568 } while (1); 1540 } while (1);
1569 1541
1570 return cic; 1542 return cic;
1571 } 1543 }
1572 1544
1573 /* 1545 /*
1574 * Add cic into ioc, using cfqd as the search key. This enables us to lookup 1546 * Add cic into ioc, using cfqd as the search key. This enables us to lookup
1575 * the process specific cfq io context when entered from the block layer. 1547 * the process specific cfq io context when entered from the block layer.
1576 * Also adds the cic to a per-cfqd list, used when this queue is removed. 1548 * Also adds the cic to a per-cfqd list, used when this queue is removed.
1577 */ 1549 */
1578 static inline int 1550 static inline int
1579 cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, 1551 cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
1580 struct cfq_io_context *cic, gfp_t gfp_mask) 1552 struct cfq_io_context *cic, gfp_t gfp_mask)
1581 { 1553 {
1582 unsigned long flags; 1554 unsigned long flags;
1583 int ret; 1555 int ret;
1584 1556
1585 ret = radix_tree_preload(gfp_mask); 1557 ret = radix_tree_preload(gfp_mask);
1586 if (!ret) { 1558 if (!ret) {
1587 cic->ioc = ioc; 1559 cic->ioc = ioc;
1588 cic->key = cfqd; 1560 cic->key = cfqd;
1589 1561
1590 spin_lock_irqsave(&ioc->lock, flags); 1562 spin_lock_irqsave(&ioc->lock, flags);
1591 ret = radix_tree_insert(&ioc->radix_root, 1563 ret = radix_tree_insert(&ioc->radix_root,
1592 (unsigned long) cfqd, cic); 1564 (unsigned long) cfqd, cic);
1593 spin_unlock_irqrestore(&ioc->lock, flags); 1565 spin_unlock_irqrestore(&ioc->lock, flags);
1594 1566
1595 radix_tree_preload_end(); 1567 radix_tree_preload_end();
1596 1568
1597 if (!ret) { 1569 if (!ret) {
1598 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 1570 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
1599 list_add(&cic->queue_list, &cfqd->cic_list); 1571 list_add(&cic->queue_list, &cfqd->cic_list);
1600 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 1572 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
1601 } 1573 }
1602 } 1574 }
1603 1575
1604 if (ret) 1576 if (ret)
1605 printk(KERN_ERR "cfq: cic link failed!\n"); 1577 printk(KERN_ERR "cfq: cic link failed!\n");
1606 1578
1607 return ret; 1579 return ret;
1608 } 1580 }
1609 1581
1610 /* 1582 /*
1611 * Setup general io context and cfq io context. There can be several cfq 1583 * Setup general io context and cfq io context. There can be several cfq
1612 * io contexts per general io context, if this process is doing io to more 1584 * io contexts per general io context, if this process is doing io to more
1613 * than one device managed by cfq. 1585 * than one device managed by cfq.
1614 */ 1586 */
1615 static struct cfq_io_context * 1587 static struct cfq_io_context *
1616 cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 1588 cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
1617 { 1589 {
1618 struct io_context *ioc = NULL; 1590 struct io_context *ioc = NULL;
1619 struct cfq_io_context *cic; 1591 struct cfq_io_context *cic;
1620 1592
1621 might_sleep_if(gfp_mask & __GFP_WAIT); 1593 might_sleep_if(gfp_mask & __GFP_WAIT);
1622 1594
1623 ioc = get_io_context(gfp_mask, cfqd->queue->node); 1595 ioc = get_io_context(gfp_mask, cfqd->queue->node);
1624 if (!ioc) 1596 if (!ioc)
1625 return NULL; 1597 return NULL;
1626 1598
1627 cic = cfq_cic_lookup(cfqd, ioc); 1599 cic = cfq_cic_lookup(cfqd, ioc);
1628 if (cic) 1600 if (cic)
1629 goto out; 1601 goto out;
1630 1602
1631 cic = cfq_alloc_io_context(cfqd, gfp_mask); 1603 cic = cfq_alloc_io_context(cfqd, gfp_mask);
1632 if (cic == NULL) 1604 if (cic == NULL)
1633 goto err; 1605 goto err;
1634 1606
1635 if (cfq_cic_link(cfqd, ioc, cic, gfp_mask)) 1607 if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
1636 goto err_free; 1608 goto err_free;
1637 1609
1638 out: 1610 out:
1639 smp_read_barrier_depends(); 1611 smp_read_barrier_depends();
1640 if (unlikely(ioc->ioprio_changed)) 1612 if (unlikely(ioc->ioprio_changed))
1641 cfq_ioc_set_ioprio(ioc); 1613 cfq_ioc_set_ioprio(ioc);
1642 1614
1643 return cic; 1615 return cic;
1644 err_free: 1616 err_free:
1645 cfq_cic_free(cic); 1617 cfq_cic_free(cic);
1646 err: 1618 err:
1647 put_io_context(ioc); 1619 put_io_context(ioc);
1648 return NULL; 1620 return NULL;
1649 } 1621 }
1650 1622
1651 static void 1623 static void
1652 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) 1624 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
1653 { 1625 {
1654 unsigned long elapsed = jiffies - cic->last_end_request; 1626 unsigned long elapsed = jiffies - cic->last_end_request;
1655 unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); 1627 unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
1656 1628
1657 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; 1629 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
1658 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; 1630 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
1659 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; 1631 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
1660 } 1632 }
1661 1633
1662 static void 1634 static void
1663 cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic, 1635 cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
1664 struct request *rq) 1636 struct request *rq)
1665 { 1637 {
1666 sector_t sdist; 1638 sector_t sdist;
1667 u64 total; 1639 u64 total;
1668 1640
1669 if (cic->last_request_pos < rq->sector) 1641 if (cic->last_request_pos < rq->sector)
1670 sdist = rq->sector - cic->last_request_pos; 1642 sdist = rq->sector - cic->last_request_pos;
1671 else 1643 else
1672 sdist = cic->last_request_pos - rq->sector; 1644 sdist = cic->last_request_pos - rq->sector;
1673 1645
1674 /* 1646 /*
1675 * Don't allow the seek distance to get too large from the 1647 * Don't allow the seek distance to get too large from the
1676 * odd fragment, pagein, etc 1648 * odd fragment, pagein, etc
1677 */ 1649 */
1678 if (cic->seek_samples <= 60) /* second&third seek */ 1650 if (cic->seek_samples <= 60) /* second&third seek */
1679 sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*1024); 1651 sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*1024);
1680 else 1652 else
1681 sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*64); 1653 sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*64);
1682 1654
1683 cic->seek_samples = (7*cic->seek_samples + 256) / 8; 1655 cic->seek_samples = (7*cic->seek_samples + 256) / 8;
1684 cic->seek_total = (7*cic->seek_total + (u64)256*sdist) / 8; 1656 cic->seek_total = (7*cic->seek_total + (u64)256*sdist) / 8;
1685 total = cic->seek_total + (cic->seek_samples/2); 1657 total = cic->seek_total + (cic->seek_samples/2);
1686 do_div(total, cic->seek_samples); 1658 do_div(total, cic->seek_samples);
1687 cic->seek_mean = (sector_t)total; 1659 cic->seek_mean = (sector_t)total;
1688 } 1660 }
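Both update helpers above keep exponentially decaying averages in integer fixed point: each new sample gets weight 1/8, the running total is scaled by 256 to preserve precision, and the sample counter decays toward 256, so the mean is simply total/samples plus a rounding constant (the seek version does the same with a u64 total and do_div()). A short self-contained sketch of that update rule follows, using made-up sample values; the struct and function names here are illustrative only, not from the patch.

#include <stdio.h>

/* Same fixed-point decaying average as cfq_update_io_thinktime() and
 * cfq_update_io_seektime(): new samples weigh 1/8, scale factor 256. */
struct decaying_avg {
        unsigned long samples;  /* converges toward 256 */
        unsigned long total;    /* 256 * decayed sum of samples */
        unsigned long mean;
};

static void avg_add(struct decaying_avg *a, unsigned long sample)
{
        a->samples = (7 * a->samples + 256) / 8;
        a->total = (7 * a->total + 256 * sample) / 8;
        /* +128 is half of the converged weight (256), i.e. round to nearest */
        a->mean = (a->total + 128) / a->samples;
}

int main(void)
{
        /* hypothetical think times in jiffies; one outlier in the middle */
        unsigned long ticks[] = { 2, 2, 40, 2, 2, 2 };
        struct decaying_avg think = { 0, 0, 0 };
        unsigned int i;

        for (i = 0; i < sizeof(ticks) / sizeof(ticks[0]); i++) {
                avg_add(&think, ticks[i]);
                printf("sample %2lu -> mean %lu\n", ticks[i], think.mean);
        }
        /* the outlier lifts the mean briefly and then decays away, which is
         * what lets cfq_update_idle_window() compare ttime_mean against
         * cfq_slice_idle without reacting to a single slow request */
        return 0;
}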
1689 1661
1690 /* 1662 /*
1691 * Disable idle window if the process thinks too long or seeks so much that 1663 * Disable idle window if the process thinks too long or seeks so much that
1692 * it doesn't matter 1664 * it doesn't matter
1693 */ 1665 */
1694 static void 1666 static void
1695 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1667 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1696 struct cfq_io_context *cic) 1668 struct cfq_io_context *cic)
1697 { 1669 {
1698 int enable_idle; 1670 int enable_idle;
1699 1671
1700 if (!cfq_cfqq_sync(cfqq)) 1672 /*
1673 * Don't idle for async or idle io prio class
1674 */
1675 if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq))
1701 return; 1676 return;
1702 1677
1703 enable_idle = cfq_cfqq_idle_window(cfqq); 1678 enable_idle = cfq_cfqq_idle_window(cfqq);
1704 1679
1705 if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || 1680 if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
1706 (cfqd->hw_tag && CIC_SEEKY(cic))) 1681 (cfqd->hw_tag && CIC_SEEKY(cic)))
1707 enable_idle = 0; 1682 enable_idle = 0;
1708 else if (sample_valid(cic->ttime_samples)) { 1683 else if (sample_valid(cic->ttime_samples)) {
1709 if (cic->ttime_mean > cfqd->cfq_slice_idle) 1684 if (cic->ttime_mean > cfqd->cfq_slice_idle)
1710 enable_idle = 0; 1685 enable_idle = 0;
1711 else 1686 else
1712 enable_idle = 1; 1687 enable_idle = 1;
1713 } 1688 }
1714 1689
1715 if (enable_idle) 1690 if (enable_idle)
1716 cfq_mark_cfqq_idle_window(cfqq); 1691 cfq_mark_cfqq_idle_window(cfqq);
1717 else 1692 else
1718 cfq_clear_cfqq_idle_window(cfqq); 1693 cfq_clear_cfqq_idle_window(cfqq);
1719 } 1694 }
1720 1695
1721 /* 1696 /*
1722 * Check if new_cfqq should preempt the currently active queue. Return 0 for 1697 * Check if new_cfqq should preempt the currently active queue. Return 0 for
1723 * no or if we aren't sure; a 1 will cause a preempt. 1698 * no or if we aren't sure; a 1 will cause a preempt.
1724 */ 1699 */
1725 static int 1700 static int
1726 cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, 1701 cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1727 struct request *rq) 1702 struct request *rq)
1728 { 1703 {
1729 struct cfq_queue *cfqq; 1704 struct cfq_queue *cfqq;
1730 1705
1731 cfqq = cfqd->active_queue; 1706 cfqq = cfqd->active_queue;
1732 if (!cfqq) 1707 if (!cfqq)
1733 return 0; 1708 return 0;
1734 1709
1735 if (cfq_slice_used(cfqq)) 1710 if (cfq_slice_used(cfqq))
1736 return 1; 1711 return 1;
1737 1712
1738 if (cfq_class_idle(new_cfqq)) 1713 if (cfq_class_idle(new_cfqq))
1739 return 0; 1714 return 0;
1740 1715
1741 if (cfq_class_idle(cfqq)) 1716 if (cfq_class_idle(cfqq))
1742 return 1; 1717 return 1;
1743 1718
1744 /* 1719 /*
1745 * if the new request is sync, but the currently running queue is 1720 * if the new request is sync, but the currently running queue is
1746 * not, let the sync request have priority. 1721 * not, let the sync request have priority.
1747 */ 1722 */
1748 if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq)) 1723 if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
1749 return 1; 1724 return 1;
1750 1725
1751 /* 1726 /*
1752 * So both queues are sync. Let the new request get disk time if 1727 * So both queues are sync. Let the new request get disk time if
1753 * it's a metadata request and the current queue is doing regular IO. 1728 * it's a metadata request and the current queue is doing regular IO.
1754 */ 1729 */
1755 if (rq_is_meta(rq) && !cfqq->meta_pending) 1730 if (rq_is_meta(rq) && !cfqq->meta_pending)
1756 return 1; 1731 return 1;
1757 1732
1758 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) 1733 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
1759 return 0; 1734 return 0;
1760 1735
1761 /* 1736 /*
1762 * if this request is as-good as one we would expect from the 1737 * if this request is as-good as one we would expect from the
1763 * current cfqq, let it preempt 1738 * current cfqq, let it preempt
1764 */ 1739 */
1765 if (cfq_rq_close(cfqd, rq)) 1740 if (cfq_rq_close(cfqd, rq))
1766 return 1; 1741 return 1;
1767 1742
1768 return 0; 1743 return 0;
1769 } 1744 }
1770 1745
1771 /* 1746 /*
1772 * cfqq preempts the active queue. if we allowed preempt with no slice left, 1747 * cfqq preempts the active queue. if we allowed preempt with no slice left,
1773 * let it have half of its nominal slice. 1748 * let it have half of its nominal slice.
1774 */ 1749 */
1775 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1750 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1776 { 1751 {
1777 cfq_slice_expired(cfqd, 1); 1752 cfq_slice_expired(cfqd, 1);
1778 1753
1779 /* 1754 /*
1780 * Put the new queue at the front of the current list, 1755 * Put the new queue at the front of the current list,
1781 * so we know that it will be selected next. 1756 * so we know that it will be selected next.
1782 */ 1757 */
1783 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 1758 BUG_ON(!cfq_cfqq_on_rr(cfqq));
1784 1759
1785 cfq_service_tree_add(cfqd, cfqq, 1); 1760 cfq_service_tree_add(cfqd, cfqq, 1);
1786 1761
1787 cfqq->slice_end = 0; 1762 cfqq->slice_end = 0;
1788 cfq_mark_cfqq_slice_new(cfqq); 1763 cfq_mark_cfqq_slice_new(cfqq);
1789 } 1764 }
1790 1765
1791 /* 1766 /*
1792 * Called when a new fs request (rq) is added (to cfqq). Check if there's 1767 * Called when a new fs request (rq) is added (to cfqq). Check if there's
1793 * something we should do about it 1768 * something we should do about it
1794 */ 1769 */
1795 static void 1770 static void
1796 cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1771 cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1797 struct request *rq) 1772 struct request *rq)
1798 { 1773 {
1799 struct cfq_io_context *cic = RQ_CIC(rq); 1774 struct cfq_io_context *cic = RQ_CIC(rq);
1800 1775
1801 if (rq_is_meta(rq)) 1776 if (rq_is_meta(rq))
1802 cfqq->meta_pending++; 1777 cfqq->meta_pending++;
1803 1778
1804 cfq_update_io_thinktime(cfqd, cic); 1779 cfq_update_io_thinktime(cfqd, cic);
1805 cfq_update_io_seektime(cfqd, cic, rq); 1780 cfq_update_io_seektime(cfqd, cic, rq);
1806 cfq_update_idle_window(cfqd, cfqq, cic); 1781 cfq_update_idle_window(cfqd, cfqq, cic);
1807 1782
1808 cic->last_request_pos = rq->sector + rq->nr_sectors; 1783 cic->last_request_pos = rq->sector + rq->nr_sectors;
1809 1784
1810 if (cfqq == cfqd->active_queue) { 1785 if (cfqq == cfqd->active_queue) {
1811 /* 1786 /*
1812 * if we are waiting for a request for this queue, let it rip 1787 * if we are waiting for a request for this queue, let it rip
1813 * immediately and flag that we must not expire this queue 1788 * immediately and flag that we must not expire this queue
1814 * just now 1789 * just now
1815 */ 1790 */
1816 if (cfq_cfqq_wait_request(cfqq)) { 1791 if (cfq_cfqq_wait_request(cfqq)) {
1817 cfq_mark_cfqq_must_dispatch(cfqq); 1792 cfq_mark_cfqq_must_dispatch(cfqq);
1818 del_timer(&cfqd->idle_slice_timer); 1793 del_timer(&cfqd->idle_slice_timer);
1819 blk_start_queueing(cfqd->queue); 1794 blk_start_queueing(cfqd->queue);
1820 } 1795 }
1821 } else if (cfq_should_preempt(cfqd, cfqq, rq)) { 1796 } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
1822 /* 1797 /*
1823 * not the active queue - expire current slice if it is 1798 * not the active queue - expire current slice if it is
1824 * idle and has expired its mean thinktime or this new queue 1799 * idle and has expired its mean thinktime or this new queue
1825 * has some old slice time left and is of higher priority 1800 * has some old slice time left and is of higher priority
1826 */ 1801 */
1827 cfq_preempt_queue(cfqd, cfqq); 1802 cfq_preempt_queue(cfqd, cfqq);
1828 cfq_mark_cfqq_must_dispatch(cfqq); 1803 cfq_mark_cfqq_must_dispatch(cfqq);
1829 blk_start_queueing(cfqd->queue); 1804 blk_start_queueing(cfqd->queue);
1830 } 1805 }
1831 } 1806 }
1832 1807
1833 static void cfq_insert_request(struct request_queue *q, struct request *rq) 1808 static void cfq_insert_request(struct request_queue *q, struct request *rq)
1834 { 1809 {
1835 struct cfq_data *cfqd = q->elevator->elevator_data; 1810 struct cfq_data *cfqd = q->elevator->elevator_data;
1836 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1811 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1837 1812
1838 cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc); 1813 cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
1839 1814
1840 cfq_add_rq_rb(rq); 1815 cfq_add_rq_rb(rq);
1841 1816
1842 list_add_tail(&rq->queuelist, &cfqq->fifo); 1817 list_add_tail(&rq->queuelist, &cfqq->fifo);
1843 1818
1844 cfq_rq_enqueued(cfqd, cfqq, rq); 1819 cfq_rq_enqueued(cfqd, cfqq, rq);
1845 } 1820 }
1846 1821
1847 static void cfq_completed_request(struct request_queue *q, struct request *rq) 1822 static void cfq_completed_request(struct request_queue *q, struct request *rq)
1848 { 1823 {
1849 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1824 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1850 struct cfq_data *cfqd = cfqq->cfqd; 1825 struct cfq_data *cfqd = cfqq->cfqd;
1851 const int sync = rq_is_sync(rq); 1826 const int sync = rq_is_sync(rq);
1852 unsigned long now; 1827 unsigned long now;
1853 1828
1854 now = jiffies; 1829 now = jiffies;
1855 1830
1856 WARN_ON(!cfqd->rq_in_driver); 1831 WARN_ON(!cfqd->rq_in_driver);
1857 WARN_ON(!cfqq->dispatched); 1832 WARN_ON(!cfqq->dispatched);
1858 cfqd->rq_in_driver--; 1833 cfqd->rq_in_driver--;
1859 cfqq->dispatched--; 1834 cfqq->dispatched--;
1860 1835
1861 if (cfq_cfqq_sync(cfqq)) 1836 if (cfq_cfqq_sync(cfqq))
1862 cfqd->sync_flight--; 1837 cfqd->sync_flight--;
1863 1838
1864 if (!cfq_class_idle(cfqq)) 1839 if (!cfq_class_idle(cfqq))
1865 cfqd->last_end_request = now; 1840 cfqd->last_end_request = now;
1866 1841
1867 if (sync) 1842 if (sync)
1868 RQ_CIC(rq)->last_end_request = now; 1843 RQ_CIC(rq)->last_end_request = now;
1869 1844
1870 /* 1845 /*
1871 * If this is the active queue, check if it needs to be expired, 1846 * If this is the active queue, check if it needs to be expired,
1872 * or if we want to idle in case it has no pending requests. 1847 * or if we want to idle in case it has no pending requests.
1873 */ 1848 */
1874 if (cfqd->active_queue == cfqq) { 1849 if (cfqd->active_queue == cfqq) {
1875 if (cfq_cfqq_slice_new(cfqq)) { 1850 if (cfq_cfqq_slice_new(cfqq)) {
1876 cfq_set_prio_slice(cfqd, cfqq); 1851 cfq_set_prio_slice(cfqd, cfqq);
1877 cfq_clear_cfqq_slice_new(cfqq); 1852 cfq_clear_cfqq_slice_new(cfqq);
1878 } 1853 }
1879 if (cfq_slice_used(cfqq)) 1854 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
1880 cfq_slice_expired(cfqd, 1); 1855 cfq_slice_expired(cfqd, 1);
1881 else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) 1856 else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list))
1882 cfq_arm_slice_timer(cfqd); 1857 cfq_arm_slice_timer(cfqd);
1883 } 1858 }
1884 1859
1885 if (!cfqd->rq_in_driver) 1860 if (!cfqd->rq_in_driver)
1886 cfq_schedule_dispatch(cfqd); 1861 cfq_schedule_dispatch(cfqd);
1887 } 1862 }
1888 1863
1889 /* 1864 /*
1890 * we temporarily boost lower priority queues if they are holding fs exclusive 1865 * we temporarily boost lower priority queues if they are holding fs exclusive
1891 * resources. they are boosted to normal prio (CLASS_BE/4) 1866 * resources. they are boosted to normal prio (CLASS_BE/4)
1892 */ 1867 */
1893 static void cfq_prio_boost(struct cfq_queue *cfqq) 1868 static void cfq_prio_boost(struct cfq_queue *cfqq)
1894 { 1869 {
1895 if (has_fs_excl()) { 1870 if (has_fs_excl()) {
1896 /* 1871 /*
1897 * boost idle prio on transactions that would lock out other 1872 * boost idle prio on transactions that would lock out other
1898 * users of the filesystem 1873 * users of the filesystem
1899 */ 1874 */
1900 if (cfq_class_idle(cfqq)) 1875 if (cfq_class_idle(cfqq))
1901 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1876 cfqq->ioprio_class = IOPRIO_CLASS_BE;
1902 if (cfqq->ioprio > IOPRIO_NORM) 1877 if (cfqq->ioprio > IOPRIO_NORM)
1903 cfqq->ioprio = IOPRIO_NORM; 1878 cfqq->ioprio = IOPRIO_NORM;
1904 } else { 1879 } else {
1905 /* 1880 /*
1906 * check if we need to unboost the queue 1881 * check if we need to unboost the queue
1907 */ 1882 */
1908 if (cfqq->ioprio_class != cfqq->org_ioprio_class) 1883 if (cfqq->ioprio_class != cfqq->org_ioprio_class)
1909 cfqq->ioprio_class = cfqq->org_ioprio_class; 1884 cfqq->ioprio_class = cfqq->org_ioprio_class;
1910 if (cfqq->ioprio != cfqq->org_ioprio) 1885 if (cfqq->ioprio != cfqq->org_ioprio)
1911 cfqq->ioprio = cfqq->org_ioprio; 1886 cfqq->ioprio = cfqq->org_ioprio;
1912 } 1887 }
1913 } 1888 }
1914 1889
1915 static inline int __cfq_may_queue(struct cfq_queue *cfqq) 1890 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
1916 { 1891 {
1917 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && 1892 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
1918 !cfq_cfqq_must_alloc_slice(cfqq)) { 1893 !cfq_cfqq_must_alloc_slice(cfqq)) {
1919 cfq_mark_cfqq_must_alloc_slice(cfqq); 1894 cfq_mark_cfqq_must_alloc_slice(cfqq);
1920 return ELV_MQUEUE_MUST; 1895 return ELV_MQUEUE_MUST;
1921 } 1896 }
1922 1897
1923 return ELV_MQUEUE_MAY; 1898 return ELV_MQUEUE_MAY;
1924 } 1899 }
1925 1900
1926 static int cfq_may_queue(struct request_queue *q, int rw) 1901 static int cfq_may_queue(struct request_queue *q, int rw)
1927 { 1902 {
1928 struct cfq_data *cfqd = q->elevator->elevator_data; 1903 struct cfq_data *cfqd = q->elevator->elevator_data;
1929 struct task_struct *tsk = current; 1904 struct task_struct *tsk = current;
1930 struct cfq_io_context *cic; 1905 struct cfq_io_context *cic;
1931 struct cfq_queue *cfqq; 1906 struct cfq_queue *cfqq;
1932 1907
1933 /* 1908 /*
1934 * don't force setup of a queue from here, as a call to may_queue 1909 * don't force setup of a queue from here, as a call to may_queue
1935 * does not necessarily imply that a request actually will be queued. 1910 * does not necessarily imply that a request actually will be queued.
1936 * so just lookup a possibly existing queue, or return 'may queue' 1911 * so just lookup a possibly existing queue, or return 'may queue'
1937 * if that fails 1912 * if that fails
1938 */ 1913 */
1939 cic = cfq_cic_lookup(cfqd, tsk->io_context); 1914 cic = cfq_cic_lookup(cfqd, tsk->io_context);
1940 if (!cic) 1915 if (!cic)
1941 return ELV_MQUEUE_MAY; 1916 return ELV_MQUEUE_MAY;
1942 1917
1943 cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC); 1918 cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC);
1944 if (cfqq) { 1919 if (cfqq) {
1945 cfq_init_prio_data(cfqq, cic->ioc); 1920 cfq_init_prio_data(cfqq, cic->ioc);
1946 cfq_prio_boost(cfqq); 1921 cfq_prio_boost(cfqq);
1947 1922
1948 return __cfq_may_queue(cfqq); 1923 return __cfq_may_queue(cfqq);
1949 } 1924 }
1950 1925
1951 return ELV_MQUEUE_MAY; 1926 return ELV_MQUEUE_MAY;
1952 } 1927 }
1953 1928
1954 /* 1929 /*
1955 * queue lock held here 1930 * queue lock held here
1956 */ 1931 */
1957 static void cfq_put_request(struct request *rq) 1932 static void cfq_put_request(struct request *rq)
1958 { 1933 {
1959 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1934 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1960 1935
1961 if (cfqq) { 1936 if (cfqq) {
1962 const int rw = rq_data_dir(rq); 1937 const int rw = rq_data_dir(rq);
1963 1938
1964 BUG_ON(!cfqq->allocated[rw]); 1939 BUG_ON(!cfqq->allocated[rw]);
1965 cfqq->allocated[rw]--; 1940 cfqq->allocated[rw]--;
1966 1941
1967 put_io_context(RQ_CIC(rq)->ioc); 1942 put_io_context(RQ_CIC(rq)->ioc);
1968 1943
1969 rq->elevator_private = NULL; 1944 rq->elevator_private = NULL;
1970 rq->elevator_private2 = NULL; 1945 rq->elevator_private2 = NULL;
1971 1946
1972 cfq_put_queue(cfqq); 1947 cfq_put_queue(cfqq);
1973 } 1948 }
1974 } 1949 }
1975 1950
1976 /* 1951 /*
1977 * Allocate cfq data structures associated with this request. 1952 * Allocate cfq data structures associated with this request.
1978 */ 1953 */
1979 static int 1954 static int
1980 cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) 1955 cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
1981 { 1956 {
1982 struct cfq_data *cfqd = q->elevator->elevator_data; 1957 struct cfq_data *cfqd = q->elevator->elevator_data;
1983 struct cfq_io_context *cic; 1958 struct cfq_io_context *cic;
1984 const int rw = rq_data_dir(rq); 1959 const int rw = rq_data_dir(rq);
1985 const int is_sync = rq_is_sync(rq); 1960 const int is_sync = rq_is_sync(rq);
1986 struct cfq_queue *cfqq; 1961 struct cfq_queue *cfqq;
1987 unsigned long flags; 1962 unsigned long flags;
1988 1963
1989 might_sleep_if(gfp_mask & __GFP_WAIT); 1964 might_sleep_if(gfp_mask & __GFP_WAIT);
1990 1965
1991 cic = cfq_get_io_context(cfqd, gfp_mask); 1966 cic = cfq_get_io_context(cfqd, gfp_mask);
1992 1967
1993 spin_lock_irqsave(q->queue_lock, flags); 1968 spin_lock_irqsave(q->queue_lock, flags);
1994 1969
1995 if (!cic) 1970 if (!cic)
1996 goto queue_fail; 1971 goto queue_fail;
1997 1972
1998 cfqq = cic_to_cfqq(cic, is_sync); 1973 cfqq = cic_to_cfqq(cic, is_sync);
1999 if (!cfqq) { 1974 if (!cfqq) {
2000 cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask); 1975 cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
2001 1976
2002 if (!cfqq) 1977 if (!cfqq)
2003 goto queue_fail; 1978 goto queue_fail;
2004 1979
2005 cic_set_cfqq(cic, cfqq, is_sync); 1980 cic_set_cfqq(cic, cfqq, is_sync);
2006 } 1981 }
2007 1982
2008 cfqq->allocated[rw]++; 1983 cfqq->allocated[rw]++;
2009 cfq_clear_cfqq_must_alloc(cfqq); 1984 cfq_clear_cfqq_must_alloc(cfqq);
2010 atomic_inc(&cfqq->ref); 1985 atomic_inc(&cfqq->ref);
2011 1986
2012 spin_unlock_irqrestore(q->queue_lock, flags); 1987 spin_unlock_irqrestore(q->queue_lock, flags);
2013 1988
2014 rq->elevator_private = cic; 1989 rq->elevator_private = cic;
2015 rq->elevator_private2 = cfqq; 1990 rq->elevator_private2 = cfqq;
2016 return 0; 1991 return 0;
2017 1992
2018 queue_fail: 1993 queue_fail:
2019 if (cic) 1994 if (cic)
2020 put_io_context(cic->ioc); 1995 put_io_context(cic->ioc);
2021 1996
2022 cfq_schedule_dispatch(cfqd); 1997 cfq_schedule_dispatch(cfqd);
2023 spin_unlock_irqrestore(q->queue_lock, flags); 1998 spin_unlock_irqrestore(q->queue_lock, flags);
2024 return 1; 1999 return 1;
2025 } 2000 }
2026 2001
2027 static void cfq_kick_queue(struct work_struct *work) 2002 static void cfq_kick_queue(struct work_struct *work)
2028 { 2003 {
2029 struct cfq_data *cfqd = 2004 struct cfq_data *cfqd =
2030 container_of(work, struct cfq_data, unplug_work); 2005 container_of(work, struct cfq_data, unplug_work);
2031 struct request_queue *q = cfqd->queue; 2006 struct request_queue *q = cfqd->queue;
2032 unsigned long flags; 2007 unsigned long flags;
2033 2008
2034 spin_lock_irqsave(q->queue_lock, flags); 2009 spin_lock_irqsave(q->queue_lock, flags);
2035 blk_start_queueing(q); 2010 blk_start_queueing(q);
2036 spin_unlock_irqrestore(q->queue_lock, flags); 2011 spin_unlock_irqrestore(q->queue_lock, flags);
2037 } 2012 }
2038 2013
2039 /* 2014 /*
2040 * Timer running if the active_queue is currently idling inside its time slice 2015 * Timer running if the active_queue is currently idling inside its time slice
2041 */ 2016 */
2042 static void cfq_idle_slice_timer(unsigned long data) 2017 static void cfq_idle_slice_timer(unsigned long data)
2043 { 2018 {
2044 struct cfq_data *cfqd = (struct cfq_data *) data; 2019 struct cfq_data *cfqd = (struct cfq_data *) data;
2045 struct cfq_queue *cfqq; 2020 struct cfq_queue *cfqq;
2046 unsigned long flags; 2021 unsigned long flags;
2047 int timed_out = 1; 2022 int timed_out = 1;
2048 2023
2049 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 2024 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
2050 2025
2051 if ((cfqq = cfqd->active_queue) != NULL) { 2026 if ((cfqq = cfqd->active_queue) != NULL) {
2052 timed_out = 0; 2027 timed_out = 0;
2053 2028
2054 /* 2029 /*
2055 * expired 2030 * expired
2056 */ 2031 */
2057 if (cfq_slice_used(cfqq)) 2032 if (cfq_slice_used(cfqq))
2058 goto expire; 2033 goto expire;
2059 2034
2060 /* 2035 /*
2061 * only expire and reinvoke request handler, if there are 2036 * only expire and reinvoke request handler, if there are
2062 * other queues with pending requests 2037 * other queues with pending requests
2063 */ 2038 */
2064 if (!cfqd->busy_queues) 2039 if (!cfqd->busy_queues)
2065 goto out_cont; 2040 goto out_cont;
2066 2041
2067 /* 2042 /*
2068 * not expired and it has a request pending, let it dispatch 2043 * not expired and it has a request pending, let it dispatch
2069 */ 2044 */
2070 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) { 2045 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) {
2071 cfq_mark_cfqq_must_dispatch(cfqq); 2046 cfq_mark_cfqq_must_dispatch(cfqq);
2072 goto out_kick; 2047 goto out_kick;
2073 } 2048 }
2074 } 2049 }
2075 expire: 2050 expire:
2076 cfq_slice_expired(cfqd, timed_out); 2051 cfq_slice_expired(cfqd, timed_out);
2077 out_kick: 2052 out_kick:
2078 cfq_schedule_dispatch(cfqd); 2053 cfq_schedule_dispatch(cfqd);
2079 out_cont: 2054 out_cont:
2080 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 2055 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
2081 } 2056 }
2082 2057
2083 /*
2084 * Timer running if an idle class queue is waiting for service
2085 */
2086 static void cfq_idle_class_timer(unsigned long data)
2087 {
2088 struct cfq_data *cfqd = (struct cfq_data *) data;
2089 unsigned long flags;
2090
2091 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
2092
2093 /*
2094 * race with a non-idle queue, reset timer
2095 */
2096 if (!start_idle_class_timer(cfqd))
2097 cfq_schedule_dispatch(cfqd);
2098
2099 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
2100 }
2101
2102 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) 2058 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
2103 { 2059 {
2104 del_timer_sync(&cfqd->idle_slice_timer); 2060 del_timer_sync(&cfqd->idle_slice_timer);
2105 del_timer_sync(&cfqd->idle_class_timer);
2106 kblockd_flush_work(&cfqd->unplug_work); 2061 kblockd_flush_work(&cfqd->unplug_work);
2107 } 2062 }
2108 2063
2109 static void cfq_put_async_queues(struct cfq_data *cfqd) 2064 static void cfq_put_async_queues(struct cfq_data *cfqd)
2110 { 2065 {
2111 int i; 2066 int i;
2112 2067
2113 for (i = 0; i < IOPRIO_BE_NR; i++) { 2068 for (i = 0; i < IOPRIO_BE_NR; i++) {
2114 if (cfqd->async_cfqq[0][i]) 2069 if (cfqd->async_cfqq[0][i])
2115 cfq_put_queue(cfqd->async_cfqq[0][i]); 2070 cfq_put_queue(cfqd->async_cfqq[0][i]);
2116 if (cfqd->async_cfqq[1][i]) 2071 if (cfqd->async_cfqq[1][i])
2117 cfq_put_queue(cfqd->async_cfqq[1][i]); 2072 cfq_put_queue(cfqd->async_cfqq[1][i]);
2118 } 2073 }
2119 2074
2120 if (cfqd->async_idle_cfqq) 2075 if (cfqd->async_idle_cfqq)
2121 cfq_put_queue(cfqd->async_idle_cfqq); 2076 cfq_put_queue(cfqd->async_idle_cfqq);
2122 } 2077 }
2123 2078
2124 static void cfq_exit_queue(elevator_t *e) 2079 static void cfq_exit_queue(elevator_t *e)
2125 { 2080 {
2126 struct cfq_data *cfqd = e->elevator_data; 2081 struct cfq_data *cfqd = e->elevator_data;
2127 struct request_queue *q = cfqd->queue; 2082 struct request_queue *q = cfqd->queue;
2128 2083
2129 cfq_shutdown_timer_wq(cfqd); 2084 cfq_shutdown_timer_wq(cfqd);
2130 2085
2131 spin_lock_irq(q->queue_lock); 2086 spin_lock_irq(q->queue_lock);
2132 2087
2133 if (cfqd->active_queue) 2088 if (cfqd->active_queue)
2134 __cfq_slice_expired(cfqd, cfqd->active_queue, 0); 2089 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
2135 2090
2136 while (!list_empty(&cfqd->cic_list)) { 2091 while (!list_empty(&cfqd->cic_list)) {
2137 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, 2092 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
2138 struct cfq_io_context, 2093 struct cfq_io_context,
2139 queue_list); 2094 queue_list);
2140 2095
2141 __cfq_exit_single_io_context(cfqd, cic); 2096 __cfq_exit_single_io_context(cfqd, cic);
2142 } 2097 }
2143 2098
2144 cfq_put_async_queues(cfqd); 2099 cfq_put_async_queues(cfqd);
2145 2100
2146 spin_unlock_irq(q->queue_lock); 2101 spin_unlock_irq(q->queue_lock);
2147 2102
2148 cfq_shutdown_timer_wq(cfqd); 2103 cfq_shutdown_timer_wq(cfqd);
2149 2104
2150 kfree(cfqd); 2105 kfree(cfqd);
2151 } 2106 }
2152 2107
2153 static void *cfq_init_queue(struct request_queue *q) 2108 static void *cfq_init_queue(struct request_queue *q)
2154 { 2109 {
2155 struct cfq_data *cfqd; 2110 struct cfq_data *cfqd;
2156 2111
2157 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); 2112 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
2158 if (!cfqd) 2113 if (!cfqd)
2159 return NULL; 2114 return NULL;
2160 2115
2161 cfqd->service_tree = CFQ_RB_ROOT; 2116 cfqd->service_tree = CFQ_RB_ROOT;
2162 INIT_LIST_HEAD(&cfqd->cic_list); 2117 INIT_LIST_HEAD(&cfqd->cic_list);
2163 2118
2164 cfqd->queue = q; 2119 cfqd->queue = q;
2165 2120
2166 init_timer(&cfqd->idle_slice_timer); 2121 init_timer(&cfqd->idle_slice_timer);
2167 cfqd->idle_slice_timer.function = cfq_idle_slice_timer; 2122 cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
2168 cfqd->idle_slice_timer.data = (unsigned long) cfqd; 2123 cfqd->idle_slice_timer.data = (unsigned long) cfqd;
2169
2170 init_timer(&cfqd->idle_class_timer);
2171 cfqd->idle_class_timer.function = cfq_idle_class_timer;
2172 cfqd->idle_class_timer.data = (unsigned long) cfqd;
2173 2124
2174 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); 2125 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
2175 2126
2176 cfqd->last_end_request = jiffies; 2127 cfqd->last_end_request = jiffies;
2177 cfqd->cfq_quantum = cfq_quantum; 2128 cfqd->cfq_quantum = cfq_quantum;
2178 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; 2129 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
2179 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; 2130 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
2180 cfqd->cfq_back_max = cfq_back_max; 2131 cfqd->cfq_back_max = cfq_back_max;
2181 cfqd->cfq_back_penalty = cfq_back_penalty; 2132 cfqd->cfq_back_penalty = cfq_back_penalty;
2182 cfqd->cfq_slice[0] = cfq_slice_async; 2133 cfqd->cfq_slice[0] = cfq_slice_async;
2183 cfqd->cfq_slice[1] = cfq_slice_sync; 2134 cfqd->cfq_slice[1] = cfq_slice_sync;
2184 cfqd->cfq_slice_async_rq = cfq_slice_async_rq; 2135 cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
2185 cfqd->cfq_slice_idle = cfq_slice_idle; 2136 cfqd->cfq_slice_idle = cfq_slice_idle;
2186 2137
2187 return cfqd; 2138 return cfqd;
2188 } 2139 }
2189 2140
2190 static void cfq_slab_kill(void) 2141 static void cfq_slab_kill(void)
2191 { 2142 {
2192 if (cfq_pool) 2143 if (cfq_pool)
2193 kmem_cache_destroy(cfq_pool); 2144 kmem_cache_destroy(cfq_pool);
2194 if (cfq_ioc_pool) 2145 if (cfq_ioc_pool)
2195 kmem_cache_destroy(cfq_ioc_pool); 2146 kmem_cache_destroy(cfq_ioc_pool);
2196 } 2147 }
2197 2148
2198 static int __init cfq_slab_setup(void) 2149 static int __init cfq_slab_setup(void)
2199 { 2150 {
2200 cfq_pool = KMEM_CACHE(cfq_queue, 0); 2151 cfq_pool = KMEM_CACHE(cfq_queue, 0);
2201 if (!cfq_pool) 2152 if (!cfq_pool)
2202 goto fail; 2153 goto fail;
2203 2154
2204 cfq_ioc_pool = KMEM_CACHE(cfq_io_context, SLAB_DESTROY_BY_RCU); 2155 cfq_ioc_pool = KMEM_CACHE(cfq_io_context, SLAB_DESTROY_BY_RCU);
2205 if (!cfq_ioc_pool) 2156 if (!cfq_ioc_pool)
2206 goto fail; 2157 goto fail;
2207 2158
2208 return 0; 2159 return 0;
2209 fail: 2160 fail:
2210 cfq_slab_kill(); 2161 cfq_slab_kill();
2211 return -ENOMEM; 2162 return -ENOMEM;
2212 } 2163 }
2213 2164
2214 /* 2165 /*
2215 * sysfs parts below --> 2166 * sysfs parts below -->
2216 */ 2167 */
2217 static ssize_t 2168 static ssize_t
2218 cfq_var_show(unsigned int var, char *page) 2169 cfq_var_show(unsigned int var, char *page)
2219 { 2170 {
2220 return sprintf(page, "%d\n", var); 2171 return sprintf(page, "%d\n", var);
2221 } 2172 }
2222 2173
2223 static ssize_t 2174 static ssize_t
2224 cfq_var_store(unsigned int *var, const char *page, size_t count) 2175 cfq_var_store(unsigned int *var, const char *page, size_t count)
2225 { 2176 {
2226 char *p = (char *) page; 2177 char *p = (char *) page;
2227 2178
2228 *var = simple_strtoul(p, &p, 10); 2179 *var = simple_strtoul(p, &p, 10);
2229 return count; 2180 return count;
2230 } 2181 }
2231 2182
2232 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ 2183 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
2233 static ssize_t __FUNC(elevator_t *e, char *page) \ 2184 static ssize_t __FUNC(elevator_t *e, char *page) \
2234 { \ 2185 { \
2235 struct cfq_data *cfqd = e->elevator_data; \ 2186 struct cfq_data *cfqd = e->elevator_data; \
2236 unsigned int __data = __VAR; \ 2187 unsigned int __data = __VAR; \
2237 if (__CONV) \ 2188 if (__CONV) \
2238 __data = jiffies_to_msecs(__data); \ 2189 __data = jiffies_to_msecs(__data); \
2239 return cfq_var_show(__data, (page)); \ 2190 return cfq_var_show(__data, (page)); \
2240 } 2191 }
2241 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); 2192 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
2242 SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); 2193 SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
2243 SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); 2194 SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
2244 SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0); 2195 SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
2245 SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0); 2196 SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0);
2246 SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); 2197 SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
2247 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); 2198 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
2248 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); 2199 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
2249 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); 2200 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
2250 #undef SHOW_FUNCTION 2201 #undef SHOW_FUNCTION
2251 2202
2252 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ 2203 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
2253 static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ 2204 static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
2254 { \ 2205 { \
2255 struct cfq_data *cfqd = e->elevator_data; \ 2206 struct cfq_data *cfqd = e->elevator_data; \
2256 unsigned int __data; \ 2207 unsigned int __data; \
2257 int ret = cfq_var_store(&__data, (page), count); \ 2208 int ret = cfq_var_store(&__data, (page), count); \
2258 if (__data < (MIN)) \ 2209 if (__data < (MIN)) \
2259 __data = (MIN); \ 2210 __data = (MIN); \
2260 else if (__data > (MAX)) \ 2211 else if (__data > (MAX)) \
2261 __data = (MAX); \ 2212 __data = (MAX); \
2262 if (__CONV) \ 2213 if (__CONV) \
2263 *(__PTR) = msecs_to_jiffies(__data); \ 2214 *(__PTR) = msecs_to_jiffies(__data); \
2264 else \ 2215 else \
2265 *(__PTR) = __data; \ 2216 *(__PTR) = __data; \
2266 return ret; \ 2217 return ret; \
2267 } 2218 }
2268 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); 2219 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
2269 STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1); 2220 STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1);
2270 STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1); 2221 STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1);
2271 STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); 2222 STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
2272 STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0); 2223 STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0);
2273 STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); 2224 STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
2274 STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); 2225 STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
2275 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); 2226 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
2276 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); 2227 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0);
2277 #undef STORE_FUNCTION 2228 #undef STORE_FUNCTION
2278 2229
2279 #define CFQ_ATTR(name) \ 2230 #define CFQ_ATTR(name) \
2280 __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store) 2231 __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
2281 2232
2282 static struct elv_fs_entry cfq_attrs[] = { 2233 static struct elv_fs_entry cfq_attrs[] = {
2283 CFQ_ATTR(quantum), 2234 CFQ_ATTR(quantum),
2284 CFQ_ATTR(fifo_expire_sync), 2235 CFQ_ATTR(fifo_expire_sync),
2285 CFQ_ATTR(fifo_expire_async), 2236 CFQ_ATTR(fifo_expire_async),
2286 CFQ_ATTR(back_seek_max), 2237 CFQ_ATTR(back_seek_max),
2287 CFQ_ATTR(back_seek_penalty), 2238 CFQ_ATTR(back_seek_penalty),
2288 CFQ_ATTR(slice_sync), 2239 CFQ_ATTR(slice_sync),
2289 CFQ_ATTR(slice_async), 2240 CFQ_ATTR(slice_async),
2290 CFQ_ATTR(slice_async_rq), 2241 CFQ_ATTR(slice_async_rq),
2291 CFQ_ATTR(slice_idle), 2242 CFQ_ATTR(slice_idle),
2292 __ATTR_NULL 2243 __ATTR_NULL
2293 }; 2244 };
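
The attributes above are surfaced by the elevator core under each queue's iosched directory in sysfs. A minimal userspace sketch of adjusting one of them, assuming a disk named sda with cfq selected as its scheduler and the usual /sys/block layout (device name and value are illustrative, not part of this change):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* slice_idle is read and written in milliseconds */
        const char *path = "/sys/block/sda/queue/iosched/slice_idle";
        int fd = open(path, O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (write(fd, "8", 1) != 1)
                perror("write");
        close(fd);
        return 0;
}

On the kernel side the written string is parsed by cfq_var_store(); STORE_FUNCTION then clamps it to its bounds and, for time-based tunables like this one, converts milliseconds to jiffies before storing it.
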
2294 2245
2295 static struct elevator_type iosched_cfq = { 2246 static struct elevator_type iosched_cfq = {
2296 .ops = { 2247 .ops = {
2297 .elevator_merge_fn = cfq_merge, 2248 .elevator_merge_fn = cfq_merge,
2298 .elevator_merged_fn = cfq_merged_request, 2249 .elevator_merged_fn = cfq_merged_request,
2299 .elevator_merge_req_fn = cfq_merged_requests, 2250 .elevator_merge_req_fn = cfq_merged_requests,
2300 .elevator_allow_merge_fn = cfq_allow_merge, 2251 .elevator_allow_merge_fn = cfq_allow_merge,
2301 .elevator_dispatch_fn = cfq_dispatch_requests, 2252 .elevator_dispatch_fn = cfq_dispatch_requests,
2302 .elevator_add_req_fn = cfq_insert_request, 2253 .elevator_add_req_fn = cfq_insert_request,
2303 .elevator_activate_req_fn = cfq_activate_request, 2254 .elevator_activate_req_fn = cfq_activate_request,
2304 .elevator_deactivate_req_fn = cfq_deactivate_request, 2255 .elevator_deactivate_req_fn = cfq_deactivate_request,
2305 .elevator_queue_empty_fn = cfq_queue_empty, 2256 .elevator_queue_empty_fn = cfq_queue_empty,
2306 .elevator_completed_req_fn = cfq_completed_request, 2257 .elevator_completed_req_fn = cfq_completed_request,
2307 .elevator_former_req_fn = elv_rb_former_request, 2258 .elevator_former_req_fn = elv_rb_former_request,
2308 .elevator_latter_req_fn = elv_rb_latter_request, 2259 .elevator_latter_req_fn = elv_rb_latter_request,
2309 .elevator_set_req_fn = cfq_set_request, 2260 .elevator_set_req_fn = cfq_set_request,
2310 .elevator_put_req_fn = cfq_put_request, 2261 .elevator_put_req_fn = cfq_put_request,
2311 .elevator_may_queue_fn = cfq_may_queue, 2262 .elevator_may_queue_fn = cfq_may_queue,
2312 .elevator_init_fn = cfq_init_queue, 2263 .elevator_init_fn = cfq_init_queue,
2313 .elevator_exit_fn = cfq_exit_queue, 2264 .elevator_exit_fn = cfq_exit_queue,
2314 .trim = cfq_free_io_context, 2265 .trim = cfq_free_io_context,
2315 }, 2266 },
2316 .elevator_attrs = cfq_attrs, 2267 .elevator_attrs = cfq_attrs,
2317 .elevator_name = "cfq", 2268 .elevator_name = "cfq",
2318 .elevator_owner = THIS_MODULE, 2269 .elevator_owner = THIS_MODULE,
2319 }; 2270 };
2320 2271
2321 static int __init cfq_init(void) 2272 static int __init cfq_init(void)
2322 { 2273 {
2323 /* 2274 /*
2324 * could be 0 on HZ < 1000 setups 2275 * could be 0 on HZ < 1000 setups
2325 */ 2276 */
2326 if (!cfq_slice_async) 2277 if (!cfq_slice_async)
2327 cfq_slice_async = 1; 2278 cfq_slice_async = 1;
2328 if (!cfq_slice_idle) 2279 if (!cfq_slice_idle)
2329 cfq_slice_idle = 1; 2280 cfq_slice_idle = 1;
2330 2281
2331 if (cfq_slab_setup()) 2282 if (cfq_slab_setup())
2332 return -ENOMEM; 2283 return -ENOMEM;
2333 2284
2334 elv_register(&iosched_cfq); 2285 elv_register(&iosched_cfq);
2335 2286
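For reference, the HZ clamp in cfq_init() above only matters on low-HZ configurations: with HZ=1000 the defaults work out to cfq_slice_idle = 1000/125 = 8 jiffies (8 ms) and cfq_slice_async = 1000/25 = 40 jiffies (40 ms), whereas with HZ=100 the integer division 100/125 truncates to 0, so cfq_slice_idle is bumped to the 1-jiffy (10 ms) minimum rather than silently degenerating to zero.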
1 /* 1 /*
2 * fs/ioprio.c 2 * fs/ioprio.c
3 * 3 *
4 * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk> 4 * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk>
5 * 5 *
6 * Helper functions for setting/querying io priorities of processes. The 6 * Helper functions for setting/querying io priorities of processes. The
7 * system calls closely mimic getpriority/setpriority, see the man page for 7 * system calls closely mimic getpriority/setpriority, see the man page for
8 * those. The prio argument is a composite of prio class and prio data, where 8 * those. The prio argument is a composite of prio class and prio data, where
9 * the data argument has meaning within that class. The standard scheduling 9 * the data argument has meaning within that class. The standard scheduling
10 * classes have 8 distinct prio levels, with 0 being the highest prio and 7 10 * classes have 8 distinct prio levels, with 0 being the highest prio and 7
11 * being the lowest. 11 * being the lowest.
12 * 12 *
13 * IOW, setting BE scheduling class with prio 2 is done like so: 13 * IOW, setting BE scheduling class with prio 2 is done like so:
14 * 14 *
15 * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2; 15 * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2;
16 * 16 *
17 * ioprio_set(PRIO_PROCESS, pid, prio); 17 * ioprio_set(PRIO_PROCESS, pid, prio);
18 * 18 *
19 * See also Documentation/block/ioprio.txt 19 * See also Documentation/block/ioprio.txt
20 * 20 *
21 */ 21 */
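
Fleshing the two-liner above out into a complete program: glibc of this era ships no ioprio_set() wrapper and the kernel's ioprio.h is not exported to userspace, so the sketch below goes through syscall() and carries local copies of the kernel constants (values mirrored from include/linux/ioprio.h; they are assumptions of the example, not part of this patch):

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

/* local mirrors of the kernel's ioprio definitions */
#define IOPRIO_CLASS_SHIFT      13
#define IOPRIO_CLASS_BE         2
#define IOPRIO_WHO_PROCESS      1

int main(void)
{
        /* best-effort class, priority level 2, applied to the calling process */
        int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2;

        if (syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0, prio) < 0) {
                perror("ioprio_set");
                return 1;
        }
        return 0;
}
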
22 #include <linux/kernel.h> 22 #include <linux/kernel.h>
23 #include <linux/ioprio.h> 23 #include <linux/ioprio.h>
24 #include <linux/blkdev.h> 24 #include <linux/blkdev.h>
25 #include <linux/capability.h> 25 #include <linux/capability.h>
26 #include <linux/syscalls.h> 26 #include <linux/syscalls.h>
27 #include <linux/security.h> 27 #include <linux/security.h>
28 #include <linux/pid_namespace.h> 28 #include <linux/pid_namespace.h>
29 29
30 static int set_task_ioprio(struct task_struct *task, int ioprio) 30 static int set_task_ioprio(struct task_struct *task, int ioprio)
31 { 31 {
32 int err; 32 int err;
33 struct io_context *ioc; 33 struct io_context *ioc;
34 34
35 if (task->uid != current->euid && 35 if (task->uid != current->euid &&
36 task->uid != current->uid && !capable(CAP_SYS_NICE)) 36 task->uid != current->uid && !capable(CAP_SYS_NICE))
37 return -EPERM; 37 return -EPERM;
38 38
39 err = security_task_setioprio(task, ioprio); 39 err = security_task_setioprio(task, ioprio);
40 if (err) 40 if (err)
41 return err; 41 return err;
42 42
43 task_lock(task); 43 task_lock(task);
44 do { 44 do {
45 ioc = task->io_context; 45 ioc = task->io_context;
46 /* see wmb() in current_io_context() */ 46 /* see wmb() in current_io_context() */
47 smp_read_barrier_depends(); 47 smp_read_barrier_depends();
48 if (ioc) 48 if (ioc)
49 break; 49 break;
50 50
51 ioc = alloc_io_context(GFP_ATOMIC, -1); 51 ioc = alloc_io_context(GFP_ATOMIC, -1);
52 if (!ioc) { 52 if (!ioc) {
53 err = -ENOMEM; 53 err = -ENOMEM;
54 break; 54 break;
55 } 55 }
56 task->io_context = ioc; 56 task->io_context = ioc;
57 } while (1); 57 } while (1);
58 58
59 if (!err) { 59 if (!err) {
60 ioc->ioprio = ioprio; 60 ioc->ioprio = ioprio;
61 ioc->ioprio_changed = 1; 61 ioc->ioprio_changed = 1;
62 } 62 }
63 63
64 task_unlock(task); 64 task_unlock(task);
65 return err; 65 return err;
66 } 66 }
67 67
68 asmlinkage long sys_ioprio_set(int which, int who, int ioprio) 68 asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
69 { 69 {
70 int class = IOPRIO_PRIO_CLASS(ioprio); 70 int class = IOPRIO_PRIO_CLASS(ioprio);
71 int data = IOPRIO_PRIO_DATA(ioprio); 71 int data = IOPRIO_PRIO_DATA(ioprio);
72 struct task_struct *p, *g; 72 struct task_struct *p, *g;
73 struct user_struct *user; 73 struct user_struct *user;
74 struct pid *pgrp; 74 struct pid *pgrp;
75 int ret; 75 int ret;
76 76
77 switch (class) { 77 switch (class) {
78 case IOPRIO_CLASS_RT: 78 case IOPRIO_CLASS_RT:
79 if (!capable(CAP_SYS_ADMIN)) 79 if (!capable(CAP_SYS_ADMIN))
80 return -EPERM; 80 return -EPERM;
81 /* fall through, rt has prio field too */ 81 /* fall through, rt has prio field too */
82 case IOPRIO_CLASS_BE: 82 case IOPRIO_CLASS_BE:
83 if (data >= IOPRIO_BE_NR || data < 0) 83 if (data >= IOPRIO_BE_NR || data < 0)
84 return -EINVAL; 84 return -EINVAL;
85 85
86 break; 86 break;
87 case IOPRIO_CLASS_IDLE: 87 case IOPRIO_CLASS_IDLE:
88 if (!capable(CAP_SYS_ADMIN))
89 return -EPERM;
90 break; 88 break;
91 case IOPRIO_CLASS_NONE: 89 case IOPRIO_CLASS_NONE:
92 if (data) 90 if (data)
93 return -EINVAL; 91 return -EINVAL;
94 break; 92 break;
95 default: 93 default:
96 return -EINVAL; 94 return -EINVAL;
97 } 95 }
98 96
99 ret = -ESRCH; 97 ret = -ESRCH;
100 /* 98 /*
101 * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic", 99 * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic",
102 * so we can't use rcu_read_lock(). See re-copy of ->ioprio 100 * so we can't use rcu_read_lock(). See re-copy of ->ioprio
103 * in copy_process(). 101 * in copy_process().
104 */ 102 */
105 read_lock(&tasklist_lock); 103 read_lock(&tasklist_lock);
106 switch (which) { 104 switch (which) {
107 case IOPRIO_WHO_PROCESS: 105 case IOPRIO_WHO_PROCESS:
108 if (!who) 106 if (!who)
109 p = current; 107 p = current;
110 else 108 else
111 p = find_task_by_vpid(who); 109 p = find_task_by_vpid(who);
112 if (p) 110 if (p)
113 ret = set_task_ioprio(p, ioprio); 111 ret = set_task_ioprio(p, ioprio);
114 break; 112 break;
115 case IOPRIO_WHO_PGRP: 113 case IOPRIO_WHO_PGRP:
116 if (!who) 114 if (!who)
117 pgrp = task_pgrp(current); 115 pgrp = task_pgrp(current);
118 else 116 else
119 pgrp = find_vpid(who); 117 pgrp = find_vpid(who);
120 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 118 do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
121 ret = set_task_ioprio(p, ioprio); 119 ret = set_task_ioprio(p, ioprio);
122 if (ret) 120 if (ret)
123 break; 121 break;
124 } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 122 } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
125 break; 123 break;
126 case IOPRIO_WHO_USER: 124 case IOPRIO_WHO_USER:
127 if (!who) 125 if (!who)
128 user = current->user; 126 user = current->user;
129 else 127 else
130 user = find_user(who); 128 user = find_user(who);
131 129
132 if (!user) 130 if (!user)
133 break; 131 break;
134 132
135 do_each_thread(g, p) { 133 do_each_thread(g, p) {
136 if (p->uid != who) 134 if (p->uid != who)
137 continue; 135 continue;
138 ret = set_task_ioprio(p, ioprio); 136 ret = set_task_ioprio(p, ioprio);
139 if (ret) 137 if (ret)
140 goto free_uid; 138 goto free_uid;
141 } while_each_thread(g, p); 139 } while_each_thread(g, p);
142 free_uid: 140 free_uid:
143 if (who) 141 if (who)
144 free_uid(user); 142 free_uid(user);
145 break; 143 break;
146 default: 144 default:
147 ret = -EINVAL; 145 ret = -EINVAL;
148 } 146 }
149 147
150 read_unlock(&tasklist_lock); 148 read_unlock(&tasklist_lock);
151 return ret; 149 return ret;
152 } 150 }
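
With the CAP_SYS_ADMIN check for IOPRIO_CLASS_IDLE dropped in the switch above, an ordinary task can now place itself (or another task passing set_task_ioprio()'s uid checks) into the idle class. A sketch under the same assumptions as the earlier example; the switch no longer validates a data value for this class, so only the class bits are set:

#include <sys/syscall.h>
#include <unistd.h>

#define IOPRIO_CLASS_SHIFT      13
#define IOPRIO_CLASS_IDLE       3       /* mirrored from the kernel's ioprio.h */
#define IOPRIO_WHO_PROCESS      1

int main(void)
{
        /* no capability required after this patch; EPERM can still come from
         * the uid checks in set_task_ioprio() when targeting another task */
        if (syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0,
                    IOPRIO_CLASS_IDLE << IOPRIO_CLASS_SHIFT) < 0)
                return 1;
        return 0;
}
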
153 151
154 static int get_task_ioprio(struct task_struct *p) 152 static int get_task_ioprio(struct task_struct *p)
155 { 153 {
156 int ret; 154 int ret;
157 155
158 ret = security_task_getioprio(p); 156 ret = security_task_getioprio(p);
159 if (ret) 157 if (ret)
160 goto out; 158 goto out;
161 ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM); 159 ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM);
162 if (p->io_context) 160 if (p->io_context)
163 ret = p->io_context->ioprio; 161 ret = p->io_context->ioprio;
164 out: 162 out:
165 return ret; 163 return ret;
166 } 164 }
167 165
168 int ioprio_best(unsigned short aprio, unsigned short bprio) 166 int ioprio_best(unsigned short aprio, unsigned short bprio)
169 { 167 {
170 unsigned short aclass = IOPRIO_PRIO_CLASS(aprio); 168 unsigned short aclass = IOPRIO_PRIO_CLASS(aprio);
171 unsigned short bclass = IOPRIO_PRIO_CLASS(bprio); 169 unsigned short bclass = IOPRIO_PRIO_CLASS(bprio);
172 170
173 if (aclass == IOPRIO_CLASS_NONE) 171 if (aclass == IOPRIO_CLASS_NONE)
174 aclass = IOPRIO_CLASS_BE; 172 aclass = IOPRIO_CLASS_BE;
175 if (bclass == IOPRIO_CLASS_NONE) 173 if (bclass == IOPRIO_CLASS_NONE)
176 bclass = IOPRIO_CLASS_BE; 174 bclass = IOPRIO_CLASS_BE;
177 175
178 if (aclass == bclass) 176 if (aclass == bclass)
179 return min(aprio, bprio); 177 return min(aprio, bprio);
180 if (aclass > bclass) 178 if (aclass > bclass)
181 return bprio; 179 return bprio;
182 else 180 else
183 return aprio; 181 return aprio;
184 } 182 }
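A worked reading of the rule above, using the class values from ioprio.h at this point (RT = 1, BE = 2, IDLE = 3) and IOPRIO_CLASS_SHIFT = 13: an RT level-4 priority encodes to (1 << 13) | 4 = 8196 and a BE level-2 priority to (2 << 13) | 2 = 16386. The classes differ and RT's is the smaller, so ioprio_best() returns the RT value; only when both sides share a class does min() pick the numerically lower, and therefore stronger, composite.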
185 183
186 asmlinkage long sys_ioprio_get(int which, int who) 184 asmlinkage long sys_ioprio_get(int which, int who)
187 { 185 {
188 struct task_struct *g, *p; 186 struct task_struct *g, *p;
189 struct user_struct *user; 187 struct user_struct *user;
190 struct pid *pgrp; 188 struct pid *pgrp;
191 int ret = -ESRCH; 189 int ret = -ESRCH;
192 int tmpio; 190 int tmpio;
193 191
194 read_lock(&tasklist_lock); 192 read_lock(&tasklist_lock);
195 switch (which) { 193 switch (which) {
196 case IOPRIO_WHO_PROCESS: 194 case IOPRIO_WHO_PROCESS:
197 if (!who) 195 if (!who)
198 p = current; 196 p = current;
199 else 197 else
200 p = find_task_by_vpid(who); 198 p = find_task_by_vpid(who);
201 if (p) 199 if (p)
202 ret = get_task_ioprio(p); 200 ret = get_task_ioprio(p);
203 break; 201 break;
204 case IOPRIO_WHO_PGRP: 202 case IOPRIO_WHO_PGRP:
205 if (!who) 203 if (!who)
206 pgrp = task_pgrp(current); 204 pgrp = task_pgrp(current);
207 else 205 else
208 pgrp = find_vpid(who); 206 pgrp = find_vpid(who);
209 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 207 do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
210 tmpio = get_task_ioprio(p); 208 tmpio = get_task_ioprio(p);
211 if (tmpio < 0) 209 if (tmpio < 0)
212 continue; 210 continue;
213 if (ret == -ESRCH) 211 if (ret == -ESRCH)
214 ret = tmpio; 212 ret = tmpio;
215 else 213 else
216 ret = ioprio_best(ret, tmpio); 214 ret = ioprio_best(ret, tmpio);
217 } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 215 } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
218 break; 216 break;
219 case IOPRIO_WHO_USER: 217 case IOPRIO_WHO_USER:
220 if (!who) 218 if (!who)
221 user = current->user; 219 user = current->user;
222 else 220 else
223 user = find_user(who); 221 user = find_user(who);
224 222
225 if (!user) 223 if (!user)
226 break; 224 break;
227 225
228 do_each_thread(g, p) { 226 do_each_thread(g, p) {
229 if (p->uid != user->uid) 227 if (p->uid != user->uid)
230 continue; 228 continue;
231 tmpio = get_task_ioprio(p); 229 tmpio = get_task_ioprio(p);
232 if (tmpio < 0) 230 if (tmpio < 0)
233 continue; 231 continue;
234 if (ret == -ESRCH) 232 if (ret == -ESRCH)
235 ret = tmpio; 233 ret = tmpio;
236 else 234 else
237 ret = ioprio_best(ret, tmpio); 235 ret = ioprio_best(ret, tmpio);
238 } while_each_thread(g, p); 236 } while_each_thread(g, p);
239 237
240 if (who) 238 if (who)
241 free_uid(user); 239 free_uid(user);
242 break; 240 break;
243 default: 241 default:
244 ret = -EINVAL; 242 ret = -EINVAL;
245 } 243 }
246 244
247 read_unlock(&tasklist_lock); 245 read_unlock(&tasklist_lock);
248 return ret; 246 return ret;
249 } 247 }
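
And the read side, again via the raw syscall with locally mirrored constants, decoding the composite the same way IOPRIO_PRIO_CLASS()/IOPRIO_PRIO_DATA() do in the kernel:

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#define IOPRIO_CLASS_SHIFT      13
#define IOPRIO_WHO_PROCESS      1
#define IOPRIO_PRIO_MASK        ((1 << IOPRIO_CLASS_SHIFT) - 1)

int main(void)
{
        long prio = syscall(SYS_ioprio_get, IOPRIO_WHO_PROCESS, 0);

        if (prio < 0) {
                perror("ioprio_get");
                return 1;
        }
        /* class: 0 = none, 1 = rt, 2 = be, 3 = idle */
        printf("class %ld, data %ld\n", prio >> IOPRIO_CLASS_SHIFT,
               prio & IOPRIO_PRIO_MASK);
        return 0;
}
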
250 248
251 249