Commit 3440c49f5c5ecb4f29b0544aa87da71888404f8f

Authored by Divyesh Shah
Committed by Jens Axboe
1 parent 02246c4117

cfq-iosched: Fix the incorrect timeslice accounting with forced_dispatch

When CFQ dispatches requests forcefully, due to a barrier or a change of iosched,
it runs through all cfqqs, dispatching their requests, and then expires each queue.
However, it does not activate a cfqq before flushing its IOs, so stale values are
used when computing slice_used.
This patch fixes that by activating the queue before flushing requests from
each queue.

This matters mostly for barrier requests, because when the iosched is changing it
really doesn't matter if the accounting is briefly incorrect, since we're going to
tear down all the structures anyway.

We also now expire the current timeslice before moving on with the dispatch,
to accurately account the slice used for that cfqq.
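
In code, the reworked forced-dispatch path looks roughly like the sketch below. This is only an illustration of the description above, assuming the existing cfq helpers cfq_slice_expired(), cfq_get_next_queue_forced(), __cfq_set_active_queue() and __cfq_forced_dispatch_cfqq(); the actual hunk may differ.

	static int cfq_forced_dispatch(struct cfq_data *cfqd)
	{
		struct cfq_queue *cfqq;
		int dispatched = 0;

		/* Expire the current timeslice first, so its usage is accounted now */
		cfq_slice_expired(cfqd, 0);

		while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) {
			/*
			 * Activate the queue before flushing its requests, so the
			 * later slice_used computation sees fresh, not stale, values.
			 */
			__cfq_set_active_queue(cfqd, cfqq);
			dispatched += __cfq_forced_dispatch_cfqq(cfqq);
		}

		BUG_ON(cfqd->busy_queues);

		cfq_log(cfqd, "forced_dispatch=%d", dispatched);
		return dispatched;
	}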

Signed-off-by: Divyesh Shah <dpshah@google.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

Showing 1 changed file with 5 additions and 2 deletions

1 /* 1 /*
2 * CFQ, or complete fairness queueing, disk scheduler. 2 * CFQ, or complete fairness queueing, disk scheduler.
3 * 3 *
4 * Based on ideas from a previously unfinished io 4 * Based on ideas from a previously unfinished io
5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. 5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
6 * 6 *
7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> 7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
8 */ 8 */
9 #include <linux/module.h> 9 #include <linux/module.h>
10 #include <linux/blkdev.h> 10 #include <linux/blkdev.h>
11 #include <linux/elevator.h> 11 #include <linux/elevator.h>
12 #include <linux/jiffies.h> 12 #include <linux/jiffies.h>
13 #include <linux/rbtree.h> 13 #include <linux/rbtree.h>
14 #include <linux/ioprio.h> 14 #include <linux/ioprio.h>
15 #include <linux/blktrace_api.h> 15 #include <linux/blktrace_api.h>
16 #include "blk-cgroup.h" 16 #include "blk-cgroup.h"
17 17
18 /* 18 /*
19 * tunables 19 * tunables
20 */ 20 */
21 /* max queue in one round of service */ 21 /* max queue in one round of service */
22 static const int cfq_quantum = 8; 22 static const int cfq_quantum = 8;
23 static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 23 static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
24 /* maximum backwards seek, in KiB */ 24 /* maximum backwards seek, in KiB */
25 static const int cfq_back_max = 16 * 1024; 25 static const int cfq_back_max = 16 * 1024;
26 /* penalty of a backwards seek */ 26 /* penalty of a backwards seek */
27 static const int cfq_back_penalty = 2; 27 static const int cfq_back_penalty = 2;
28 static const int cfq_slice_sync = HZ / 10; 28 static const int cfq_slice_sync = HZ / 10;
29 static int cfq_slice_async = HZ / 25; 29 static int cfq_slice_async = HZ / 25;
30 static const int cfq_slice_async_rq = 2; 30 static const int cfq_slice_async_rq = 2;
31 static int cfq_slice_idle = HZ / 125; 31 static int cfq_slice_idle = HZ / 125;
32 static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ 32 static const int cfq_target_latency = HZ * 3/10; /* 300 ms */
33 static const int cfq_hist_divisor = 4; 33 static const int cfq_hist_divisor = 4;
34 34
35 /* 35 /*
36 * offset from end of service tree 36 * offset from end of service tree
37 */ 37 */
38 #define CFQ_IDLE_DELAY (HZ / 5) 38 #define CFQ_IDLE_DELAY (HZ / 5)
39 39
40 /* 40 /*
41 * below this threshold, we consider thinktime immediate 41 * below this threshold, we consider thinktime immediate
42 */ 42 */
43 #define CFQ_MIN_TT (2) 43 #define CFQ_MIN_TT (2)
44 44
45 #define CFQ_SLICE_SCALE (5) 45 #define CFQ_SLICE_SCALE (5)
46 #define CFQ_HW_QUEUE_MIN (5) 46 #define CFQ_HW_QUEUE_MIN (5)
47 #define CFQ_SERVICE_SHIFT 12 47 #define CFQ_SERVICE_SHIFT 12
48 48
49 #define CFQQ_SEEK_THR (sector_t)(8 * 100) 49 #define CFQQ_SEEK_THR (sector_t)(8 * 100)
50 #define CFQQ_CLOSE_THR (sector_t)(8 * 1024) 50 #define CFQQ_CLOSE_THR (sector_t)(8 * 1024)
51 #define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32) 51 #define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32)
52 #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) 52 #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8)
53 53
54 #define RQ_CIC(rq) \ 54 #define RQ_CIC(rq) \
55 ((struct cfq_io_context *) (rq)->elevator_private) 55 ((struct cfq_io_context *) (rq)->elevator_private)
56 #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) 56 #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2)
57 57
58 static struct kmem_cache *cfq_pool; 58 static struct kmem_cache *cfq_pool;
59 static struct kmem_cache *cfq_ioc_pool; 59 static struct kmem_cache *cfq_ioc_pool;
60 60
61 static DEFINE_PER_CPU(unsigned long, cfq_ioc_count); 61 static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
62 static struct completion *ioc_gone; 62 static struct completion *ioc_gone;
63 static DEFINE_SPINLOCK(ioc_gone_lock); 63 static DEFINE_SPINLOCK(ioc_gone_lock);
64 64
65 #define CFQ_PRIO_LISTS IOPRIO_BE_NR 65 #define CFQ_PRIO_LISTS IOPRIO_BE_NR
66 #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 66 #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
67 #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) 67 #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
68 68
69 #define sample_valid(samples) ((samples) > 80) 69 #define sample_valid(samples) ((samples) > 80)
70 #define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node) 70 #define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node)
71 71
72 /* 72 /*
73 * Most of our rbtree usage is for sorting with min extraction, so 73 * Most of our rbtree usage is for sorting with min extraction, so
74 * if we cache the leftmost node we don't have to walk down the tree 74 * if we cache the leftmost node we don't have to walk down the tree
75 * to find it. Idea borrowed from Ingo Molnars CFS scheduler. We should 75 * to find it. Idea borrowed from Ingo Molnars CFS scheduler. We should
76 * move this into the elevator for the rq sorting as well. 76 * move this into the elevator for the rq sorting as well.
77 */ 77 */
78 struct cfq_rb_root { 78 struct cfq_rb_root {
79 struct rb_root rb; 79 struct rb_root rb;
80 struct rb_node *left; 80 struct rb_node *left;
81 unsigned count; 81 unsigned count;
82 unsigned total_weight; 82 unsigned total_weight;
83 u64 min_vdisktime; 83 u64 min_vdisktime;
84 struct rb_node *active; 84 struct rb_node *active;
85 }; 85 };
86 #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \ 86 #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
87 .count = 0, .min_vdisktime = 0, } 87 .count = 0, .min_vdisktime = 0, }
88 88
89 /* 89 /*
90 * Per process-grouping structure 90 * Per process-grouping structure
91 */ 91 */
92 struct cfq_queue { 92 struct cfq_queue {
93 /* reference count */ 93 /* reference count */
94 atomic_t ref; 94 atomic_t ref;
95 /* various state flags, see below */ 95 /* various state flags, see below */
96 unsigned int flags; 96 unsigned int flags;
97 /* parent cfq_data */ 97 /* parent cfq_data */
98 struct cfq_data *cfqd; 98 struct cfq_data *cfqd;
99 /* service_tree member */ 99 /* service_tree member */
100 struct rb_node rb_node; 100 struct rb_node rb_node;
101 /* service_tree key */ 101 /* service_tree key */
102 unsigned long rb_key; 102 unsigned long rb_key;
103 /* prio tree member */ 103 /* prio tree member */
104 struct rb_node p_node; 104 struct rb_node p_node;
105 /* prio tree root we belong to, if any */ 105 /* prio tree root we belong to, if any */
106 struct rb_root *p_root; 106 struct rb_root *p_root;
107 /* sorted list of pending requests */ 107 /* sorted list of pending requests */
108 struct rb_root sort_list; 108 struct rb_root sort_list;
109 /* if fifo isn't expired, next request to serve */ 109 /* if fifo isn't expired, next request to serve */
110 struct request *next_rq; 110 struct request *next_rq;
111 /* requests queued in sort_list */ 111 /* requests queued in sort_list */
112 int queued[2]; 112 int queued[2];
113 /* currently allocated requests */ 113 /* currently allocated requests */
114 int allocated[2]; 114 int allocated[2];
115 /* fifo list of requests in sort_list */ 115 /* fifo list of requests in sort_list */
116 struct list_head fifo; 116 struct list_head fifo;
117 117
118 /* time when queue got scheduled in to dispatch first request. */ 118 /* time when queue got scheduled in to dispatch first request. */
119 unsigned long dispatch_start; 119 unsigned long dispatch_start;
120 unsigned int allocated_slice; 120 unsigned int allocated_slice;
121 unsigned int slice_dispatch; 121 unsigned int slice_dispatch;
122 /* time when first request from queue completed and slice started. */ 122 /* time when first request from queue completed and slice started. */
123 unsigned long slice_start; 123 unsigned long slice_start;
124 unsigned long slice_end; 124 unsigned long slice_end;
125 long slice_resid; 125 long slice_resid;
126 126
127 /* pending metadata requests */ 127 /* pending metadata requests */
128 int meta_pending; 128 int meta_pending;
129 /* number of requests that are on the dispatch list or inside driver */ 129 /* number of requests that are on the dispatch list or inside driver */
130 int dispatched; 130 int dispatched;
131 131
132 /* io prio of this group */ 132 /* io prio of this group */
133 unsigned short ioprio, org_ioprio; 133 unsigned short ioprio, org_ioprio;
134 unsigned short ioprio_class, org_ioprio_class; 134 unsigned short ioprio_class, org_ioprio_class;
135 135
136 pid_t pid; 136 pid_t pid;
137 137
138 u32 seek_history; 138 u32 seek_history;
139 sector_t last_request_pos; 139 sector_t last_request_pos;
140 140
141 struct cfq_rb_root *service_tree; 141 struct cfq_rb_root *service_tree;
142 struct cfq_queue *new_cfqq; 142 struct cfq_queue *new_cfqq;
143 struct cfq_group *cfqg; 143 struct cfq_group *cfqg;
144 struct cfq_group *orig_cfqg; 144 struct cfq_group *orig_cfqg;
145 /* Sectors dispatched in current dispatch round */ 145 /* Sectors dispatched in current dispatch round */
146 unsigned long nr_sectors; 146 unsigned long nr_sectors;
147 }; 147 };
148 148
149 /* 149 /*
150 * First index in the service_trees. 150 * First index in the service_trees.
151 * IDLE is handled separately, so it has negative index 151 * IDLE is handled separately, so it has negative index
152 */ 152 */
153 enum wl_prio_t { 153 enum wl_prio_t {
154 BE_WORKLOAD = 0, 154 BE_WORKLOAD = 0,
155 RT_WORKLOAD = 1, 155 RT_WORKLOAD = 1,
156 IDLE_WORKLOAD = 2, 156 IDLE_WORKLOAD = 2,
157 }; 157 };
158 158
159 /* 159 /*
160 * Second index in the service_trees. 160 * Second index in the service_trees.
161 */ 161 */
162 enum wl_type_t { 162 enum wl_type_t {
163 ASYNC_WORKLOAD = 0, 163 ASYNC_WORKLOAD = 0,
164 SYNC_NOIDLE_WORKLOAD = 1, 164 SYNC_NOIDLE_WORKLOAD = 1,
165 SYNC_WORKLOAD = 2 165 SYNC_WORKLOAD = 2
166 }; 166 };
167 167
168 /* This is per cgroup per device grouping structure */ 168 /* This is per cgroup per device grouping structure */
169 struct cfq_group { 169 struct cfq_group {
170 /* group service_tree member */ 170 /* group service_tree member */
171 struct rb_node rb_node; 171 struct rb_node rb_node;
172 172
173 /* group service_tree key */ 173 /* group service_tree key */
174 u64 vdisktime; 174 u64 vdisktime;
175 unsigned int weight; 175 unsigned int weight;
176 bool on_st; 176 bool on_st;
177 177
178 /* number of cfqq currently on this group */ 178 /* number of cfqq currently on this group */
179 int nr_cfqq; 179 int nr_cfqq;
180 180
181 /* Per group busy queus average. Useful for workload slice calc. */ 181 /* Per group busy queus average. Useful for workload slice calc. */
182 unsigned int busy_queues_avg[2]; 182 unsigned int busy_queues_avg[2];
183 /* 183 /*
184 * rr lists of queues with requests, onle rr for each priority class. 184 * rr lists of queues with requests, onle rr for each priority class.
185 * Counts are embedded in the cfq_rb_root 185 * Counts are embedded in the cfq_rb_root
186 */ 186 */
187 struct cfq_rb_root service_trees[2][3]; 187 struct cfq_rb_root service_trees[2][3];
188 struct cfq_rb_root service_tree_idle; 188 struct cfq_rb_root service_tree_idle;
189 189
190 unsigned long saved_workload_slice; 190 unsigned long saved_workload_slice;
191 enum wl_type_t saved_workload; 191 enum wl_type_t saved_workload;
192 enum wl_prio_t saved_serving_prio; 192 enum wl_prio_t saved_serving_prio;
193 struct blkio_group blkg; 193 struct blkio_group blkg;
194 #ifdef CONFIG_CFQ_GROUP_IOSCHED 194 #ifdef CONFIG_CFQ_GROUP_IOSCHED
195 struct hlist_node cfqd_node; 195 struct hlist_node cfqd_node;
196 atomic_t ref; 196 atomic_t ref;
197 #endif 197 #endif
198 }; 198 };
199 199
200 /* 200 /*
201 * Per block device queue structure 201 * Per block device queue structure
202 */ 202 */
203 struct cfq_data { 203 struct cfq_data {
204 struct request_queue *queue; 204 struct request_queue *queue;
205 /* Root service tree for cfq_groups */ 205 /* Root service tree for cfq_groups */
206 struct cfq_rb_root grp_service_tree; 206 struct cfq_rb_root grp_service_tree;
207 struct cfq_group root_group; 207 struct cfq_group root_group;
208 208
209 /* 209 /*
210 * The priority currently being served 210 * The priority currently being served
211 */ 211 */
212 enum wl_prio_t serving_prio; 212 enum wl_prio_t serving_prio;
213 enum wl_type_t serving_type; 213 enum wl_type_t serving_type;
214 unsigned long workload_expires; 214 unsigned long workload_expires;
215 struct cfq_group *serving_group; 215 struct cfq_group *serving_group;
216 bool noidle_tree_requires_idle; 216 bool noidle_tree_requires_idle;
217 217
218 /* 218 /*
219 * Each priority tree is sorted by next_request position. These 219 * Each priority tree is sorted by next_request position. These
220 * trees are used when determining if two or more queues are 220 * trees are used when determining if two or more queues are
221 * interleaving requests (see cfq_close_cooperator). 221 * interleaving requests (see cfq_close_cooperator).
222 */ 222 */
223 struct rb_root prio_trees[CFQ_PRIO_LISTS]; 223 struct rb_root prio_trees[CFQ_PRIO_LISTS];
224 224
225 unsigned int busy_queues; 225 unsigned int busy_queues;
226 226
227 int rq_in_driver; 227 int rq_in_driver;
228 int rq_in_flight[2]; 228 int rq_in_flight[2];
229 229
230 /* 230 /*
231 * queue-depth detection 231 * queue-depth detection
232 */ 232 */
233 int rq_queued; 233 int rq_queued;
234 int hw_tag; 234 int hw_tag;
235 /* 235 /*
236 * hw_tag can be 236 * hw_tag can be
237 * -1 => indeterminate, (cfq will behave as if NCQ is present, to allow better detection) 237 * -1 => indeterminate, (cfq will behave as if NCQ is present, to allow better detection)
238 * 1 => NCQ is present (hw_tag_est_depth is the estimated max depth) 238 * 1 => NCQ is present (hw_tag_est_depth is the estimated max depth)
239 * 0 => no NCQ 239 * 0 => no NCQ
240 */ 240 */
241 int hw_tag_est_depth; 241 int hw_tag_est_depth;
242 unsigned int hw_tag_samples; 242 unsigned int hw_tag_samples;
243 243
244 /* 244 /*
245 * idle window management 245 * idle window management
246 */ 246 */
247 struct timer_list idle_slice_timer; 247 struct timer_list idle_slice_timer;
248 struct work_struct unplug_work; 248 struct work_struct unplug_work;
249 249
250 struct cfq_queue *active_queue; 250 struct cfq_queue *active_queue;
251 struct cfq_io_context *active_cic; 251 struct cfq_io_context *active_cic;
252 252
253 /* 253 /*
254 * async queue for each priority case 254 * async queue for each priority case
255 */ 255 */
256 struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR]; 256 struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
257 struct cfq_queue *async_idle_cfqq; 257 struct cfq_queue *async_idle_cfqq;
258 258
259 sector_t last_position; 259 sector_t last_position;
260 260
261 /* 261 /*
262 * tunables, see top of file 262 * tunables, see top of file
263 */ 263 */
264 unsigned int cfq_quantum; 264 unsigned int cfq_quantum;
265 unsigned int cfq_fifo_expire[2]; 265 unsigned int cfq_fifo_expire[2];
266 unsigned int cfq_back_penalty; 266 unsigned int cfq_back_penalty;
267 unsigned int cfq_back_max; 267 unsigned int cfq_back_max;
268 unsigned int cfq_slice[2]; 268 unsigned int cfq_slice[2];
269 unsigned int cfq_slice_async_rq; 269 unsigned int cfq_slice_async_rq;
270 unsigned int cfq_slice_idle; 270 unsigned int cfq_slice_idle;
271 unsigned int cfq_latency; 271 unsigned int cfq_latency;
272 unsigned int cfq_group_isolation; 272 unsigned int cfq_group_isolation;
273 273
274 struct list_head cic_list; 274 struct list_head cic_list;
275 275
276 /* 276 /*
277 * Fallback dummy cfqq for extreme OOM conditions 277 * Fallback dummy cfqq for extreme OOM conditions
278 */ 278 */
279 struct cfq_queue oom_cfqq; 279 struct cfq_queue oom_cfqq;
280 280
281 unsigned long last_delayed_sync; 281 unsigned long last_delayed_sync;
282 282
283 /* List of cfq groups being managed on this device*/ 283 /* List of cfq groups being managed on this device*/
284 struct hlist_head cfqg_list; 284 struct hlist_head cfqg_list;
285 struct rcu_head rcu; 285 struct rcu_head rcu;
286 }; 286 };
287 287
288 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); 288 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
289 289
290 static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg, 290 static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg,
291 enum wl_prio_t prio, 291 enum wl_prio_t prio,
292 enum wl_type_t type) 292 enum wl_type_t type)
293 { 293 {
294 if (!cfqg) 294 if (!cfqg)
295 return NULL; 295 return NULL;
296 296
297 if (prio == IDLE_WORKLOAD) 297 if (prio == IDLE_WORKLOAD)
298 return &cfqg->service_tree_idle; 298 return &cfqg->service_tree_idle;
299 299
300 return &cfqg->service_trees[prio][type]; 300 return &cfqg->service_trees[prio][type];
301 } 301 }
302 302
303 enum cfqq_state_flags { 303 enum cfqq_state_flags {
304 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ 304 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */
305 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ 305 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */
306 CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */ 306 CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */
307 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */ 307 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */
308 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ 308 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
309 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */ 309 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */
310 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ 310 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */
311 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ 311 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */
312 CFQ_CFQQ_FLAG_sync, /* synchronous queue */ 312 CFQ_CFQQ_FLAG_sync, /* synchronous queue */
313 CFQ_CFQQ_FLAG_coop, /* cfqq is shared */ 313 CFQ_CFQQ_FLAG_coop, /* cfqq is shared */
314 CFQ_CFQQ_FLAG_split_coop, /* shared cfqq will be splitted */ 314 CFQ_CFQQ_FLAG_split_coop, /* shared cfqq will be splitted */
315 CFQ_CFQQ_FLAG_deep, /* sync cfqq experienced large depth */ 315 CFQ_CFQQ_FLAG_deep, /* sync cfqq experienced large depth */
316 CFQ_CFQQ_FLAG_wait_busy, /* Waiting for next request */ 316 CFQ_CFQQ_FLAG_wait_busy, /* Waiting for next request */
317 }; 317 };
318 318
319 #define CFQ_CFQQ_FNS(name) \ 319 #define CFQ_CFQQ_FNS(name) \
320 static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \ 320 static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \
321 { \ 321 { \
322 (cfqq)->flags |= (1 << CFQ_CFQQ_FLAG_##name); \ 322 (cfqq)->flags |= (1 << CFQ_CFQQ_FLAG_##name); \
323 } \ 323 } \
324 static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \ 324 static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \
325 { \ 325 { \
326 (cfqq)->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \ 326 (cfqq)->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \
327 } \ 327 } \
328 static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ 328 static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
329 { \ 329 { \
330 return ((cfqq)->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \ 330 return ((cfqq)->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \
331 } 331 }
332 332
333 CFQ_CFQQ_FNS(on_rr); 333 CFQ_CFQQ_FNS(on_rr);
334 CFQ_CFQQ_FNS(wait_request); 334 CFQ_CFQQ_FNS(wait_request);
335 CFQ_CFQQ_FNS(must_dispatch); 335 CFQ_CFQQ_FNS(must_dispatch);
336 CFQ_CFQQ_FNS(must_alloc_slice); 336 CFQ_CFQQ_FNS(must_alloc_slice);
337 CFQ_CFQQ_FNS(fifo_expire); 337 CFQ_CFQQ_FNS(fifo_expire);
338 CFQ_CFQQ_FNS(idle_window); 338 CFQ_CFQQ_FNS(idle_window);
339 CFQ_CFQQ_FNS(prio_changed); 339 CFQ_CFQQ_FNS(prio_changed);
340 CFQ_CFQQ_FNS(slice_new); 340 CFQ_CFQQ_FNS(slice_new);
341 CFQ_CFQQ_FNS(sync); 341 CFQ_CFQQ_FNS(sync);
342 CFQ_CFQQ_FNS(coop); 342 CFQ_CFQQ_FNS(coop);
343 CFQ_CFQQ_FNS(split_coop); 343 CFQ_CFQQ_FNS(split_coop);
344 CFQ_CFQQ_FNS(deep); 344 CFQ_CFQQ_FNS(deep);
345 CFQ_CFQQ_FNS(wait_busy); 345 CFQ_CFQQ_FNS(wait_busy);
346 #undef CFQ_CFQQ_FNS 346 #undef CFQ_CFQQ_FNS
347 347
348 #ifdef CONFIG_DEBUG_CFQ_IOSCHED 348 #ifdef CONFIG_DEBUG_CFQ_IOSCHED
349 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ 349 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
350 blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ 350 blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \
351 cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ 351 cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
352 blkg_path(&(cfqq)->cfqg->blkg), ##args); 352 blkg_path(&(cfqq)->cfqg->blkg), ##args);
353 353
354 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) \ 354 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) \
355 blk_add_trace_msg((cfqd)->queue, "%s " fmt, \ 355 blk_add_trace_msg((cfqd)->queue, "%s " fmt, \
356 blkg_path(&(cfqg)->blkg), ##args); \ 356 blkg_path(&(cfqg)->blkg), ##args); \
357 357
358 #else 358 #else
359 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ 359 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
360 blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args) 360 blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
361 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0); 361 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0);
362 #endif 362 #endif
363 #define cfq_log(cfqd, fmt, args...) \ 363 #define cfq_log(cfqd, fmt, args...) \
364 blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args) 364 blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
365 365
366 /* Traverses through cfq group service trees */ 366 /* Traverses through cfq group service trees */
367 #define for_each_cfqg_st(cfqg, i, j, st) \ 367 #define for_each_cfqg_st(cfqg, i, j, st) \
368 for (i = 0; i <= IDLE_WORKLOAD; i++) \ 368 for (i = 0; i <= IDLE_WORKLOAD; i++) \
369 for (j = 0, st = i < IDLE_WORKLOAD ? &cfqg->service_trees[i][j]\ 369 for (j = 0, st = i < IDLE_WORKLOAD ? &cfqg->service_trees[i][j]\
370 : &cfqg->service_tree_idle; \ 370 : &cfqg->service_tree_idle; \
371 (i < IDLE_WORKLOAD && j <= SYNC_WORKLOAD) || \ 371 (i < IDLE_WORKLOAD && j <= SYNC_WORKLOAD) || \
372 (i == IDLE_WORKLOAD && j == 0); \ 372 (i == IDLE_WORKLOAD && j == 0); \
373 j++, st = i < IDLE_WORKLOAD ? \ 373 j++, st = i < IDLE_WORKLOAD ? \
374 &cfqg->service_trees[i][j]: NULL) \ 374 &cfqg->service_trees[i][j]: NULL) \
375 375
376 376
377 static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq) 377 static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq)
378 { 378 {
379 if (cfq_class_idle(cfqq)) 379 if (cfq_class_idle(cfqq))
380 return IDLE_WORKLOAD; 380 return IDLE_WORKLOAD;
381 if (cfq_class_rt(cfqq)) 381 if (cfq_class_rt(cfqq))
382 return RT_WORKLOAD; 382 return RT_WORKLOAD;
383 return BE_WORKLOAD; 383 return BE_WORKLOAD;
384 } 384 }
385 385
386 386
387 static enum wl_type_t cfqq_type(struct cfq_queue *cfqq) 387 static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
388 { 388 {
389 if (!cfq_cfqq_sync(cfqq)) 389 if (!cfq_cfqq_sync(cfqq))
390 return ASYNC_WORKLOAD; 390 return ASYNC_WORKLOAD;
391 if (!cfq_cfqq_idle_window(cfqq)) 391 if (!cfq_cfqq_idle_window(cfqq))
392 return SYNC_NOIDLE_WORKLOAD; 392 return SYNC_NOIDLE_WORKLOAD;
393 return SYNC_WORKLOAD; 393 return SYNC_WORKLOAD;
394 } 394 }
395 395
396 static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl, 396 static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl,
397 struct cfq_data *cfqd, 397 struct cfq_data *cfqd,
398 struct cfq_group *cfqg) 398 struct cfq_group *cfqg)
399 { 399 {
400 if (wl == IDLE_WORKLOAD) 400 if (wl == IDLE_WORKLOAD)
401 return cfqg->service_tree_idle.count; 401 return cfqg->service_tree_idle.count;
402 402
403 return cfqg->service_trees[wl][ASYNC_WORKLOAD].count 403 return cfqg->service_trees[wl][ASYNC_WORKLOAD].count
404 + cfqg->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count 404 + cfqg->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count
405 + cfqg->service_trees[wl][SYNC_WORKLOAD].count; 405 + cfqg->service_trees[wl][SYNC_WORKLOAD].count;
406 } 406 }
407 407
408 static inline int cfqg_busy_async_queues(struct cfq_data *cfqd, 408 static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
409 struct cfq_group *cfqg) 409 struct cfq_group *cfqg)
410 { 410 {
411 return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count 411 return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count
412 + cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count; 412 + cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count;
413 } 413 }
414 414
415 static void cfq_dispatch_insert(struct request_queue *, struct request *); 415 static void cfq_dispatch_insert(struct request_queue *, struct request *);
416 static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool, 416 static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
417 struct io_context *, gfp_t); 417 struct io_context *, gfp_t);
418 static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, 418 static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
419 struct io_context *); 419 struct io_context *);
420 420
421 static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, 421 static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
422 bool is_sync) 422 bool is_sync)
423 { 423 {
424 return cic->cfqq[is_sync]; 424 return cic->cfqq[is_sync];
425 } 425 }
426 426
427 static inline void cic_set_cfqq(struct cfq_io_context *cic, 427 static inline void cic_set_cfqq(struct cfq_io_context *cic,
428 struct cfq_queue *cfqq, bool is_sync) 428 struct cfq_queue *cfqq, bool is_sync)
429 { 429 {
430 cic->cfqq[is_sync] = cfqq; 430 cic->cfqq[is_sync] = cfqq;
431 } 431 }
432 432
433 /* 433 /*
434 * We regard a request as SYNC, if it's either a read or has the SYNC bit 434 * We regard a request as SYNC, if it's either a read or has the SYNC bit
435 * set (in which case it could also be direct WRITE). 435 * set (in which case it could also be direct WRITE).
436 */ 436 */
437 static inline bool cfq_bio_sync(struct bio *bio) 437 static inline bool cfq_bio_sync(struct bio *bio)
438 { 438 {
439 return bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO); 439 return bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO);
440 } 440 }
441 441
442 /* 442 /*
443 * scheduler run of queue, if there are requests pending and no one in the 443 * scheduler run of queue, if there are requests pending and no one in the
444 * driver that will restart queueing 444 * driver that will restart queueing
445 */ 445 */
446 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) 446 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
447 { 447 {
448 if (cfqd->busy_queues) { 448 if (cfqd->busy_queues) {
449 cfq_log(cfqd, "schedule dispatch"); 449 cfq_log(cfqd, "schedule dispatch");
450 kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); 450 kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
451 } 451 }
452 } 452 }
453 453
454 static int cfq_queue_empty(struct request_queue *q) 454 static int cfq_queue_empty(struct request_queue *q)
455 { 455 {
456 struct cfq_data *cfqd = q->elevator->elevator_data; 456 struct cfq_data *cfqd = q->elevator->elevator_data;
457 457
458 return !cfqd->rq_queued; 458 return !cfqd->rq_queued;
459 } 459 }
460 460
461 /* 461 /*
462 * Scale schedule slice based on io priority. Use the sync time slice only 462 * Scale schedule slice based on io priority. Use the sync time slice only
463 * if a queue is marked sync and has sync io queued. A sync queue with async 463 * if a queue is marked sync and has sync io queued. A sync queue with async
464 * io only, should not get full sync slice length. 464 * io only, should not get full sync slice length.
465 */ 465 */
466 static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync, 466 static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync,
467 unsigned short prio) 467 unsigned short prio)
468 { 468 {
469 const int base_slice = cfqd->cfq_slice[sync]; 469 const int base_slice = cfqd->cfq_slice[sync];
470 470
471 WARN_ON(prio >= IOPRIO_BE_NR); 471 WARN_ON(prio >= IOPRIO_BE_NR);
472 472
473 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio)); 473 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio));
474 } 474 }
475 475
476 static inline int 476 static inline int
477 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 477 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
478 { 478 {
479 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); 479 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
480 } 480 }
481 481
482 static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg) 482 static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg)
483 { 483 {
484 u64 d = delta << CFQ_SERVICE_SHIFT; 484 u64 d = delta << CFQ_SERVICE_SHIFT;
485 485
486 d = d * BLKIO_WEIGHT_DEFAULT; 486 d = d * BLKIO_WEIGHT_DEFAULT;
487 do_div(d, cfqg->weight); 487 do_div(d, cfqg->weight);
488 return d; 488 return d;
489 } 489 }
490 490
491 static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) 491 static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
492 { 492 {
493 s64 delta = (s64)(vdisktime - min_vdisktime); 493 s64 delta = (s64)(vdisktime - min_vdisktime);
494 if (delta > 0) 494 if (delta > 0)
495 min_vdisktime = vdisktime; 495 min_vdisktime = vdisktime;
496 496
497 return min_vdisktime; 497 return min_vdisktime;
498 } 498 }
499 499
500 static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime) 500 static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
501 { 501 {
502 s64 delta = (s64)(vdisktime - min_vdisktime); 502 s64 delta = (s64)(vdisktime - min_vdisktime);
503 if (delta < 0) 503 if (delta < 0)
504 min_vdisktime = vdisktime; 504 min_vdisktime = vdisktime;
505 505
506 return min_vdisktime; 506 return min_vdisktime;
507 } 507 }
508 508
509 static void update_min_vdisktime(struct cfq_rb_root *st) 509 static void update_min_vdisktime(struct cfq_rb_root *st)
510 { 510 {
511 u64 vdisktime = st->min_vdisktime; 511 u64 vdisktime = st->min_vdisktime;
512 struct cfq_group *cfqg; 512 struct cfq_group *cfqg;
513 513
514 if (st->active) { 514 if (st->active) {
515 cfqg = rb_entry_cfqg(st->active); 515 cfqg = rb_entry_cfqg(st->active);
516 vdisktime = cfqg->vdisktime; 516 vdisktime = cfqg->vdisktime;
517 } 517 }
518 518
519 if (st->left) { 519 if (st->left) {
520 cfqg = rb_entry_cfqg(st->left); 520 cfqg = rb_entry_cfqg(st->left);
521 vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime); 521 vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
522 } 522 }
523 523
524 st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime); 524 st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime);
525 } 525 }
526 526
527 /* 527 /*
528 * get averaged number of queues of RT/BE priority. 528 * get averaged number of queues of RT/BE priority.
529 * average is updated, with a formula that gives more weight to higher numbers, 529 * average is updated, with a formula that gives more weight to higher numbers,
530 * to quickly follows sudden increases and decrease slowly 530 * to quickly follows sudden increases and decrease slowly
531 */ 531 */
532 532
533 static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd, 533 static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
534 struct cfq_group *cfqg, bool rt) 534 struct cfq_group *cfqg, bool rt)
535 { 535 {
536 unsigned min_q, max_q; 536 unsigned min_q, max_q;
537 unsigned mult = cfq_hist_divisor - 1; 537 unsigned mult = cfq_hist_divisor - 1;
538 unsigned round = cfq_hist_divisor / 2; 538 unsigned round = cfq_hist_divisor / 2;
539 unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg); 539 unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg);
540 540
541 min_q = min(cfqg->busy_queues_avg[rt], busy); 541 min_q = min(cfqg->busy_queues_avg[rt], busy);
542 max_q = max(cfqg->busy_queues_avg[rt], busy); 542 max_q = max(cfqg->busy_queues_avg[rt], busy);
543 cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) / 543 cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
544 cfq_hist_divisor; 544 cfq_hist_divisor;
545 return cfqg->busy_queues_avg[rt]; 545 return cfqg->busy_queues_avg[rt];
546 } 546 }
547 547
548 static inline unsigned 548 static inline unsigned
549 cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) 549 cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
550 { 550 {
551 struct cfq_rb_root *st = &cfqd->grp_service_tree; 551 struct cfq_rb_root *st = &cfqd->grp_service_tree;
552 552
553 return cfq_target_latency * cfqg->weight / st->total_weight; 553 return cfq_target_latency * cfqg->weight / st->total_weight;
554 } 554 }
555 555
556 static inline void 556 static inline void
557 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 557 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
558 { 558 {
559 unsigned slice = cfq_prio_to_slice(cfqd, cfqq); 559 unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
560 if (cfqd->cfq_latency) { 560 if (cfqd->cfq_latency) {
561 /* 561 /*
562 * interested queues (we consider only the ones with the same 562 * interested queues (we consider only the ones with the same
563 * priority class in the cfq group) 563 * priority class in the cfq group)
564 */ 564 */
565 unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg, 565 unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
566 cfq_class_rt(cfqq)); 566 cfq_class_rt(cfqq));
567 unsigned sync_slice = cfqd->cfq_slice[1]; 567 unsigned sync_slice = cfqd->cfq_slice[1];
568 unsigned expect_latency = sync_slice * iq; 568 unsigned expect_latency = sync_slice * iq;
569 unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg); 569 unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
570 570
571 if (expect_latency > group_slice) { 571 if (expect_latency > group_slice) {
572 unsigned base_low_slice = 2 * cfqd->cfq_slice_idle; 572 unsigned base_low_slice = 2 * cfqd->cfq_slice_idle;
573 /* scale low_slice according to IO priority 573 /* scale low_slice according to IO priority
574 * and sync vs async */ 574 * and sync vs async */
575 unsigned low_slice = 575 unsigned low_slice =
576 min(slice, base_low_slice * slice / sync_slice); 576 min(slice, base_low_slice * slice / sync_slice);
577 /* the adapted slice value is scaled to fit all iqs 577 /* the adapted slice value is scaled to fit all iqs
578 * into the target latency */ 578 * into the target latency */
579 slice = max(slice * group_slice / expect_latency, 579 slice = max(slice * group_slice / expect_latency,
580 low_slice); 580 low_slice);
581 } 581 }
582 } 582 }
583 cfqq->slice_start = jiffies; 583 cfqq->slice_start = jiffies;
584 cfqq->slice_end = jiffies + slice; 584 cfqq->slice_end = jiffies + slice;
585 cfqq->allocated_slice = slice; 585 cfqq->allocated_slice = slice;
586 cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); 586 cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
587 } 587 }
588 588
589 /* 589 /*
590 * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end 590 * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end
591 * isn't valid until the first request from the dispatch is activated 591 * isn't valid until the first request from the dispatch is activated
592 * and the slice time set. 592 * and the slice time set.
593 */ 593 */
594 static inline bool cfq_slice_used(struct cfq_queue *cfqq) 594 static inline bool cfq_slice_used(struct cfq_queue *cfqq)
595 { 595 {
596 if (cfq_cfqq_slice_new(cfqq)) 596 if (cfq_cfqq_slice_new(cfqq))
597 return 0; 597 return 0;
598 if (time_before(jiffies, cfqq->slice_end)) 598 if (time_before(jiffies, cfqq->slice_end))
599 return 0; 599 return 0;
600 600
601 return 1; 601 return 1;
602 } 602 }
603 603
604 /* 604 /*
605 * Lifted from AS - choose which of rq1 and rq2 that is best served now. 605 * Lifted from AS - choose which of rq1 and rq2 that is best served now.
606 * We choose the request that is closest to the head right now. Distance 606 * We choose the request that is closest to the head right now. Distance
607 * behind the head is penalized and only allowed to a certain extent. 607 * behind the head is penalized and only allowed to a certain extent.
608 */ 608 */
609 static struct request * 609 static struct request *
610 cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, sector_t last) 610 cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, sector_t last)
611 { 611 {
612 sector_t s1, s2, d1 = 0, d2 = 0; 612 sector_t s1, s2, d1 = 0, d2 = 0;
613 unsigned long back_max; 613 unsigned long back_max;
614 #define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */ 614 #define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */
615 #define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */ 615 #define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */
616 unsigned wrap = 0; /* bit mask: requests behind the disk head? */ 616 unsigned wrap = 0; /* bit mask: requests behind the disk head? */
617 617
618 if (rq1 == NULL || rq1 == rq2) 618 if (rq1 == NULL || rq1 == rq2)
619 return rq2; 619 return rq2;
620 if (rq2 == NULL) 620 if (rq2 == NULL)
621 return rq1; 621 return rq1;
622 622
623 if (rq_is_sync(rq1) && !rq_is_sync(rq2)) 623 if (rq_is_sync(rq1) && !rq_is_sync(rq2))
624 return rq1; 624 return rq1;
625 else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) 625 else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
626 return rq2; 626 return rq2;
627 if (rq_is_meta(rq1) && !rq_is_meta(rq2)) 627 if (rq_is_meta(rq1) && !rq_is_meta(rq2))
628 return rq1; 628 return rq1;
629 else if (rq_is_meta(rq2) && !rq_is_meta(rq1)) 629 else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
630 return rq2; 630 return rq2;
631 631
632 s1 = blk_rq_pos(rq1); 632 s1 = blk_rq_pos(rq1);
633 s2 = blk_rq_pos(rq2); 633 s2 = blk_rq_pos(rq2);
634 634
635 /* 635 /*
636 * by definition, 1KiB is 2 sectors 636 * by definition, 1KiB is 2 sectors
637 */ 637 */
638 back_max = cfqd->cfq_back_max * 2; 638 back_max = cfqd->cfq_back_max * 2;
639 639
640 /* 640 /*
641 * Strict one way elevator _except_ in the case where we allow 641 * Strict one way elevator _except_ in the case where we allow
642 * short backward seeks which are biased as twice the cost of a 642 * short backward seeks which are biased as twice the cost of a
643 * similar forward seek. 643 * similar forward seek.
644 */ 644 */
645 if (s1 >= last) 645 if (s1 >= last)
646 d1 = s1 - last; 646 d1 = s1 - last;
647 else if (s1 + back_max >= last) 647 else if (s1 + back_max >= last)
648 d1 = (last - s1) * cfqd->cfq_back_penalty; 648 d1 = (last - s1) * cfqd->cfq_back_penalty;
649 else 649 else
650 wrap |= CFQ_RQ1_WRAP; 650 wrap |= CFQ_RQ1_WRAP;
651 651
652 if (s2 >= last) 652 if (s2 >= last)
653 d2 = s2 - last; 653 d2 = s2 - last;
654 else if (s2 + back_max >= last) 654 else if (s2 + back_max >= last)
655 d2 = (last - s2) * cfqd->cfq_back_penalty; 655 d2 = (last - s2) * cfqd->cfq_back_penalty;
656 else 656 else
657 wrap |= CFQ_RQ2_WRAP; 657 wrap |= CFQ_RQ2_WRAP;
658 658
659 /* Found required data */ 659 /* Found required data */
660 660
661 /* 661 /*
662 * By doing switch() on the bit mask "wrap" we avoid having to 662 * By doing switch() on the bit mask "wrap" we avoid having to
663 * check two variables for all permutations: --> faster! 663 * check two variables for all permutations: --> faster!
664 */ 664 */
665 switch (wrap) { 665 switch (wrap) {
666 case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ 666 case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
667 if (d1 < d2) 667 if (d1 < d2)
668 return rq1; 668 return rq1;
669 else if (d2 < d1) 669 else if (d2 < d1)
670 return rq2; 670 return rq2;
671 else { 671 else {
672 if (s1 >= s2) 672 if (s1 >= s2)
673 return rq1; 673 return rq1;
674 else 674 else
675 return rq2; 675 return rq2;
676 } 676 }
677 677
678 case CFQ_RQ2_WRAP: 678 case CFQ_RQ2_WRAP:
679 return rq1; 679 return rq1;
680 case CFQ_RQ1_WRAP: 680 case CFQ_RQ1_WRAP:
681 return rq2; 681 return rq2;
682 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */ 682 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
683 default: 683 default:
684 /* 684 /*
685 * Since both rqs are wrapped, 685 * Since both rqs are wrapped,
686 * start with the one that's further behind head 686 * start with the one that's further behind head
687 * (--> only *one* back seek required), 687 * (--> only *one* back seek required),
688 * since back seek takes more time than forward. 688 * since back seek takes more time than forward.
689 */ 689 */
690 if (s1 <= s2) 690 if (s1 <= s2)
691 return rq1; 691 return rq1;
692 else 692 else
693 return rq2; 693 return rq2;
694 } 694 }
695 } 695 }
696 696
697 /* 697 /*
698 * The below is leftmost cache rbtree addon 698 * The below is leftmost cache rbtree addon
699 */ 699 */
700 static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root) 700 static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
701 { 701 {
702 /* Service tree is empty */ 702 /* Service tree is empty */
703 if (!root->count) 703 if (!root->count)
704 return NULL; 704 return NULL;
705 705
706 if (!root->left) 706 if (!root->left)
707 root->left = rb_first(&root->rb); 707 root->left = rb_first(&root->rb);
708 708
709 if (root->left) 709 if (root->left)
710 return rb_entry(root->left, struct cfq_queue, rb_node); 710 return rb_entry(root->left, struct cfq_queue, rb_node);
711 711
712 return NULL; 712 return NULL;
713 } 713 }
714 714
715 static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root) 715 static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root)
716 { 716 {
717 if (!root->left) 717 if (!root->left)
718 root->left = rb_first(&root->rb); 718 root->left = rb_first(&root->rb);
719 719
720 if (root->left) 720 if (root->left)
721 return rb_entry_cfqg(root->left); 721 return rb_entry_cfqg(root->left);
722 722
723 return NULL; 723 return NULL;
724 } 724 }
725 725
726 static void rb_erase_init(struct rb_node *n, struct rb_root *root) 726 static void rb_erase_init(struct rb_node *n, struct rb_root *root)
727 { 727 {
728 rb_erase(n, root); 728 rb_erase(n, root);
729 RB_CLEAR_NODE(n); 729 RB_CLEAR_NODE(n);
730 } 730 }
731 731
732 static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) 732 static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
733 { 733 {
734 if (root->left == n) 734 if (root->left == n)
735 root->left = NULL; 735 root->left = NULL;
736 rb_erase_init(n, &root->rb); 736 rb_erase_init(n, &root->rb);
737 --root->count; 737 --root->count;
738 } 738 }
739 739
740 /* 740 /*
741 * would be nice to take fifo expire time into account as well 741 * would be nice to take fifo expire time into account as well
742 */ 742 */
743 static struct request * 743 static struct request *
744 cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq, 744 cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
745 struct request *last) 745 struct request *last)
746 { 746 {
747 struct rb_node *rbnext = rb_next(&last->rb_node); 747 struct rb_node *rbnext = rb_next(&last->rb_node);
748 struct rb_node *rbprev = rb_prev(&last->rb_node); 748 struct rb_node *rbprev = rb_prev(&last->rb_node);
749 struct request *next = NULL, *prev = NULL; 749 struct request *next = NULL, *prev = NULL;
750 750
751 BUG_ON(RB_EMPTY_NODE(&last->rb_node)); 751 BUG_ON(RB_EMPTY_NODE(&last->rb_node));
752 752
753 if (rbprev) 753 if (rbprev)
754 prev = rb_entry_rq(rbprev); 754 prev = rb_entry_rq(rbprev);
755 755
756 if (rbnext) 756 if (rbnext)
757 next = rb_entry_rq(rbnext); 757 next = rb_entry_rq(rbnext);
758 else { 758 else {
759 rbnext = rb_first(&cfqq->sort_list); 759 rbnext = rb_first(&cfqq->sort_list);
760 if (rbnext && rbnext != &last->rb_node) 760 if (rbnext && rbnext != &last->rb_node)
761 next = rb_entry_rq(rbnext); 761 next = rb_entry_rq(rbnext);
762 } 762 }
763 763
764 return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last)); 764 return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last));
765 } 765 }
766 766
767 static unsigned long cfq_slice_offset(struct cfq_data *cfqd, 767 static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
768 struct cfq_queue *cfqq) 768 struct cfq_queue *cfqq)
769 { 769 {
770 /* 770 /*
771 * just an approximation, should be ok. 771 * just an approximation, should be ok.
772 */ 772 */
773 return (cfqq->cfqg->nr_cfqq - 1) * (cfq_prio_slice(cfqd, 1, 0) - 773 return (cfqq->cfqg->nr_cfqq - 1) * (cfq_prio_slice(cfqd, 1, 0) -
774 cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio)); 774 cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
775 } 775 }
776 776
777 static inline s64 777 static inline s64
778 cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg) 778 cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg)
779 { 779 {
780 return cfqg->vdisktime - st->min_vdisktime; 780 return cfqg->vdisktime - st->min_vdisktime;
781 } 781 }
782 782
783 static void 783 static void
784 __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) 784 __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
785 { 785 {
786 struct rb_node **node = &st->rb.rb_node; 786 struct rb_node **node = &st->rb.rb_node;
787 struct rb_node *parent = NULL; 787 struct rb_node *parent = NULL;
788 struct cfq_group *__cfqg; 788 struct cfq_group *__cfqg;
789 s64 key = cfqg_key(st, cfqg); 789 s64 key = cfqg_key(st, cfqg);
790 int left = 1; 790 int left = 1;
791 791
792 while (*node != NULL) { 792 while (*node != NULL) {
793 parent = *node; 793 parent = *node;
794 __cfqg = rb_entry_cfqg(parent); 794 __cfqg = rb_entry_cfqg(parent);
795 795
796 if (key < cfqg_key(st, __cfqg)) 796 if (key < cfqg_key(st, __cfqg))
797 node = &parent->rb_left; 797 node = &parent->rb_left;
798 else { 798 else {
799 node = &parent->rb_right; 799 node = &parent->rb_right;
800 left = 0; 800 left = 0;
801 } 801 }
802 } 802 }
803 803
804 if (left) 804 if (left)
805 st->left = &cfqg->rb_node; 805 st->left = &cfqg->rb_node;
806 806
807 rb_link_node(&cfqg->rb_node, parent, node); 807 rb_link_node(&cfqg->rb_node, parent, node);
808 rb_insert_color(&cfqg->rb_node, &st->rb); 808 rb_insert_color(&cfqg->rb_node, &st->rb);
809 } 809 }
810 810
811 static void 811 static void
812 cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg) 812 cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
813 { 813 {
814 struct cfq_rb_root *st = &cfqd->grp_service_tree; 814 struct cfq_rb_root *st = &cfqd->grp_service_tree;
815 struct cfq_group *__cfqg; 815 struct cfq_group *__cfqg;
816 struct rb_node *n; 816 struct rb_node *n;
817 817
818 cfqg->nr_cfqq++; 818 cfqg->nr_cfqq++;
819 if (cfqg->on_st) 819 if (cfqg->on_st)
820 return; 820 return;
821 821
822 /* 822 /*
823 * Currently put the group at the end. Later implement something 823 * Currently put the group at the end. Later implement something
824 * so that groups get lesser vtime based on their weights, so that 824 * so that groups get lesser vtime based on their weights, so that
825 * if group does not loose all if it was not continously backlogged. 825 * if group does not loose all if it was not continously backlogged.
826 */ 826 */
827 n = rb_last(&st->rb); 827 n = rb_last(&st->rb);
828 if (n) { 828 if (n) {
829 __cfqg = rb_entry_cfqg(n); 829 __cfqg = rb_entry_cfqg(n);
830 cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY; 830 cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
831 } else 831 } else
832 cfqg->vdisktime = st->min_vdisktime; 832 cfqg->vdisktime = st->min_vdisktime;
833 833
834 __cfq_group_service_tree_add(st, cfqg); 834 __cfq_group_service_tree_add(st, cfqg);
835 cfqg->on_st = true; 835 cfqg->on_st = true;
836 st->total_weight += cfqg->weight; 836 st->total_weight += cfqg->weight;
837 } 837 }
838 838
839 static void 839 static void
840 cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) 840 cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
841 { 841 {
842 struct cfq_rb_root *st = &cfqd->grp_service_tree; 842 struct cfq_rb_root *st = &cfqd->grp_service_tree;
843 843
844 if (st->active == &cfqg->rb_node) 844 if (st->active == &cfqg->rb_node)
845 st->active = NULL; 845 st->active = NULL;
846 846
847 BUG_ON(cfqg->nr_cfqq < 1); 847 BUG_ON(cfqg->nr_cfqq < 1);
848 cfqg->nr_cfqq--; 848 cfqg->nr_cfqq--;
849 849
850 /* If there are other cfq queues under this group, don't delete it */ 850 /* If there are other cfq queues under this group, don't delete it */
851 if (cfqg->nr_cfqq) 851 if (cfqg->nr_cfqq)
852 return; 852 return;
853 853
854 cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); 854 cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
855 cfqg->on_st = false; 855 cfqg->on_st = false;
856 st->total_weight -= cfqg->weight; 856 st->total_weight -= cfqg->weight;
857 if (!RB_EMPTY_NODE(&cfqg->rb_node)) 857 if (!RB_EMPTY_NODE(&cfqg->rb_node))
858 cfq_rb_erase(&cfqg->rb_node, st); 858 cfq_rb_erase(&cfqg->rb_node, st);
859 cfqg->saved_workload_slice = 0; 859 cfqg->saved_workload_slice = 0;
860 blkiocg_update_blkio_group_dequeue_stats(&cfqg->blkg, 1); 860 blkiocg_update_blkio_group_dequeue_stats(&cfqg->blkg, 1);
861 } 861 }
862 862
863 static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) 863 static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
864 { 864 {
865 unsigned int slice_used; 865 unsigned int slice_used;
866 866
867 /* 867 /*
868 * Queue got expired before even a single request completed or 868 * Queue got expired before even a single request completed or
869 * got expired immediately after first request completion. 869 * got expired immediately after first request completion.
870 */ 870 */
871 if (!cfqq->slice_start || cfqq->slice_start == jiffies) { 871 if (!cfqq->slice_start || cfqq->slice_start == jiffies) {
872 /* 872 /*
873 * Also charge the seek time incurred to the group, otherwise 873 * Also charge the seek time incurred to the group, otherwise
874 * if there are mutiple queues in the group, each can dispatch 874 * if there are mutiple queues in the group, each can dispatch
875 * a single request on seeky media and cause lots of seek time 875 * a single request on seeky media and cause lots of seek time
876 * and group will never know it. 876 * and group will never know it.
877 */ 877 */
878 slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start), 878 slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start),
879 1); 879 1);
880 } else { 880 } else {
881 slice_used = jiffies - cfqq->slice_start; 881 slice_used = jiffies - cfqq->slice_start;
882 if (slice_used > cfqq->allocated_slice) 882 if (slice_used > cfqq->allocated_slice)
883 slice_used = cfqq->allocated_slice; 883 slice_used = cfqq->allocated_slice;
884 } 884 }
885 885
886 cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u sect=%lu", slice_used, 886 cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u sect=%lu", slice_used,
887 cfqq->nr_sectors); 887 cfqq->nr_sectors);
888 return slice_used; 888 return slice_used;
889 } 889 }
890 890
891 static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, 891 static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
892 struct cfq_queue *cfqq) 892 struct cfq_queue *cfqq)
893 { 893 {
894 struct cfq_rb_root *st = &cfqd->grp_service_tree; 894 struct cfq_rb_root *st = &cfqd->grp_service_tree;
895 unsigned int used_sl, charge_sl; 895 unsigned int used_sl, charge_sl;
896 int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) 896 int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
897 - cfqg->service_tree_idle.count; 897 - cfqg->service_tree_idle.count;
898 898
899 BUG_ON(nr_sync < 0); 899 BUG_ON(nr_sync < 0);
900 used_sl = charge_sl = cfq_cfqq_slice_usage(cfqq); 900 used_sl = charge_sl = cfq_cfqq_slice_usage(cfqq);
901 901
902 if (!cfq_cfqq_sync(cfqq) && !nr_sync) 902 if (!cfq_cfqq_sync(cfqq) && !nr_sync)
903 charge_sl = cfqq->allocated_slice; 903 charge_sl = cfqq->allocated_slice;
904 904
905 /* Can't update vdisktime while group is on service tree */ 905 /* Can't update vdisktime while group is on service tree */
906 cfq_rb_erase(&cfqg->rb_node, st); 906 cfq_rb_erase(&cfqg->rb_node, st);
907 cfqg->vdisktime += cfq_scale_slice(charge_sl, cfqg); 907 cfqg->vdisktime += cfq_scale_slice(charge_sl, cfqg);
908 __cfq_group_service_tree_add(st, cfqg); 908 __cfq_group_service_tree_add(st, cfqg);
909 909
910 /* This group is being expired. Save the context */ 910 /* This group is being expired. Save the context */
911 if (time_after(cfqd->workload_expires, jiffies)) { 911 if (time_after(cfqd->workload_expires, jiffies)) {
912 cfqg->saved_workload_slice = cfqd->workload_expires 912 cfqg->saved_workload_slice = cfqd->workload_expires
913 - jiffies; 913 - jiffies;
914 cfqg->saved_workload = cfqd->serving_type; 914 cfqg->saved_workload = cfqd->serving_type;
915 cfqg->saved_serving_prio = cfqd->serving_prio; 915 cfqg->saved_serving_prio = cfqd->serving_prio;
916 } else 916 } else
917 cfqg->saved_workload_slice = 0; 917 cfqg->saved_workload_slice = 0;
918 918
919 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, 919 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
920 st->min_vdisktime); 920 st->min_vdisktime);
921 blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl, 921 blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl,
922 cfqq->nr_sectors); 922 cfqq->nr_sectors);
923 } 923 }
924 924
925 #ifdef CONFIG_CFQ_GROUP_IOSCHED 925 #ifdef CONFIG_CFQ_GROUP_IOSCHED
926 static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg) 926 static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
927 { 927 {
928 if (blkg) 928 if (blkg)
929 return container_of(blkg, struct cfq_group, blkg); 929 return container_of(blkg, struct cfq_group, blkg);
930 return NULL; 930 return NULL;
931 } 931 }
932 932
933 void 933 void
934 cfq_update_blkio_group_weight(struct blkio_group *blkg, unsigned int weight) 934 cfq_update_blkio_group_weight(struct blkio_group *blkg, unsigned int weight)
935 { 935 {
936 cfqg_of_blkg(blkg)->weight = weight; 936 cfqg_of_blkg(blkg)->weight = weight;
937 } 937 }
938 938
939 static struct cfq_group * 939 static struct cfq_group *
940 cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) 940 cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
941 { 941 {
942 struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); 942 struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
943 struct cfq_group *cfqg = NULL; 943 struct cfq_group *cfqg = NULL;
944 void *key = cfqd; 944 void *key = cfqd;
945 int i, j; 945 int i, j;
946 struct cfq_rb_root *st; 946 struct cfq_rb_root *st;
947 struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info; 947 struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
948 unsigned int major, minor; 948 unsigned int major, minor;
949 949
950 cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key)); 950 cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
951 if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) { 951 if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
952 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); 952 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
953 cfqg->blkg.dev = MKDEV(major, minor); 953 cfqg->blkg.dev = MKDEV(major, minor);
954 goto done; 954 goto done;
955 } 955 }
956 if (cfqg || !create) 956 if (cfqg || !create)
957 goto done; 957 goto done;
958 958
959 cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node); 959 cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
960 if (!cfqg) 960 if (!cfqg)
961 goto done; 961 goto done;
962 962
963 cfqg->weight = blkcg->weight; 963 cfqg->weight = blkcg->weight;
964 for_each_cfqg_st(cfqg, i, j, st) 964 for_each_cfqg_st(cfqg, i, j, st)
965 *st = CFQ_RB_ROOT; 965 *st = CFQ_RB_ROOT;
966 RB_CLEAR_NODE(&cfqg->rb_node); 966 RB_CLEAR_NODE(&cfqg->rb_node);
967 967
968 /* 968 /*
969 * Take the initial reference that will be released on destroy 969 * Take the initial reference that will be released on destroy
970 * This can be thought of as a joint reference by cgroup and 970 * This can be thought of as a joint reference by cgroup and
971 * elevator which will be dropped by either elevator exit 971 * elevator which will be dropped by either elevator exit
972 * or cgroup deletion path depending on who is exiting first. 972 * or cgroup deletion path depending on who is exiting first.
973 */ 973 */
974 atomic_set(&cfqg->ref, 1); 974 atomic_set(&cfqg->ref, 1);
975 975
976 /* Add group onto cgroup list */ 976 /* Add group onto cgroup list */
977 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); 977 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
978 blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd, 978 blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
979 MKDEV(major, minor)); 979 MKDEV(major, minor));
980 980
981 /* Add group on cfqd list */ 981 /* Add group on cfqd list */
982 hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); 982 hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
983 983
984 done: 984 done:
985 return cfqg; 985 return cfqg;
986 } 986 }
987 987
988 /* 988 /*
989 * Search for the cfq group current task belongs to. If create = 1, then also 989 * Search for the cfq group current task belongs to. If create = 1, then also
990 * create the cfq group if it does not exist. request_queue lock must be held. 990 * create the cfq group if it does not exist. request_queue lock must be held.
991 */ 991 */
992 static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) 992 static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
993 { 993 {
994 struct cgroup *cgroup; 994 struct cgroup *cgroup;
995 struct cfq_group *cfqg = NULL; 995 struct cfq_group *cfqg = NULL;
996 996
997 rcu_read_lock(); 997 rcu_read_lock();
998 cgroup = task_cgroup(current, blkio_subsys_id); 998 cgroup = task_cgroup(current, blkio_subsys_id);
999 cfqg = cfq_find_alloc_cfqg(cfqd, cgroup, create); 999 cfqg = cfq_find_alloc_cfqg(cfqd, cgroup, create);
1000 if (!cfqg && create) 1000 if (!cfqg && create)
1001 cfqg = &cfqd->root_group; 1001 cfqg = &cfqd->root_group;
1002 rcu_read_unlock(); 1002 rcu_read_unlock();
1003 return cfqg; 1003 return cfqg;
1004 } 1004 }
1005 1005
1006 static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) 1006 static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
1007 { 1007 {
1008 /* Currently, all async queues are mapped to root group */ 1008 /* Currently, all async queues are mapped to root group */
1009 if (!cfq_cfqq_sync(cfqq)) 1009 if (!cfq_cfqq_sync(cfqq))
1010 cfqg = &cfqq->cfqd->root_group; 1010 cfqg = &cfqq->cfqd->root_group;
1011 1011
1012 cfqq->cfqg = cfqg; 1012 cfqq->cfqg = cfqg;
1013 /* cfqq reference on cfqg */ 1013 /* cfqq reference on cfqg */
1014 atomic_inc(&cfqq->cfqg->ref); 1014 atomic_inc(&cfqq->cfqg->ref);
1015 } 1015 }
1016 1016
1017 static void cfq_put_cfqg(struct cfq_group *cfqg) 1017 static void cfq_put_cfqg(struct cfq_group *cfqg)
1018 { 1018 {
1019 struct cfq_rb_root *st; 1019 struct cfq_rb_root *st;
1020 int i, j; 1020 int i, j;
1021 1021
1022 BUG_ON(atomic_read(&cfqg->ref) <= 0); 1022 BUG_ON(atomic_read(&cfqg->ref) <= 0);
1023 if (!atomic_dec_and_test(&cfqg->ref)) 1023 if (!atomic_dec_and_test(&cfqg->ref))
1024 return; 1024 return;
1025 for_each_cfqg_st(cfqg, i, j, st) 1025 for_each_cfqg_st(cfqg, i, j, st)
1026 BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL); 1026 BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
1027 kfree(cfqg); 1027 kfree(cfqg);
1028 } 1028 }
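Taken together, the functions above define the cfq_group reference lifecycle: one joint cgroup/elevator reference at creation, one more for each linked queue, and a free once the count drops to zero in cfq_put_cfqg(). A standalone sketch using C11 atomics in place of the kernel's atomic_t helpers (illustrative only):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct group {
	atomic_int ref;
};

static struct group *group_alloc(void)
{
	struct group *g = calloc(1, sizeof(*g));

	atomic_init(&g->ref, 1);	/* joint cgroup/elevator reference */
	return g;
}

static void group_get(struct group *g)	/* e.g. a queue linking to the group */
{
	atomic_fetch_add(&g->ref, 1);
}

static void group_put(struct group *g)	/* counterpart of cfq_put_cfqg() */
{
	if (atomic_fetch_sub(&g->ref, 1) == 1) {
		printf("last reference dropped, freeing group\n");
		free(g);
	}
}

int main(void)
{
	struct group *g = group_alloc();	/* ref = 1 */

	group_get(g);				/* queue linked, ref = 2 */
	group_put(g);				/* queue gone, ref = 1 */
	group_put(g);				/* destroy path drops base ref */
	return 0;
}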
1029 1029
1030 static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) 1030 static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
1031 { 1031 {
1032 /* Something wrong if we are trying to remove same group twice */ 1032 /* Something wrong if we are trying to remove same group twice */
1033 BUG_ON(hlist_unhashed(&cfqg->cfqd_node)); 1033 BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
1034 1034
1035 hlist_del_init(&cfqg->cfqd_node); 1035 hlist_del_init(&cfqg->cfqd_node);
1036 1036
1037 /* 1037 /*
1038 * Put the reference taken at the time of creation so that when all 1038 * Put the reference taken at the time of creation so that when all
1039 * queues are gone, group can be destroyed. 1039 * queues are gone, group can be destroyed.
1040 */ 1040 */
1041 cfq_put_cfqg(cfqg); 1041 cfq_put_cfqg(cfqg);
1042 } 1042 }
1043 1043
1044 static void cfq_release_cfq_groups(struct cfq_data *cfqd) 1044 static void cfq_release_cfq_groups(struct cfq_data *cfqd)
1045 { 1045 {
1046 struct hlist_node *pos, *n; 1046 struct hlist_node *pos, *n;
1047 struct cfq_group *cfqg; 1047 struct cfq_group *cfqg;
1048 1048
1049 hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) { 1049 hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) {
1050 /* 1050 /*
1051 * If cgroup removal path got to blk_group first and removed 1051 * If cgroup removal path got to blk_group first and removed
1052 * it from cgroup list, then it will take care of destroying 1052 * it from cgroup list, then it will take care of destroying
1053 * cfqg also. 1053 * cfqg also.
1054 */ 1054 */
1055 if (!blkiocg_del_blkio_group(&cfqg->blkg)) 1055 if (!blkiocg_del_blkio_group(&cfqg->blkg))
1056 cfq_destroy_cfqg(cfqd, cfqg); 1056 cfq_destroy_cfqg(cfqd, cfqg);
1057 } 1057 }
1058 } 1058 }
1059 1059
1060 /* 1060 /*
1061 * Blk cgroup controller notification saying that blkio_group object is being 1061 * Blk cgroup controller notification saying that blkio_group object is being
1062 * delinked as the associated cgroup object is going away. That also means that 1062 * delinked as the associated cgroup object is going away. That also means that
1063 * no new IO will come in this group. So get rid of this group as soon as 1063 * no new IO will come in this group. So get rid of this group as soon as
1064 * any pending IO in the group is finished. 1064 * any pending IO in the group is finished.
1065 * 1065 *
1066 * This function is called under rcu_read_lock(). key is the rcu protected 1066 * This function is called under rcu_read_lock(). key is the rcu protected
1067 * pointer. That means "key" is a valid cfq_data pointer as long as we hold 1067 * pointer. That means "key" is a valid cfq_data pointer as long as we hold
1068 * the rcu read lock. 1068 * the rcu read lock.
1069 * 1069 *
1070 * "key" was fetched from blkio_group under blkio_cgroup->lock. That means 1070 * "key" was fetched from blkio_group under blkio_cgroup->lock. That means
1071 * it should not be NULL as even if elevator was exiting, cgroup deletion 1071 * it should not be NULL as even if elevator was exiting, cgroup deletion
1072 * path got to it first. 1072 * path got to it first.
1073 */ 1073 */
1074 void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg) 1074 void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
1075 { 1075 {
1076 unsigned long flags; 1076 unsigned long flags;
1077 struct cfq_data *cfqd = key; 1077 struct cfq_data *cfqd = key;
1078 1078
1079 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 1079 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
1080 cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg)); 1080 cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg));
1081 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 1081 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
1082 } 1082 }
1083 1083
1084 #else /* GROUP_IOSCHED */ 1084 #else /* GROUP_IOSCHED */
1085 static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) 1085 static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
1086 { 1086 {
1087 return &cfqd->root_group; 1087 return &cfqd->root_group;
1088 } 1088 }
1089 static inline void 1089 static inline void
1090 cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { 1090 cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
1091 cfqq->cfqg = cfqg; 1091 cfqq->cfqg = cfqg;
1092 } 1092 }
1093 1093
1094 static void cfq_release_cfq_groups(struct cfq_data *cfqd) {} 1094 static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
1095 static inline void cfq_put_cfqg(struct cfq_group *cfqg) {} 1095 static inline void cfq_put_cfqg(struct cfq_group *cfqg) {}
1096 1096
1097 #endif /* GROUP_IOSCHED */ 1097 #endif /* GROUP_IOSCHED */
1098 1098
1099 /* 1099 /*
1100 * The cfqd->service_trees hold all pending cfq_queues that have 1100 * The cfqd->service_trees hold all pending cfq_queues that have
1101 * requests waiting to be processed. They are sorted in the order in 1101 * requests waiting to be processed. They are sorted in the order in
1102 * which we will service the queues. 1102 * which we will service the queues.
1103 */ 1103 */
1104 static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1104 static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1105 bool add_front) 1105 bool add_front)
1106 { 1106 {
1107 struct rb_node **p, *parent; 1107 struct rb_node **p, *parent;
1108 struct cfq_queue *__cfqq; 1108 struct cfq_queue *__cfqq;
1109 unsigned long rb_key; 1109 unsigned long rb_key;
1110 struct cfq_rb_root *service_tree; 1110 struct cfq_rb_root *service_tree;
1111 int left; 1111 int left;
1112 int new_cfqq = 1; 1112 int new_cfqq = 1;
1113 int group_changed = 0; 1113 int group_changed = 0;
1114 1114
1115 #ifdef CONFIG_CFQ_GROUP_IOSCHED 1115 #ifdef CONFIG_CFQ_GROUP_IOSCHED
1116 if (!cfqd->cfq_group_isolation 1116 if (!cfqd->cfq_group_isolation
1117 && cfqq_type(cfqq) == SYNC_NOIDLE_WORKLOAD 1117 && cfqq_type(cfqq) == SYNC_NOIDLE_WORKLOAD
1118 && cfqq->cfqg && cfqq->cfqg != &cfqd->root_group) { 1118 && cfqq->cfqg && cfqq->cfqg != &cfqd->root_group) {
1119 /* Move this cfq to root group */ 1119 /* Move this cfq to root group */
1120 cfq_log_cfqq(cfqd, cfqq, "moving to root group"); 1120 cfq_log_cfqq(cfqd, cfqq, "moving to root group");
1121 if (!RB_EMPTY_NODE(&cfqq->rb_node)) 1121 if (!RB_EMPTY_NODE(&cfqq->rb_node))
1122 cfq_group_service_tree_del(cfqd, cfqq->cfqg); 1122 cfq_group_service_tree_del(cfqd, cfqq->cfqg);
1123 cfqq->orig_cfqg = cfqq->cfqg; 1123 cfqq->orig_cfqg = cfqq->cfqg;
1124 cfqq->cfqg = &cfqd->root_group; 1124 cfqq->cfqg = &cfqd->root_group;
1125 atomic_inc(&cfqd->root_group.ref); 1125 atomic_inc(&cfqd->root_group.ref);
1126 group_changed = 1; 1126 group_changed = 1;
1127 } else if (!cfqd->cfq_group_isolation 1127 } else if (!cfqd->cfq_group_isolation
1128 && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) { 1128 && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
1129 /* cfqq is sequential now, needs to go to its original group */ 1129 /* cfqq is sequential now, needs to go to its original group */
1130 BUG_ON(cfqq->cfqg != &cfqd->root_group); 1130 BUG_ON(cfqq->cfqg != &cfqd->root_group);
1131 if (!RB_EMPTY_NODE(&cfqq->rb_node)) 1131 if (!RB_EMPTY_NODE(&cfqq->rb_node))
1132 cfq_group_service_tree_del(cfqd, cfqq->cfqg); 1132 cfq_group_service_tree_del(cfqd, cfqq->cfqg);
1133 cfq_put_cfqg(cfqq->cfqg); 1133 cfq_put_cfqg(cfqq->cfqg);
1134 cfqq->cfqg = cfqq->orig_cfqg; 1134 cfqq->cfqg = cfqq->orig_cfqg;
1135 cfqq->orig_cfqg = NULL; 1135 cfqq->orig_cfqg = NULL;
1136 group_changed = 1; 1136 group_changed = 1;
1137 cfq_log_cfqq(cfqd, cfqq, "moved to origin group"); 1137 cfq_log_cfqq(cfqd, cfqq, "moved to origin group");
1138 } 1138 }
1139 #endif 1139 #endif
1140 1140
1141 service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq), 1141 service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
1142 cfqq_type(cfqq)); 1142 cfqq_type(cfqq));
1143 if (cfq_class_idle(cfqq)) { 1143 if (cfq_class_idle(cfqq)) {
1144 rb_key = CFQ_IDLE_DELAY; 1144 rb_key = CFQ_IDLE_DELAY;
1145 parent = rb_last(&service_tree->rb); 1145 parent = rb_last(&service_tree->rb);
1146 if (parent && parent != &cfqq->rb_node) { 1146 if (parent && parent != &cfqq->rb_node) {
1147 __cfqq = rb_entry(parent, struct cfq_queue, rb_node); 1147 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
1148 rb_key += __cfqq->rb_key; 1148 rb_key += __cfqq->rb_key;
1149 } else 1149 } else
1150 rb_key += jiffies; 1150 rb_key += jiffies;
1151 } else if (!add_front) { 1151 } else if (!add_front) {
1152 /* 1152 /*
1153 * Get our rb key offset. Subtract any residual slice 1153 * Get our rb key offset. Subtract any residual slice
1154 * value carried from last service. A negative resid 1154 * value carried from last service. A negative resid
1155 * count indicates slice overrun, and this should position 1155 * count indicates slice overrun, and this should position
1156 * the next service time further away in the tree. 1156 * the next service time further away in the tree.
1157 */ 1157 */
1158 rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; 1158 rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
1159 rb_key -= cfqq->slice_resid; 1159 rb_key -= cfqq->slice_resid;
1160 cfqq->slice_resid = 0; 1160 cfqq->slice_resid = 0;
1161 } else { 1161 } else {
1162 rb_key = -HZ; 1162 rb_key = -HZ;
1163 __cfqq = cfq_rb_first(service_tree); 1163 __cfqq = cfq_rb_first(service_tree);
1164 rb_key += __cfqq ? __cfqq->rb_key : jiffies; 1164 rb_key += __cfqq ? __cfqq->rb_key : jiffies;
1165 } 1165 }
1166 1166
1167 if (!RB_EMPTY_NODE(&cfqq->rb_node)) { 1167 if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
1168 new_cfqq = 0; 1168 new_cfqq = 0;
1169 /* 1169 /*
1170 * same position, nothing more to do 1170 * same position, nothing more to do
1171 */ 1171 */
1172 if (rb_key == cfqq->rb_key && 1172 if (rb_key == cfqq->rb_key &&
1173 cfqq->service_tree == service_tree) 1173 cfqq->service_tree == service_tree)
1174 return; 1174 return;
1175 1175
1176 cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); 1176 cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
1177 cfqq->service_tree = NULL; 1177 cfqq->service_tree = NULL;
1178 } 1178 }
1179 1179
1180 left = 1; 1180 left = 1;
1181 parent = NULL; 1181 parent = NULL;
1182 cfqq->service_tree = service_tree; 1182 cfqq->service_tree = service_tree;
1183 p = &service_tree->rb.rb_node; 1183 p = &service_tree->rb.rb_node;
1184 while (*p) { 1184 while (*p) {
1185 struct rb_node **n; 1185 struct rb_node **n;
1186 1186
1187 parent = *p; 1187 parent = *p;
1188 __cfqq = rb_entry(parent, struct cfq_queue, rb_node); 1188 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
1189 1189
1190 /* 1190 /*
1191 * sort by key, which represents service time. 1191 * sort by key, which represents service time.
1192 */ 1192 */
1193 if (time_before(rb_key, __cfqq->rb_key)) 1193 if (time_before(rb_key, __cfqq->rb_key))
1194 n = &(*p)->rb_left; 1194 n = &(*p)->rb_left;
1195 else { 1195 else {
1196 n = &(*p)->rb_right; 1196 n = &(*p)->rb_right;
1197 left = 0; 1197 left = 0;
1198 } 1198 }
1199 1199
1200 p = n; 1200 p = n;
1201 } 1201 }
1202 1202
1203 if (left) 1203 if (left)
1204 service_tree->left = &cfqq->rb_node; 1204 service_tree->left = &cfqq->rb_node;
1205 1205
1206 cfqq->rb_key = rb_key; 1206 cfqq->rb_key = rb_key;
1207 rb_link_node(&cfqq->rb_node, parent, p); 1207 rb_link_node(&cfqq->rb_node, parent, p);
1208 rb_insert_color(&cfqq->rb_node, &service_tree->rb); 1208 rb_insert_color(&cfqq->rb_node, &service_tree->rb);
1209 service_tree->count++; 1209 service_tree->count++;
1210 if ((add_front || !new_cfqq) && !group_changed) 1210 if ((add_front || !new_cfqq) && !group_changed)
1211 return; 1211 return;
1212 cfq_group_service_tree_add(cfqd, cfqq->cfqg); 1212 cfq_group_service_tree_add(cfqd, cfqq->cfqg);
1213 } 1213 }
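One detail of the service-tree key above deserves a worked example: unused slice (a positive slice_resid) pulls the key earlier, while an overrun (a negative resid) pushes it later. The arithmetic in isolation, with offset standing in for cfq_slice_offset() (defined elsewhere in this file) and made-up jiffies values:

#include <stdio.h>

static unsigned long rb_key_for(unsigned long now, unsigned long offset,
				long slice_resid)
{
	return offset + now - slice_resid;
}

int main(void)
{
	unsigned long now = 100000;

	/* left 20 jiffies of its slice unused: keyed earlier */
	printf("resid +20 -> key %lu\n", rb_key_for(now, 300, 20));
	/* overran its slice by 20 jiffies: keyed later */
	printf("resid -20 -> key %lu\n", rb_key_for(now, 300, -20));
	return 0;
}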
1214 1214
1215 static struct cfq_queue * 1215 static struct cfq_queue *
1216 cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root, 1216 cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root,
1217 sector_t sector, struct rb_node **ret_parent, 1217 sector_t sector, struct rb_node **ret_parent,
1218 struct rb_node ***rb_link) 1218 struct rb_node ***rb_link)
1219 { 1219 {
1220 struct rb_node **p, *parent; 1220 struct rb_node **p, *parent;
1221 struct cfq_queue *cfqq = NULL; 1221 struct cfq_queue *cfqq = NULL;
1222 1222
1223 parent = NULL; 1223 parent = NULL;
1224 p = &root->rb_node; 1224 p = &root->rb_node;
1225 while (*p) { 1225 while (*p) {
1226 struct rb_node **n; 1226 struct rb_node **n;
1227 1227
1228 parent = *p; 1228 parent = *p;
1229 cfqq = rb_entry(parent, struct cfq_queue, p_node); 1229 cfqq = rb_entry(parent, struct cfq_queue, p_node);
1230 1230
1231 /* 1231 /*
1232 * Sort strictly based on sector. Smallest to the left, 1232 * Sort strictly based on sector. Smallest to the left,
1233 * largest to the right. 1233 * largest to the right.
1234 */ 1234 */
1235 if (sector > blk_rq_pos(cfqq->next_rq)) 1235 if (sector > blk_rq_pos(cfqq->next_rq))
1236 n = &(*p)->rb_right; 1236 n = &(*p)->rb_right;
1237 else if (sector < blk_rq_pos(cfqq->next_rq)) 1237 else if (sector < blk_rq_pos(cfqq->next_rq))
1238 n = &(*p)->rb_left; 1238 n = &(*p)->rb_left;
1239 else 1239 else
1240 break; 1240 break;
1241 p = n; 1241 p = n;
1242 cfqq = NULL; 1242 cfqq = NULL;
1243 } 1243 }
1244 1244
1245 *ret_parent = parent; 1245 *ret_parent = parent;
1246 if (rb_link) 1246 if (rb_link)
1247 *rb_link = p; 1247 *rb_link = p;
1248 return cfqq; 1248 return cfqq;
1249 } 1249 }
1250 1250
1251 static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1251 static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1252 { 1252 {
1253 struct rb_node **p, *parent; 1253 struct rb_node **p, *parent;
1254 struct cfq_queue *__cfqq; 1254 struct cfq_queue *__cfqq;
1255 1255
1256 if (cfqq->p_root) { 1256 if (cfqq->p_root) {
1257 rb_erase(&cfqq->p_node, cfqq->p_root); 1257 rb_erase(&cfqq->p_node, cfqq->p_root);
1258 cfqq->p_root = NULL; 1258 cfqq->p_root = NULL;
1259 } 1259 }
1260 1260
1261 if (cfq_class_idle(cfqq)) 1261 if (cfq_class_idle(cfqq))
1262 return; 1262 return;
1263 if (!cfqq->next_rq) 1263 if (!cfqq->next_rq)
1264 return; 1264 return;
1265 1265
1266 cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio]; 1266 cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio];
1267 __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, 1267 __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root,
1268 blk_rq_pos(cfqq->next_rq), &parent, &p); 1268 blk_rq_pos(cfqq->next_rq), &parent, &p);
1269 if (!__cfqq) { 1269 if (!__cfqq) {
1270 rb_link_node(&cfqq->p_node, parent, p); 1270 rb_link_node(&cfqq->p_node, parent, p);
1271 rb_insert_color(&cfqq->p_node, cfqq->p_root); 1271 rb_insert_color(&cfqq->p_node, cfqq->p_root);
1272 } else 1272 } else
1273 cfqq->p_root = NULL; 1273 cfqq->p_root = NULL;
1274 } 1274 }
1275 1275
1276 /* 1276 /*
1277 * Update cfqq's position in the service tree. 1277 * Update cfqq's position in the service tree.
1278 */ 1278 */
1279 static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1279 static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1280 { 1280 {
1281 /* 1281 /*
1282 * Resorting requires the cfqq to be on the RR list already. 1282 * Resorting requires the cfqq to be on the RR list already.
1283 */ 1283 */
1284 if (cfq_cfqq_on_rr(cfqq)) { 1284 if (cfq_cfqq_on_rr(cfqq)) {
1285 cfq_service_tree_add(cfqd, cfqq, 0); 1285 cfq_service_tree_add(cfqd, cfqq, 0);
1286 cfq_prio_tree_add(cfqd, cfqq); 1286 cfq_prio_tree_add(cfqd, cfqq);
1287 } 1287 }
1288 } 1288 }
1289 1289
1290 /* 1290 /*
1291 * add to busy list of queues for service, trying to be fair in ordering 1291 * add to busy list of queues for service, trying to be fair in ordering
1292 * the pending list according to last request service 1292 * the pending list according to last request service
1293 */ 1293 */
1294 static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1294 static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1295 { 1295 {
1296 cfq_log_cfqq(cfqd, cfqq, "add_to_rr"); 1296 cfq_log_cfqq(cfqd, cfqq, "add_to_rr");
1297 BUG_ON(cfq_cfqq_on_rr(cfqq)); 1297 BUG_ON(cfq_cfqq_on_rr(cfqq));
1298 cfq_mark_cfqq_on_rr(cfqq); 1298 cfq_mark_cfqq_on_rr(cfqq);
1299 cfqd->busy_queues++; 1299 cfqd->busy_queues++;
1300 1300
1301 cfq_resort_rr_list(cfqd, cfqq); 1301 cfq_resort_rr_list(cfqd, cfqq);
1302 } 1302 }
1303 1303
1304 /* 1304 /*
1305 * Called when the cfqq no longer has requests pending, remove it from 1305 * Called when the cfqq no longer has requests pending, remove it from
1306 * the service tree. 1306 * the service tree.
1307 */ 1307 */
1308 static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1308 static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1309 { 1309 {
1310 cfq_log_cfqq(cfqd, cfqq, "del_from_rr"); 1310 cfq_log_cfqq(cfqd, cfqq, "del_from_rr");
1311 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 1311 BUG_ON(!cfq_cfqq_on_rr(cfqq));
1312 cfq_clear_cfqq_on_rr(cfqq); 1312 cfq_clear_cfqq_on_rr(cfqq);
1313 1313
1314 if (!RB_EMPTY_NODE(&cfqq->rb_node)) { 1314 if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
1315 cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); 1315 cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
1316 cfqq->service_tree = NULL; 1316 cfqq->service_tree = NULL;
1317 } 1317 }
1318 if (cfqq->p_root) { 1318 if (cfqq->p_root) {
1319 rb_erase(&cfqq->p_node, cfqq->p_root); 1319 rb_erase(&cfqq->p_node, cfqq->p_root);
1320 cfqq->p_root = NULL; 1320 cfqq->p_root = NULL;
1321 } 1321 }
1322 1322
1323 cfq_group_service_tree_del(cfqd, cfqq->cfqg); 1323 cfq_group_service_tree_del(cfqd, cfqq->cfqg);
1324 BUG_ON(!cfqd->busy_queues); 1324 BUG_ON(!cfqd->busy_queues);
1325 cfqd->busy_queues--; 1325 cfqd->busy_queues--;
1326 } 1326 }
1327 1327
1328 /* 1328 /*
1329 * rb tree support functions 1329 * rb tree support functions
1330 */ 1330 */
1331 static void cfq_del_rq_rb(struct request *rq) 1331 static void cfq_del_rq_rb(struct request *rq)
1332 { 1332 {
1333 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1333 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1334 const int sync = rq_is_sync(rq); 1334 const int sync = rq_is_sync(rq);
1335 1335
1336 BUG_ON(!cfqq->queued[sync]); 1336 BUG_ON(!cfqq->queued[sync]);
1337 cfqq->queued[sync]--; 1337 cfqq->queued[sync]--;
1338 1338
1339 elv_rb_del(&cfqq->sort_list, rq); 1339 elv_rb_del(&cfqq->sort_list, rq);
1340 1340
1341 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) { 1341 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) {
1342 /* 1342 /*
1343 * Queue will be deleted from service tree when we actually 1343 * Queue will be deleted from service tree when we actually
1344 * expire it later. Right now just remove it from prio tree 1344 * expire it later. Right now just remove it from prio tree
1345 * as it is empty. 1345 * as it is empty.
1346 */ 1346 */
1347 if (cfqq->p_root) { 1347 if (cfqq->p_root) {
1348 rb_erase(&cfqq->p_node, cfqq->p_root); 1348 rb_erase(&cfqq->p_node, cfqq->p_root);
1349 cfqq->p_root = NULL; 1349 cfqq->p_root = NULL;
1350 } 1350 }
1351 } 1351 }
1352 } 1352 }
1353 1353
1354 static void cfq_add_rq_rb(struct request *rq) 1354 static void cfq_add_rq_rb(struct request *rq)
1355 { 1355 {
1356 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1356 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1357 struct cfq_data *cfqd = cfqq->cfqd; 1357 struct cfq_data *cfqd = cfqq->cfqd;
1358 struct request *__alias, *prev; 1358 struct request *__alias, *prev;
1359 1359
1360 cfqq->queued[rq_is_sync(rq)]++; 1360 cfqq->queued[rq_is_sync(rq)]++;
1361 1361
1362 /* 1362 /*
1363 * looks a little odd, but the first insert might return an alias. 1363 * looks a little odd, but the first insert might return an alias.
1364 * if that happens, put the alias on the dispatch list 1364 * if that happens, put the alias on the dispatch list
1365 */ 1365 */
1366 while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL) 1366 while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
1367 cfq_dispatch_insert(cfqd->queue, __alias); 1367 cfq_dispatch_insert(cfqd->queue, __alias);
1368 1368
1369 if (!cfq_cfqq_on_rr(cfqq)) 1369 if (!cfq_cfqq_on_rr(cfqq))
1370 cfq_add_cfqq_rr(cfqd, cfqq); 1370 cfq_add_cfqq_rr(cfqd, cfqq);
1371 1371
1372 /* 1372 /*
1373 * check if this request is a better next-serve candidate 1373 * check if this request is a better next-serve candidate
1374 */ 1374 */
1375 prev = cfqq->next_rq; 1375 prev = cfqq->next_rq;
1376 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq, cfqd->last_position); 1376 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq, cfqd->last_position);
1377 1377
1378 /* 1378 /*
1379 * adjust priority tree position, if ->next_rq changes 1379 * adjust priority tree position, if ->next_rq changes
1380 */ 1380 */
1381 if (prev != cfqq->next_rq) 1381 if (prev != cfqq->next_rq)
1382 cfq_prio_tree_add(cfqd, cfqq); 1382 cfq_prio_tree_add(cfqd, cfqq);
1383 1383
1384 BUG_ON(!cfqq->next_rq); 1384 BUG_ON(!cfqq->next_rq);
1385 } 1385 }
1386 1386
1387 static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) 1387 static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
1388 { 1388 {
1389 elv_rb_del(&cfqq->sort_list, rq); 1389 elv_rb_del(&cfqq->sort_list, rq);
1390 cfqq->queued[rq_is_sync(rq)]--; 1390 cfqq->queued[rq_is_sync(rq)]--;
1391 cfq_add_rq_rb(rq); 1391 cfq_add_rq_rb(rq);
1392 } 1392 }
1393 1393
1394 static struct request * 1394 static struct request *
1395 cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) 1395 cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
1396 { 1396 {
1397 struct task_struct *tsk = current; 1397 struct task_struct *tsk = current;
1398 struct cfq_io_context *cic; 1398 struct cfq_io_context *cic;
1399 struct cfq_queue *cfqq; 1399 struct cfq_queue *cfqq;
1400 1400
1401 cic = cfq_cic_lookup(cfqd, tsk->io_context); 1401 cic = cfq_cic_lookup(cfqd, tsk->io_context);
1402 if (!cic) 1402 if (!cic)
1403 return NULL; 1403 return NULL;
1404 1404
1405 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); 1405 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
1406 if (cfqq) { 1406 if (cfqq) {
1407 sector_t sector = bio->bi_sector + bio_sectors(bio); 1407 sector_t sector = bio->bi_sector + bio_sectors(bio);
1408 1408
1409 return elv_rb_find(&cfqq->sort_list, sector); 1409 return elv_rb_find(&cfqq->sort_list, sector);
1410 } 1410 }
1411 1411
1412 return NULL; 1412 return NULL;
1413 } 1413 }
1414 1414
1415 static void cfq_activate_request(struct request_queue *q, struct request *rq) 1415 static void cfq_activate_request(struct request_queue *q, struct request *rq)
1416 { 1416 {
1417 struct cfq_data *cfqd = q->elevator->elevator_data; 1417 struct cfq_data *cfqd = q->elevator->elevator_data;
1418 1418
1419 cfqd->rq_in_driver++; 1419 cfqd->rq_in_driver++;
1420 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", 1420 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
1421 cfqd->rq_in_driver); 1421 cfqd->rq_in_driver);
1422 1422
1423 cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); 1423 cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
1424 } 1424 }
1425 1425
1426 static void cfq_deactivate_request(struct request_queue *q, struct request *rq) 1426 static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
1427 { 1427 {
1428 struct cfq_data *cfqd = q->elevator->elevator_data; 1428 struct cfq_data *cfqd = q->elevator->elevator_data;
1429 1429
1430 WARN_ON(!cfqd->rq_in_driver); 1430 WARN_ON(!cfqd->rq_in_driver);
1431 cfqd->rq_in_driver--; 1431 cfqd->rq_in_driver--;
1432 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d", 1432 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
1433 cfqd->rq_in_driver); 1433 cfqd->rq_in_driver);
1434 } 1434 }
1435 1435
1436 static void cfq_remove_request(struct request *rq) 1436 static void cfq_remove_request(struct request *rq)
1437 { 1437 {
1438 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1438 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1439 1439
1440 if (cfqq->next_rq == rq) 1440 if (cfqq->next_rq == rq)
1441 cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq); 1441 cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);
1442 1442
1443 list_del_init(&rq->queuelist); 1443 list_del_init(&rq->queuelist);
1444 cfq_del_rq_rb(rq); 1444 cfq_del_rq_rb(rq);
1445 1445
1446 cfqq->cfqd->rq_queued--; 1446 cfqq->cfqd->rq_queued--;
1447 if (rq_is_meta(rq)) { 1447 if (rq_is_meta(rq)) {
1448 WARN_ON(!cfqq->meta_pending); 1448 WARN_ON(!cfqq->meta_pending);
1449 cfqq->meta_pending--; 1449 cfqq->meta_pending--;
1450 } 1450 }
1451 } 1451 }
1452 1452
1453 static int cfq_merge(struct request_queue *q, struct request **req, 1453 static int cfq_merge(struct request_queue *q, struct request **req,
1454 struct bio *bio) 1454 struct bio *bio)
1455 { 1455 {
1456 struct cfq_data *cfqd = q->elevator->elevator_data; 1456 struct cfq_data *cfqd = q->elevator->elevator_data;
1457 struct request *__rq; 1457 struct request *__rq;
1458 1458
1459 __rq = cfq_find_rq_fmerge(cfqd, bio); 1459 __rq = cfq_find_rq_fmerge(cfqd, bio);
1460 if (__rq && elv_rq_merge_ok(__rq, bio)) { 1460 if (__rq && elv_rq_merge_ok(__rq, bio)) {
1461 *req = __rq; 1461 *req = __rq;
1462 return ELEVATOR_FRONT_MERGE; 1462 return ELEVATOR_FRONT_MERGE;
1463 } 1463 }
1464 1464
1465 return ELEVATOR_NO_MERGE; 1465 return ELEVATOR_NO_MERGE;
1466 } 1466 }
1467 1467
1468 static void cfq_merged_request(struct request_queue *q, struct request *req, 1468 static void cfq_merged_request(struct request_queue *q, struct request *req,
1469 int type) 1469 int type)
1470 { 1470 {
1471 if (type == ELEVATOR_FRONT_MERGE) { 1471 if (type == ELEVATOR_FRONT_MERGE) {
1472 struct cfq_queue *cfqq = RQ_CFQQ(req); 1472 struct cfq_queue *cfqq = RQ_CFQQ(req);
1473 1473
1474 cfq_reposition_rq_rb(cfqq, req); 1474 cfq_reposition_rq_rb(cfqq, req);
1475 } 1475 }
1476 } 1476 }
1477 1477
1478 static void 1478 static void
1479 cfq_merged_requests(struct request_queue *q, struct request *rq, 1479 cfq_merged_requests(struct request_queue *q, struct request *rq,
1480 struct request *next) 1480 struct request *next)
1481 { 1481 {
1482 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1482 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1483 /* 1483 /*
1484 * reposition in fifo if next is older than rq 1484 * reposition in fifo if next is older than rq
1485 */ 1485 */
1486 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && 1486 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
1487 time_before(rq_fifo_time(next), rq_fifo_time(rq))) { 1487 time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
1488 list_move(&rq->queuelist, &next->queuelist); 1488 list_move(&rq->queuelist, &next->queuelist);
1489 rq_set_fifo_time(rq, rq_fifo_time(next)); 1489 rq_set_fifo_time(rq, rq_fifo_time(next));
1490 } 1490 }
1491 1491
1492 if (cfqq->next_rq == next) 1492 if (cfqq->next_rq == next)
1493 cfqq->next_rq = rq; 1493 cfqq->next_rq = rq;
1494 cfq_remove_request(next); 1494 cfq_remove_request(next);
1495 } 1495 }
1496 1496
1497 static int cfq_allow_merge(struct request_queue *q, struct request *rq, 1497 static int cfq_allow_merge(struct request_queue *q, struct request *rq,
1498 struct bio *bio) 1498 struct bio *bio)
1499 { 1499 {
1500 struct cfq_data *cfqd = q->elevator->elevator_data; 1500 struct cfq_data *cfqd = q->elevator->elevator_data;
1501 struct cfq_io_context *cic; 1501 struct cfq_io_context *cic;
1502 struct cfq_queue *cfqq; 1502 struct cfq_queue *cfqq;
1503 1503
1504 /* 1504 /*
1505 * Disallow merge of a sync bio into an async request. 1505 * Disallow merge of a sync bio into an async request.
1506 */ 1506 */
1507 if (cfq_bio_sync(bio) && !rq_is_sync(rq)) 1507 if (cfq_bio_sync(bio) && !rq_is_sync(rq))
1508 return false; 1508 return false;
1509 1509
1510 /* 1510 /*
1511 * Lookup the cfqq that this bio will be queued with. Allow 1511 * Lookup the cfqq that this bio will be queued with. Allow
1512 * merge only if rq is queued there. 1512 * merge only if rq is queued there.
1513 */ 1513 */
1514 cic = cfq_cic_lookup(cfqd, current->io_context); 1514 cic = cfq_cic_lookup(cfqd, current->io_context);
1515 if (!cic) 1515 if (!cic)
1516 return false; 1516 return false;
1517 1517
1518 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); 1518 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
1519 return cfqq == RQ_CFQQ(rq); 1519 return cfqq == RQ_CFQQ(rq);
1520 } 1520 }
1521 1521
1522 static void __cfq_set_active_queue(struct cfq_data *cfqd, 1522 static void __cfq_set_active_queue(struct cfq_data *cfqd,
1523 struct cfq_queue *cfqq) 1523 struct cfq_queue *cfqq)
1524 { 1524 {
1525 if (cfqq) { 1525 if (cfqq) {
1526 cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", 1526 cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d",
1527 cfqd->serving_prio, cfqd->serving_type); 1527 cfqd->serving_prio, cfqd->serving_type);
1528 cfqq->slice_start = 0; 1528 cfqq->slice_start = 0;
1529 cfqq->dispatch_start = jiffies; 1529 cfqq->dispatch_start = jiffies;
1530 cfqq->allocated_slice = 0; 1530 cfqq->allocated_slice = 0;
1531 cfqq->slice_end = 0; 1531 cfqq->slice_end = 0;
1532 cfqq->slice_dispatch = 0; 1532 cfqq->slice_dispatch = 0;
1533 cfqq->nr_sectors = 0; 1533 cfqq->nr_sectors = 0;
1534 1534
1535 cfq_clear_cfqq_wait_request(cfqq); 1535 cfq_clear_cfqq_wait_request(cfqq);
1536 cfq_clear_cfqq_must_dispatch(cfqq); 1536 cfq_clear_cfqq_must_dispatch(cfqq);
1537 cfq_clear_cfqq_must_alloc_slice(cfqq); 1537 cfq_clear_cfqq_must_alloc_slice(cfqq);
1538 cfq_clear_cfqq_fifo_expire(cfqq); 1538 cfq_clear_cfqq_fifo_expire(cfqq);
1539 cfq_mark_cfqq_slice_new(cfqq); 1539 cfq_mark_cfqq_slice_new(cfqq);
1540 1540
1541 del_timer(&cfqd->idle_slice_timer); 1541 del_timer(&cfqd->idle_slice_timer);
1542 } 1542 }
1543 1543
1544 cfqd->active_queue = cfqq; 1544 cfqd->active_queue = cfqq;
1545 } 1545 }
1546 1546
1547 /* 1547 /*
1548 * current cfqq expired its slice (or was too idle), select new one 1548 * current cfqq expired its slice (or was too idle), select new one
1549 */ 1549 */
1550 static void 1550 static void
1551 __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1551 __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1552 bool timed_out) 1552 bool timed_out)
1553 { 1553 {
1554 cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); 1554 cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
1555 1555
1556 if (cfq_cfqq_wait_request(cfqq)) 1556 if (cfq_cfqq_wait_request(cfqq))
1557 del_timer(&cfqd->idle_slice_timer); 1557 del_timer(&cfqd->idle_slice_timer);
1558 1558
1559 cfq_clear_cfqq_wait_request(cfqq); 1559 cfq_clear_cfqq_wait_request(cfqq);
1560 cfq_clear_cfqq_wait_busy(cfqq); 1560 cfq_clear_cfqq_wait_busy(cfqq);
1561 1561
1562 /* 1562 /*
1563 * If this cfqq is shared between multiple processes, check to 1563 * If this cfqq is shared between multiple processes, check to
1564 * make sure that those processes are still issuing I/Os within 1564 * make sure that those processes are still issuing I/Os within
1565 * the mean seek distance. If not, it may be time to break the 1565 * the mean seek distance. If not, it may be time to break the
1566 * queues apart again. 1566 * queues apart again.
1567 */ 1567 */
1568 if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq)) 1568 if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq))
1569 cfq_mark_cfqq_split_coop(cfqq); 1569 cfq_mark_cfqq_split_coop(cfqq);
1570 1570
1571 /* 1571 /*
1572 * store what was left of this slice, if the queue idled/timed out 1572 * store what was left of this slice, if the queue idled/timed out
1573 */ 1573 */
1574 if (timed_out && !cfq_cfqq_slice_new(cfqq)) { 1574 if (timed_out && !cfq_cfqq_slice_new(cfqq)) {
1575 cfqq->slice_resid = cfqq->slice_end - jiffies; 1575 cfqq->slice_resid = cfqq->slice_end - jiffies;
1576 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); 1576 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
1577 } 1577 }
1578 1578
1579 cfq_group_served(cfqd, cfqq->cfqg, cfqq); 1579 cfq_group_served(cfqd, cfqq->cfqg, cfqq);
1580 1580
1581 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) 1581 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
1582 cfq_del_cfqq_rr(cfqd, cfqq); 1582 cfq_del_cfqq_rr(cfqd, cfqq);
1583 1583
1584 cfq_resort_rr_list(cfqd, cfqq); 1584 cfq_resort_rr_list(cfqd, cfqq);
1585 1585
1586 if (cfqq == cfqd->active_queue) 1586 if (cfqq == cfqd->active_queue)
1587 cfqd->active_queue = NULL; 1587 cfqd->active_queue = NULL;
1588 1588
1589 if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active) 1589 if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active)
1590 cfqd->grp_service_tree.active = NULL; 1590 cfqd->grp_service_tree.active = NULL;
1591 1591
1592 if (cfqd->active_cic) { 1592 if (cfqd->active_cic) {
1593 put_io_context(cfqd->active_cic->ioc); 1593 put_io_context(cfqd->active_cic->ioc);
1594 cfqd->active_cic = NULL; 1594 cfqd->active_cic = NULL;
1595 } 1595 }
1596 } 1596 }
1597 1597
1598 static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) 1598 static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
1599 { 1599 {
1600 struct cfq_queue *cfqq = cfqd->active_queue; 1600 struct cfq_queue *cfqq = cfqd->active_queue;
1601 1601
1602 if (cfqq) 1602 if (cfqq)
1603 __cfq_slice_expired(cfqd, cfqq, timed_out); 1603 __cfq_slice_expired(cfqd, cfqq, timed_out);
1604 } 1604 }
1605 1605
1606 /* 1606 /*
1607 * Get next queue for service. Unless we have a queue preemption, 1607 * Get next queue for service. Unless we have a queue preemption,
1608 * we'll simply select the first cfqq in the service tree. 1608 * we'll simply select the first cfqq in the service tree.
1609 */ 1609 */
1610 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) 1610 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
1611 { 1611 {
1612 struct cfq_rb_root *service_tree = 1612 struct cfq_rb_root *service_tree =
1613 service_tree_for(cfqd->serving_group, cfqd->serving_prio, 1613 service_tree_for(cfqd->serving_group, cfqd->serving_prio,
1614 cfqd->serving_type); 1614 cfqd->serving_type);
1615 1615
1616 if (!cfqd->rq_queued) 1616 if (!cfqd->rq_queued)
1617 return NULL; 1617 return NULL;
1618 1618
1619 /* There is nothing to dispatch */ 1619 /* There is nothing to dispatch */
1620 if (!service_tree) 1620 if (!service_tree)
1621 return NULL; 1621 return NULL;
1622 if (RB_EMPTY_ROOT(&service_tree->rb)) 1622 if (RB_EMPTY_ROOT(&service_tree->rb))
1623 return NULL; 1623 return NULL;
1624 return cfq_rb_first(service_tree); 1624 return cfq_rb_first(service_tree);
1625 } 1625 }
1626 1626
1627 static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd) 1627 static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd)
1628 { 1628 {
1629 struct cfq_group *cfqg; 1629 struct cfq_group *cfqg;
1630 struct cfq_queue *cfqq; 1630 struct cfq_queue *cfqq;
1631 int i, j; 1631 int i, j;
1632 struct cfq_rb_root *st; 1632 struct cfq_rb_root *st;
1633 1633
1634 if (!cfqd->rq_queued) 1634 if (!cfqd->rq_queued)
1635 return NULL; 1635 return NULL;
1636 1636
1637 cfqg = cfq_get_next_cfqg(cfqd); 1637 cfqg = cfq_get_next_cfqg(cfqd);
1638 if (!cfqg) 1638 if (!cfqg)
1639 return NULL; 1639 return NULL;
1640 1640
1641 for_each_cfqg_st(cfqg, i, j, st) 1641 for_each_cfqg_st(cfqg, i, j, st)
1642 if ((cfqq = cfq_rb_first(st)) != NULL) 1642 if ((cfqq = cfq_rb_first(st)) != NULL)
1643 return cfqq; 1643 return cfqq;
1644 return NULL; 1644 return NULL;
1645 } 1645 }
1646 1646
1647 /* 1647 /*
1648 * Get and set a new active queue for service. 1648 * Get and set a new active queue for service.
1649 */ 1649 */
1650 static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd, 1650 static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
1651 struct cfq_queue *cfqq) 1651 struct cfq_queue *cfqq)
1652 { 1652 {
1653 if (!cfqq) 1653 if (!cfqq)
1654 cfqq = cfq_get_next_queue(cfqd); 1654 cfqq = cfq_get_next_queue(cfqd);
1655 1655
1656 __cfq_set_active_queue(cfqd, cfqq); 1656 __cfq_set_active_queue(cfqd, cfqq);
1657 return cfqq; 1657 return cfqq;
1658 } 1658 }
1659 1659
1660 static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, 1660 static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
1661 struct request *rq) 1661 struct request *rq)
1662 { 1662 {
1663 if (blk_rq_pos(rq) >= cfqd->last_position) 1663 if (blk_rq_pos(rq) >= cfqd->last_position)
1664 return blk_rq_pos(rq) - cfqd->last_position; 1664 return blk_rq_pos(rq) - cfqd->last_position;
1665 else 1665 else
1666 return cfqd->last_position - blk_rq_pos(rq); 1666 return cfqd->last_position - blk_rq_pos(rq);
1667 } 1667 }
1668 1668
1669 static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1669 static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1670 struct request *rq) 1670 struct request *rq)
1671 { 1671 {
1672 return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR; 1672 return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR;
1673 } 1673 }
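The two helpers above reduce to an absolute sector distance compared against a fixed threshold. Restated on its own, with CLOSE_THR as an assumed stand-in value for CFQQ_CLOSE_THR (defined elsewhere in this file):

#include <stdio.h>

#define CLOSE_THR 8192ULL	/* stand-in for CFQQ_CLOSE_THR */

static unsigned long long dist_from_last(unsigned long long last,
					 unsigned long long pos)
{
	return pos >= last ? pos - last : last - pos;
}

int main(void)
{
	unsigned long long last = 1000000;

	printf("4096 sectors away -> close? %d\n",
	       dist_from_last(last, last + 4096) <= CLOSE_THR);
	printf("1000000 sectors away -> close? %d\n",
	       dist_from_last(last, last + 1000000) <= CLOSE_THR);
	return 0;
}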
1674 1674
1675 static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, 1675 static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
1676 struct cfq_queue *cur_cfqq) 1676 struct cfq_queue *cur_cfqq)
1677 { 1677 {
1678 struct rb_root *root = &cfqd->prio_trees[cur_cfqq->org_ioprio]; 1678 struct rb_root *root = &cfqd->prio_trees[cur_cfqq->org_ioprio];
1679 struct rb_node *parent, *node; 1679 struct rb_node *parent, *node;
1680 struct cfq_queue *__cfqq; 1680 struct cfq_queue *__cfqq;
1681 sector_t sector = cfqd->last_position; 1681 sector_t sector = cfqd->last_position;
1682 1682
1683 if (RB_EMPTY_ROOT(root)) 1683 if (RB_EMPTY_ROOT(root))
1684 return NULL; 1684 return NULL;
1685 1685
1686 /* 1686 /*
1687 * First, if we find a request starting at the end of the last 1687 * First, if we find a request starting at the end of the last
1688 * request, choose it. 1688 * request, choose it.
1689 */ 1689 */
1690 __cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL); 1690 __cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL);
1691 if (__cfqq) 1691 if (__cfqq)
1692 return __cfqq; 1692 return __cfqq;
1693 1693
1694 /* 1694 /*
1695 * If the exact sector wasn't found, the parent of the NULL leaf 1695 * If the exact sector wasn't found, the parent of the NULL leaf
1696 * will contain the closest sector. 1696 * will contain the closest sector.
1697 */ 1697 */
1698 __cfqq = rb_entry(parent, struct cfq_queue, p_node); 1698 __cfqq = rb_entry(parent, struct cfq_queue, p_node);
1699 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) 1699 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
1700 return __cfqq; 1700 return __cfqq;
1701 1701
1702 if (blk_rq_pos(__cfqq->next_rq) < sector) 1702 if (blk_rq_pos(__cfqq->next_rq) < sector)
1703 node = rb_next(&__cfqq->p_node); 1703 node = rb_next(&__cfqq->p_node);
1704 else 1704 else
1705 node = rb_prev(&__cfqq->p_node); 1705 node = rb_prev(&__cfqq->p_node);
1706 if (!node) 1706 if (!node)
1707 return NULL; 1707 return NULL;
1708 1708
1709 __cfqq = rb_entry(node, struct cfq_queue, p_node); 1709 __cfqq = rb_entry(node, struct cfq_queue, p_node);
1710 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) 1710 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
1711 return __cfqq; 1711 return __cfqq;
1712 1712
1713 return NULL; 1713 return NULL;
1714 } 1714 }
1715 1715
1716 /* 1716 /*
1717 * cfqd - obvious 1717 * cfqd - obvious
1718 * cur_cfqq - passed in so that we don't decide that the current queue is 1718 * cur_cfqq - passed in so that we don't decide that the current queue is
1719 * closely cooperating with itself. 1719 * closely cooperating with itself.
1720 * 1720 *
1721 * So, basically we're assuming that cur_cfqq has dispatched at least 1721 * So, basically we're assuming that cur_cfqq has dispatched at least
1722 * one request, and that cfqd->last_position reflects a position on the disk 1722 * one request, and that cfqd->last_position reflects a position on the disk
1723 * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid 1723 * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid
1724 * assumption. 1724 * assumption.
1725 */ 1725 */
1726 static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, 1726 static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
1727 struct cfq_queue *cur_cfqq) 1727 struct cfq_queue *cur_cfqq)
1728 { 1728 {
1729 struct cfq_queue *cfqq; 1729 struct cfq_queue *cfqq;
1730 1730
1731 if (cfq_class_idle(cur_cfqq)) 1731 if (cfq_class_idle(cur_cfqq))
1732 return NULL; 1732 return NULL;
1733 if (!cfq_cfqq_sync(cur_cfqq)) 1733 if (!cfq_cfqq_sync(cur_cfqq))
1734 return NULL; 1734 return NULL;
1735 if (CFQQ_SEEKY(cur_cfqq)) 1735 if (CFQQ_SEEKY(cur_cfqq))
1736 return NULL; 1736 return NULL;
1737 1737
1738 /* 1738 /*
1739 * Don't search priority tree if it's the only queue in the group. 1739 * Don't search priority tree if it's the only queue in the group.
1740 */ 1740 */
1741 if (cur_cfqq->cfqg->nr_cfqq == 1) 1741 if (cur_cfqq->cfqg->nr_cfqq == 1)
1742 return NULL; 1742 return NULL;
1743 1743
1744 /* 1744 /*
1745 * We should notice if some of the queues are cooperating, eg 1745 * We should notice if some of the queues are cooperating, eg
1746 * working closely on the same area of the disk. In that case, 1746 * working closely on the same area of the disk. In that case,
1747 * we can group them together and don't waste time idling. 1747 * we can group them together and don't waste time idling.
1748 */ 1748 */
1749 cfqq = cfqq_close(cfqd, cur_cfqq); 1749 cfqq = cfqq_close(cfqd, cur_cfqq);
1750 if (!cfqq) 1750 if (!cfqq)
1751 return NULL; 1751 return NULL;
1752 1752
1753 /* If new queue belongs to different cfq_group, don't choose it */ 1753 /* If new queue belongs to different cfq_group, don't choose it */
1754 if (cur_cfqq->cfqg != cfqq->cfqg) 1754 if (cur_cfqq->cfqg != cfqq->cfqg)
1755 return NULL; 1755 return NULL;
1756 1756
1757 /* 1757 /*
1758 * It only makes sense to merge sync queues. 1758 * It only makes sense to merge sync queues.
1759 */ 1759 */
1760 if (!cfq_cfqq_sync(cfqq)) 1760 if (!cfq_cfqq_sync(cfqq))
1761 return NULL; 1761 return NULL;
1762 if (CFQQ_SEEKY(cfqq)) 1762 if (CFQQ_SEEKY(cfqq))
1763 return NULL; 1763 return NULL;
1764 1764
1765 /* 1765 /*
1766 * Do not merge queues of different priority classes 1766 * Do not merge queues of different priority classes
1767 */ 1767 */
1768 if (cfq_class_rt(cfqq) != cfq_class_rt(cur_cfqq)) 1768 if (cfq_class_rt(cfqq) != cfq_class_rt(cur_cfqq))
1769 return NULL; 1769 return NULL;
1770 1770
1771 return cfqq; 1771 return cfqq;
1772 } 1772 }
1773 1773
1774 /* 1774 /*
1775 * Determine whether we should enforce idle window for this queue. 1775 * Determine whether we should enforce idle window for this queue.
1776 */ 1776 */
1777 1777
1778 static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1778 static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1779 { 1779 {
1780 enum wl_prio_t prio = cfqq_prio(cfqq); 1780 enum wl_prio_t prio = cfqq_prio(cfqq);
1781 struct cfq_rb_root *service_tree = cfqq->service_tree; 1781 struct cfq_rb_root *service_tree = cfqq->service_tree;
1782 1782
1783 BUG_ON(!service_tree); 1783 BUG_ON(!service_tree);
1784 BUG_ON(!service_tree->count); 1784 BUG_ON(!service_tree->count);
1785 1785
1786 /* We never do so for idle class queues. */ 1786 /* We never do so for idle class queues. */
1787 if (prio == IDLE_WORKLOAD) 1787 if (prio == IDLE_WORKLOAD)
1788 return false; 1788 return false;
1789 1789
1790 /* We do for queues that were marked with idle window flag. */ 1790 /* We do for queues that were marked with idle window flag. */
1791 if (cfq_cfqq_idle_window(cfqq) && 1791 if (cfq_cfqq_idle_window(cfqq) &&
1792 !(blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)) 1792 !(blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag))
1793 return true; 1793 return true;
1794 1794
1795 /* 1795 /*
1796 * Otherwise, we do so only if they are the last ones 1796 * Otherwise, we do so only if they are the last ones
1797 * in their service tree. 1797 * in their service tree.
1798 */ 1798 */
1799 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq)) 1799 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
1800 return true; 1800 return true;
1801 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", 1801 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
1802 service_tree->count); 1802 service_tree->count);
1803 return false; 1803 return false;
1804 } 1804 }
1805 1805
1806 static void cfq_arm_slice_timer(struct cfq_data *cfqd) 1806 static void cfq_arm_slice_timer(struct cfq_data *cfqd)
1807 { 1807 {
1808 struct cfq_queue *cfqq = cfqd->active_queue; 1808 struct cfq_queue *cfqq = cfqd->active_queue;
1809 struct cfq_io_context *cic; 1809 struct cfq_io_context *cic;
1810 unsigned long sl; 1810 unsigned long sl;
1811 1811
1812 /* 1812 /*
1813 * SSD device without seek penalty, disable idling. But only do so 1813 * SSD device without seek penalty, disable idling. But only do so
1814 * for devices that support queuing, otherwise we still have a problem 1814 * for devices that support queuing, otherwise we still have a problem
1815 * with sync vs async workloads. 1815 * with sync vs async workloads.
1816 */ 1816 */
1817 if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag) 1817 if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)
1818 return; 1818 return;
1819 1819
1820 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); 1820 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
1821 WARN_ON(cfq_cfqq_slice_new(cfqq)); 1821 WARN_ON(cfq_cfqq_slice_new(cfqq));
1822 1822
1823 /* 1823 /*
1824 * idle is disabled, either manually or by past process history 1824 * idle is disabled, either manually or by past process history
1825 */ 1825 */
1826 if (!cfqd->cfq_slice_idle || !cfq_should_idle(cfqd, cfqq)) 1826 if (!cfqd->cfq_slice_idle || !cfq_should_idle(cfqd, cfqq))
1827 return; 1827 return;
1828 1828
1829 /* 1829 /*
1830 * still active requests from this queue, don't idle 1830 * still active requests from this queue, don't idle
1831 */ 1831 */
1832 if (cfqq->dispatched) 1832 if (cfqq->dispatched)
1833 return; 1833 return;
1834 1834
1835 /* 1835 /*
1836 * task has exited, don't wait 1836 * task has exited, don't wait
1837 */ 1837 */
1838 cic = cfqd->active_cic; 1838 cic = cfqd->active_cic;
1839 if (!cic || !atomic_read(&cic->ioc->nr_tasks)) 1839 if (!cic || !atomic_read(&cic->ioc->nr_tasks))
1840 return; 1840 return;
1841 1841
1842 /* 1842 /*
1843 * If our average think time is larger than the remaining time 1843 * If our average think time is larger than the remaining time
1844 * slice, then don't idle. This avoids overrunning the allotted 1844 * slice, then don't idle. This avoids overrunning the allotted
1845 * time slice. 1845 * time slice.
1846 */ 1846 */
1847 if (sample_valid(cic->ttime_samples) && 1847 if (sample_valid(cic->ttime_samples) &&
1848 (cfqq->slice_end - jiffies < cic->ttime_mean)) { 1848 (cfqq->slice_end - jiffies < cic->ttime_mean)) {
1849 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d", 1849 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d",
1850 cic->ttime_mean); 1850 cic->ttime_mean);
1851 return; 1851 return;
1852 } 1852 }
1853 1853
1854 cfq_mark_cfqq_wait_request(cfqq); 1854 cfq_mark_cfqq_wait_request(cfqq);
1855 1855
1856 sl = cfqd->cfq_slice_idle; 1856 sl = cfqd->cfq_slice_idle;
1857 1857
1858 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 1858 mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
1859 cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl); 1859 cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
1860 } 1860 }
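The think-time comparison above captures the whole idling trade-off: arming the timer only pays off when the task usually returns with more I/O before its slice ends. A self-contained restatement with invented numbers:

#include <stdbool.h>
#include <stdio.h>

/* Same comparison as in cfq_arm_slice_timer(): do not idle when the
 * mean think time exceeds the time remaining in the slice. */
static bool worth_idling(unsigned long slice_end, unsigned long now,
			 unsigned long ttime_mean)
{
	return slice_end - now >= ttime_mean;
}

int main(void)
{
	/* 8 jiffies left, task usually thinks for 12: skip idling */
	printf("%d\n", worth_idling(1008, 1000, 12));
	/* 8 jiffies left, task usually thinks for 3: idling may pay off */
	printf("%d\n", worth_idling(1008, 1000, 3));
	return 0;
}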
1861 1861
1862 /* 1862 /*
1863 * Move request from internal lists to the request queue dispatch list. 1863 * Move request from internal lists to the request queue dispatch list.
1864 */ 1864 */
1865 static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) 1865 static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
1866 { 1866 {
1867 struct cfq_data *cfqd = q->elevator->elevator_data; 1867 struct cfq_data *cfqd = q->elevator->elevator_data;
1868 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1868 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1869 1869
1870 cfq_log_cfqq(cfqd, cfqq, "dispatch_insert"); 1870 cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
1871 1871
1872 cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq); 1872 cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq);
1873 cfq_remove_request(rq); 1873 cfq_remove_request(rq);
1874 cfqq->dispatched++; 1874 cfqq->dispatched++;
1875 elv_dispatch_sort(q, rq); 1875 elv_dispatch_sort(q, rq);
1876 1876
1877 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; 1877 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
1878 cfqq->nr_sectors += blk_rq_sectors(rq); 1878 cfqq->nr_sectors += blk_rq_sectors(rq);
1879 } 1879 }
1880 1880
1881 /* 1881 /*
1882 * return expired entry, or NULL to just start from scratch in rbtree 1882 * return expired entry, or NULL to just start from scratch in rbtree
1883 */ 1883 */
1884 static struct request *cfq_check_fifo(struct cfq_queue *cfqq) 1884 static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
1885 { 1885 {
1886 struct request *rq = NULL; 1886 struct request *rq = NULL;
1887 1887
1888 if (cfq_cfqq_fifo_expire(cfqq)) 1888 if (cfq_cfqq_fifo_expire(cfqq))
1889 return NULL; 1889 return NULL;
1890 1890
1891 cfq_mark_cfqq_fifo_expire(cfqq); 1891 cfq_mark_cfqq_fifo_expire(cfqq);
1892 1892
1893 if (list_empty(&cfqq->fifo)) 1893 if (list_empty(&cfqq->fifo))
1894 return NULL; 1894 return NULL;
1895 1895
1896 rq = rq_entry_fifo(cfqq->fifo.next); 1896 rq = rq_entry_fifo(cfqq->fifo.next);
1897 if (time_before(jiffies, rq_fifo_time(rq))) 1897 if (time_before(jiffies, rq_fifo_time(rq)))
1898 rq = NULL; 1898 rq = NULL;
1899 1899
1900 cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); 1900 cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
1901 return rq; 1901 return rq;
1902 } 1902 }
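The FIFO check above hands a request back only once its per-request deadline has passed; otherwise the caller falls back to rb-tree order. In isolation, with illustrative values:

#include <stdio.h>

/* Mirrors the time_before(jiffies, rq_fifo_time(rq)) test above. */
static int fifo_expired(unsigned long now, unsigned long deadline)
{
	return !(now < deadline);
}

int main(void)
{
	printf("now=100 deadline=125 -> %d\n", fifo_expired(100, 125)); /* 0 */
	printf("now=130 deadline=125 -> %d\n", fifo_expired(130, 125)); /* 1 */
	return 0;
}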
1903 1903
1904 static inline int 1904 static inline int
1905 cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1905 cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1906 { 1906 {
1907 const int base_rq = cfqd->cfq_slice_async_rq; 1907 const int base_rq = cfqd->cfq_slice_async_rq;
1908 1908
1909 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); 1909 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
1910 1910
1911 return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio)); 1911 return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
1912 } 1912 }
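To make the depth formula above concrete: with the default cfq_slice_async_rq of 2, and assuming CFQ_PRIO_LISTS is 8 (IOPRIO_BE_NR), the allowed async depth runs from 4 requests at the lowest best-effort priority up to 32 at the highest. A quick worked check:

#include <stdio.h>

#define PRIO_LISTS 8	/* assumed value of CFQ_PRIO_LISTS (IOPRIO_BE_NR) */

static int max_rq(int base_rq, int ioprio)
{
	return 2 * (base_rq + base_rq * (PRIO_LISTS - 1 - ioprio));
}

int main(void)
{
	printf("ioprio 0 -> %d requests\n", max_rq(2, 0));	/* 32 */
	printf("ioprio 4 -> %d requests\n", max_rq(2, 4));	/* 16 */
	printf("ioprio 7 -> %d requests\n", max_rq(2, 7));	/*  4 */
	return 0;
}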
1913 1913
1914 /* 1914 /*
1915 * Must be called with the queue_lock held. 1915 * Must be called with the queue_lock held.
1916 */ 1916 */
1917 static int cfqq_process_refs(struct cfq_queue *cfqq) 1917 static int cfqq_process_refs(struct cfq_queue *cfqq)
1918 { 1918 {
1919 int process_refs, io_refs; 1919 int process_refs, io_refs;
1920 1920
1921 io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE]; 1921 io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
1922 process_refs = atomic_read(&cfqq->ref) - io_refs; 1922 process_refs = atomic_read(&cfqq->ref) - io_refs;
1923 BUG_ON(process_refs < 0); 1923 BUG_ON(process_refs < 0);
1924 return process_refs; 1924 return process_refs;
1925 } 1925 }
1926 1926
1927 static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq) 1927 static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
1928 { 1928 {
1929 int process_refs, new_process_refs; 1929 int process_refs, new_process_refs;
1930 struct cfq_queue *__cfqq; 1930 struct cfq_queue *__cfqq;
1931 1931
1932 /* Avoid a circular list and skip interim queue merges */ 1932 /* Avoid a circular list and skip interim queue merges */
1933 while ((__cfqq = new_cfqq->new_cfqq)) { 1933 while ((__cfqq = new_cfqq->new_cfqq)) {
1934 if (__cfqq == cfqq) 1934 if (__cfqq == cfqq)
1935 return; 1935 return;
1936 new_cfqq = __cfqq; 1936 new_cfqq = __cfqq;
1937 } 1937 }
1938 1938
1939 process_refs = cfqq_process_refs(cfqq); 1939 process_refs = cfqq_process_refs(cfqq);
1940 /* 1940 /*
1941 * If the process for the cfqq has gone away, there is no 1941 * If the process for the cfqq has gone away, there is no
1942 * sense in merging the queues. 1942 * sense in merging the queues.
1943 */ 1943 */
1944 if (process_refs == 0) 1944 if (process_refs == 0)
1945 return; 1945 return;
1946 1946
1947 /* 1947 /*
1948 * Merge in the direction of the lesser amount of work. 1948 * Merge in the direction of the lesser amount of work.
1949 */ 1949 */
1950 new_process_refs = cfqq_process_refs(new_cfqq); 1950 new_process_refs = cfqq_process_refs(new_cfqq);
1951 if (new_process_refs >= process_refs) { 1951 if (new_process_refs >= process_refs) {
1952 cfqq->new_cfqq = new_cfqq; 1952 cfqq->new_cfqq = new_cfqq;
1953 atomic_add(process_refs, &new_cfqq->ref); 1953 atomic_add(process_refs, &new_cfqq->ref);
1954 } else { 1954 } else {
1955 new_cfqq->new_cfqq = cfqq; 1955 new_cfqq->new_cfqq = cfqq;
1956 atomic_add(new_process_refs, &cfqq->ref); 1956 atomic_add(new_process_refs, &cfqq->ref);
1957 } 1957 }
1958 } 1958 }
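/*
 * In effect, the queue with fewer process references is pointed at the
 * other one through ->new_cfqq, and the target queue picks up that many
 * extra references so it cannot go away while the merge is pending. The
 * loop at the top walks to the end of an existing merge chain and bails
 * out if following it would create a cycle; the merge itself is
 * completed later (see cfq_merge_cfqqs, not shown in this hunk).
 */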
1959 1959
1960 static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, 1960 static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
1961 struct cfq_group *cfqg, enum wl_prio_t prio) 1961 struct cfq_group *cfqg, enum wl_prio_t prio)
1962 { 1962 {
1963 struct cfq_queue *queue; 1963 struct cfq_queue *queue;
1964 int i; 1964 int i;
1965 bool key_valid = false; 1965 bool key_valid = false;
1966 unsigned long lowest_key = 0; 1966 unsigned long lowest_key = 0;
1967 enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD; 1967 enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
1968 1968
1969 for (i = 0; i <= SYNC_WORKLOAD; ++i) { 1969 for (i = 0; i <= SYNC_WORKLOAD; ++i) {
1970 /* select the one with lowest rb_key */ 1970 /* select the one with lowest rb_key */
1971 queue = cfq_rb_first(service_tree_for(cfqg, prio, i)); 1971 queue = cfq_rb_first(service_tree_for(cfqg, prio, i));
1972 if (queue && 1972 if (queue &&
1973 (!key_valid || time_before(queue->rb_key, lowest_key))) { 1973 (!key_valid || time_before(queue->rb_key, lowest_key))) {
1974 lowest_key = queue->rb_key; 1974 lowest_key = queue->rb_key;
1975 cur_best = i; 1975 cur_best = i;
1976 key_valid = true; 1976 key_valid = true;
1977 } 1977 }
1978 } 1978 }
1979 1979
1980 return cur_best; 1980 return cur_best;
1981 } 1981 }
1982 1982
1983 static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) 1983 static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
1984 { 1984 {
1985 unsigned slice; 1985 unsigned slice;
1986 unsigned count; 1986 unsigned count;
1987 struct cfq_rb_root *st; 1987 struct cfq_rb_root *st;
1988 unsigned group_slice; 1988 unsigned group_slice;
1989 1989
1990 if (!cfqg) { 1990 if (!cfqg) {
1991 cfqd->serving_prio = IDLE_WORKLOAD; 1991 cfqd->serving_prio = IDLE_WORKLOAD;
1992 cfqd->workload_expires = jiffies + 1; 1992 cfqd->workload_expires = jiffies + 1;
1993 return; 1993 return;
1994 } 1994 }
1995 1995
1996 /* Choose next priority. RT > BE > IDLE */ 1996 /* Choose next priority. RT > BE > IDLE */
1997 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) 1997 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
1998 cfqd->serving_prio = RT_WORKLOAD; 1998 cfqd->serving_prio = RT_WORKLOAD;
1999 else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg)) 1999 else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg))
2000 cfqd->serving_prio = BE_WORKLOAD; 2000 cfqd->serving_prio = BE_WORKLOAD;
2001 else { 2001 else {
2002 cfqd->serving_prio = IDLE_WORKLOAD; 2002 cfqd->serving_prio = IDLE_WORKLOAD;
2003 cfqd->workload_expires = jiffies + 1; 2003 cfqd->workload_expires = jiffies + 1;
2004 return; 2004 return;
2005 } 2005 }
2006 2006
2007 /* 2007 /*
2008 * For RT and BE, we have to choose also the type 2008 * For RT and BE, we have to choose also the type
2009 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload 2009 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
2010 * expiration time 2010 * expiration time
2011 */ 2011 */
2012 st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); 2012 st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
2013 count = st->count; 2013 count = st->count;
2014 2014
2015 /* 2015 /*
2016 * check workload expiration, and that we still have other queues ready 2016 * check workload expiration, and that we still have other queues ready
2017 */ 2017 */
2018 if (count && !time_after(jiffies, cfqd->workload_expires)) 2018 if (count && !time_after(jiffies, cfqd->workload_expires))
2019 return; 2019 return;
2020 2020
2021 /* otherwise select new workload type */ 2021 /* otherwise select new workload type */
2022 cfqd->serving_type = 2022 cfqd->serving_type =
2023 cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); 2023 cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
2024 st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); 2024 st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
2025 count = st->count; 2025 count = st->count;
2026 2026
2027 /* 2027 /*
2028 * the workload slice is computed as a fraction of target latency 2028 * the workload slice is computed as a fraction of target latency
2029 * proportional to the number of queues in that workload, over 2029 * proportional to the number of queues in that workload, over
2030 * all the queues in the same priority class 2030 * all the queues in the same priority class
2031 */ 2031 */
2032 group_slice = cfq_group_slice(cfqd, cfqg); 2032 group_slice = cfq_group_slice(cfqd, cfqg);
2033 2033
2034 slice = group_slice * count / 2034 slice = group_slice * count /
2035 max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio], 2035 max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio],
2036 cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg)); 2036 cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg));
2037 2037
2038 if (cfqd->serving_type == ASYNC_WORKLOAD) { 2038 if (cfqd->serving_type == ASYNC_WORKLOAD) {
2039 unsigned int tmp; 2039 unsigned int tmp;
2040 2040
2041 /* 2041 /*
2042 * Async queues are currently system wide. Just taking 2042 * Async queues are currently system wide. Just taking
2043 * proportion of queues with-in same group will lead to higher 2043 * proportion of queues with-in same group will lead to higher
2044 * async ratio system wide as generally root group is going 2044 * async ratio system wide as generally root group is going
2045 * to have higher weight. A more accurate thing would be to 2045 * to have higher weight. A more accurate thing would be to
2046 * calculate system wide async/sync ratio. 2046 * calculate system wide async/sync ratio.
2047 */ 2047 */
2048 tmp = cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg); 2048 tmp = cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg);
2049 tmp = tmp/cfqd->busy_queues; 2049 tmp = tmp/cfqd->busy_queues;
2050 slice = min_t(unsigned, slice, tmp); 2050 slice = min_t(unsigned, slice, tmp);
2051 2051
2052 /* async workload slice is scaled down according to 2052 /* async workload slice is scaled down according to
2053 * the sync/async slice ratio. */ 2053 * the sync/async slice ratio. */
2054 slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1]; 2054 slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1];
2055 } else 2055 } else
2056 /* sync workload slice is at least 2 * cfq_slice_idle */ 2056 /* sync workload slice is at least 2 * cfq_slice_idle */
2057 slice = max(slice, 2 * cfqd->cfq_slice_idle); 2057 slice = max(slice, 2 * cfqd->cfq_slice_idle);
2058 2058
2059 slice = max_t(unsigned, slice, CFQ_MIN_TT); 2059 slice = max_t(unsigned, slice, CFQ_MIN_TT);
2060 cfq_log(cfqd, "workload slice:%d", slice); 2060 cfq_log(cfqd, "workload slice:%d", slice);
2061 cfqd->workload_expires = jiffies + slice; 2061 cfqd->workload_expires = jiffies + slice;
2062 cfqd->noidle_tree_requires_idle = false; 2062 cfqd->noidle_tree_requires_idle = false;
2063 } 2063 }
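/*
 * Rough worked example of the slice sizing above (illustrative numbers,
 * not taken from the source): with a 300ms group slice and a chosen sync
 * workload holding 3 of the 6 busy queues in that priority class, the
 * workload runs for about 300 * 3 / 6 = 150ms before a new workload type
 * is picked. An async workload is additionally scaled by the
 * cfq_slice[0]/cfq_slice[1] (async/sync) ratio and capped by the
 * system-wide share of async queues; sync workloads get at least
 * 2 * cfq_slice_idle, and every result is floored at CFQ_MIN_TT.
 */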
2064 2064
2065 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) 2065 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
2066 { 2066 {
2067 struct cfq_rb_root *st = &cfqd->grp_service_tree; 2067 struct cfq_rb_root *st = &cfqd->grp_service_tree;
2068 struct cfq_group *cfqg; 2068 struct cfq_group *cfqg;
2069 2069
2070 if (RB_EMPTY_ROOT(&st->rb)) 2070 if (RB_EMPTY_ROOT(&st->rb))
2071 return NULL; 2071 return NULL;
2072 cfqg = cfq_rb_first_group(st); 2072 cfqg = cfq_rb_first_group(st);
2073 st->active = &cfqg->rb_node; 2073 st->active = &cfqg->rb_node;
2074 update_min_vdisktime(st); 2074 update_min_vdisktime(st);
2075 return cfqg; 2075 return cfqg;
2076 } 2076 }
2077 2077
2078 static void cfq_choose_cfqg(struct cfq_data *cfqd) 2078 static void cfq_choose_cfqg(struct cfq_data *cfqd)
2079 { 2079 {
2080 struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd); 2080 struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
2081 2081
2082 cfqd->serving_group = cfqg; 2082 cfqd->serving_group = cfqg;
2083 2083
2084 /* Restore the workload type data */ 2084 /* Restore the workload type data */
2085 if (cfqg->saved_workload_slice) { 2085 if (cfqg->saved_workload_slice) {
2086 cfqd->workload_expires = jiffies + cfqg->saved_workload_slice; 2086 cfqd->workload_expires = jiffies + cfqg->saved_workload_slice;
2087 cfqd->serving_type = cfqg->saved_workload; 2087 cfqd->serving_type = cfqg->saved_workload;
2088 cfqd->serving_prio = cfqg->saved_serving_prio; 2088 cfqd->serving_prio = cfqg->saved_serving_prio;
2089 } else 2089 } else
2090 cfqd->workload_expires = jiffies - 1; 2090 cfqd->workload_expires = jiffies - 1;
2091 2091
2092 choose_service_tree(cfqd, cfqg); 2092 choose_service_tree(cfqd, cfqg);
2093 } 2093 }
2094 2094
2095 /* 2095 /*
2096 * Select a queue for service. If we have a current active queue, 2096 * Select a queue for service. If we have a current active queue,
2097 * check whether to continue servicing it, or retrieve and set a new one. 2097 * check whether to continue servicing it, or retrieve and set a new one.
2098 */ 2098 */
2099 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) 2099 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
2100 { 2100 {
2101 struct cfq_queue *cfqq, *new_cfqq = NULL; 2101 struct cfq_queue *cfqq, *new_cfqq = NULL;
2102 2102
2103 cfqq = cfqd->active_queue; 2103 cfqq = cfqd->active_queue;
2104 if (!cfqq) 2104 if (!cfqq)
2105 goto new_queue; 2105 goto new_queue;
2106 2106
2107 if (!cfqd->rq_queued) 2107 if (!cfqd->rq_queued)
2108 return NULL; 2108 return NULL;
2109 2109
2110 /* 2110 /*
2111 * We were waiting for group to get backlogged. Expire the queue 2111 * We were waiting for group to get backlogged. Expire the queue
2112 */ 2112 */
2113 if (cfq_cfqq_wait_busy(cfqq) && !RB_EMPTY_ROOT(&cfqq->sort_list)) 2113 if (cfq_cfqq_wait_busy(cfqq) && !RB_EMPTY_ROOT(&cfqq->sort_list))
2114 goto expire; 2114 goto expire;
2115 2115
2116 /* 2116 /*
2117 * The active queue has run out of time, expire it and select new. 2117 * The active queue has run out of time, expire it and select new.
2118 */ 2118 */
2119 if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq)) { 2119 if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq)) {
2120 /* 2120 /*
2121 * If slice had not expired at the completion of last request 2121 * If slice had not expired at the completion of last request
2122 * we might not have turned on wait_busy flag. Don't expire 2122 * we might not have turned on wait_busy flag. Don't expire
2123 * the queue yet. Allow the group to get backlogged. 2123 * the queue yet. Allow the group to get backlogged.
2124 * 2124 *
2125 * The very fact that we have used the slice, that means we 2125 * The very fact that we have used the slice, that means we
2126 * have been idling all along on this queue and it should be 2126 * have been idling all along on this queue and it should be
2127 * ok to wait for this request to complete. 2127 * ok to wait for this request to complete.
2128 */ 2128 */
2129 if (cfqq->cfqg->nr_cfqq == 1 && RB_EMPTY_ROOT(&cfqq->sort_list) 2129 if (cfqq->cfqg->nr_cfqq == 1 && RB_EMPTY_ROOT(&cfqq->sort_list)
2130 && cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { 2130 && cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
2131 cfqq = NULL; 2131 cfqq = NULL;
2132 goto keep_queue; 2132 goto keep_queue;
2133 } else 2133 } else
2134 goto expire; 2134 goto expire;
2135 } 2135 }
2136 2136
2137 /* 2137 /*
2138 * The active queue has requests and isn't expired, allow it to 2138 * The active queue has requests and isn't expired, allow it to
2139 * dispatch. 2139 * dispatch.
2140 */ 2140 */
2141 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) 2141 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
2142 goto keep_queue; 2142 goto keep_queue;
2143 2143
2144 /* 2144 /*
2145 * If another queue has a request waiting within our mean seek 2145 * If another queue has a request waiting within our mean seek
2146 * distance, let it run. The expire code will check for close 2146 * distance, let it run. The expire code will check for close
2147 * cooperators and put the close queue at the front of the service 2147 * cooperators and put the close queue at the front of the service
2148 * tree. If possible, merge the expiring queue with the new cfqq. 2148 * tree. If possible, merge the expiring queue with the new cfqq.
2149 */ 2149 */
2150 new_cfqq = cfq_close_cooperator(cfqd, cfqq); 2150 new_cfqq = cfq_close_cooperator(cfqd, cfqq);
2151 if (new_cfqq) { 2151 if (new_cfqq) {
2152 if (!cfqq->new_cfqq) 2152 if (!cfqq->new_cfqq)
2153 cfq_setup_merge(cfqq, new_cfqq); 2153 cfq_setup_merge(cfqq, new_cfqq);
2154 goto expire; 2154 goto expire;
2155 } 2155 }
2156 2156
2157 /* 2157 /*
2158 * No requests pending. If the active queue still has requests in 2158 * No requests pending. If the active queue still has requests in
2159 * flight or is idling for a new request, allow either of these 2159 * flight or is idling for a new request, allow either of these
2160 * conditions to happen (or time out) before selecting a new queue. 2160 * conditions to happen (or time out) before selecting a new queue.
2161 */ 2161 */
2162 if (timer_pending(&cfqd->idle_slice_timer) || 2162 if (timer_pending(&cfqd->idle_slice_timer) ||
2163 (cfqq->dispatched && cfq_should_idle(cfqd, cfqq))) { 2163 (cfqq->dispatched && cfq_should_idle(cfqd, cfqq))) {
2164 cfqq = NULL; 2164 cfqq = NULL;
2165 goto keep_queue; 2165 goto keep_queue;
2166 } 2166 }
2167 2167
2168 expire: 2168 expire:
2169 cfq_slice_expired(cfqd, 0); 2169 cfq_slice_expired(cfqd, 0);
2170 new_queue: 2170 new_queue:
2171 /* 2171 /*
2172 * Current queue expired. Check if we have to switch to a new 2172 * Current queue expired. Check if we have to switch to a new
2173 * service tree 2173 * service tree
2174 */ 2174 */
2175 if (!new_cfqq) 2175 if (!new_cfqq)
2176 cfq_choose_cfqg(cfqd); 2176 cfq_choose_cfqg(cfqd);
2177 2177
2178 cfqq = cfq_set_active_queue(cfqd, new_cfqq); 2178 cfqq = cfq_set_active_queue(cfqd, new_cfqq);
2179 keep_queue: 2179 keep_queue:
2180 return cfqq; 2180 return cfqq;
2181 } 2181 }
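/*
 * Rough order of the checks above: bail out if nothing is queued at all;
 * expire a queue that was only waiting for its group to get backlogged
 * once a request shows up; expire a queue whose slice is used up, unless
 * it is the lone queue of its group and idling on it is worthwhile; keep
 * a queue that still has queued requests; otherwise prefer a close
 * cooperator (recording a merge), keep idling while a request may still
 * arrive, and only then expire and pick a new group and queue.
 */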
2182 2182
2183 static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) 2183 static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
2184 { 2184 {
2185 int dispatched = 0; 2185 int dispatched = 0;
2186 2186
2187 while (cfqq->next_rq) { 2187 while (cfqq->next_rq) {
2188 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq); 2188 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
2189 dispatched++; 2189 dispatched++;
2190 } 2190 }
2191 2191
2192 BUG_ON(!list_empty(&cfqq->fifo)); 2192 BUG_ON(!list_empty(&cfqq->fifo));
2193 2193
2194 /* By default cfqq is not expired if it is empty. Do it explicitly */ 2194 /* By default cfqq is not expired if it is empty. Do it explicitly */
2195 __cfq_slice_expired(cfqq->cfqd, cfqq, 0); 2195 __cfq_slice_expired(cfqq->cfqd, cfqq, 0);
2196 return dispatched; 2196 return dispatched;
2197 } 2197 }
2198 2198
2199 /* 2199 /*
2200 * Drain our current requests. Used for barriers and when switching 2200 * Drain our current requests. Used for barriers and when switching
2201 * io schedulers on-the-fly. 2201 * io schedulers on-the-fly.
2202 */ 2202 */
2203 static int cfq_forced_dispatch(struct cfq_data *cfqd) 2203 static int cfq_forced_dispatch(struct cfq_data *cfqd)
2204 { 2204 {
2205 struct cfq_queue *cfqq; 2205 struct cfq_queue *cfqq;
2206 int dispatched = 0; 2206 int dispatched = 0;
2207 2207
2208 while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) 2208 /* Expire the timeslice of the current active queue first */
2209 cfq_slice_expired(cfqd, 0);
2210 while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) {
2211 __cfq_set_active_queue(cfqd, cfqq);
2209 dispatched += __cfq_forced_dispatch_cfqq(cfqq); 2212 dispatched += __cfq_forced_dispatch_cfqq(cfqq);
2213 }
2210 2214
2211 cfq_slice_expired(cfqd, 0);
2212 BUG_ON(cfqd->busy_queues); 2215 BUG_ON(cfqd->busy_queues);
2213 2216
2214 cfq_log(cfqd, "forced_dispatch=%d", dispatched); 2217 cfq_log(cfqd, "forced_dispatch=%d", dispatched);
2215 return dispatched; 2218 return dispatched;
2216 } 2219 }
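/*
 * This hunk is the change made by this commit: the active queue's
 * timeslice is now expired before the drain loop, and every queue
 * returned by cfq_get_next_queue_forced() is made the active queue via
 * __cfq_set_active_queue() before its requests are flushed. Since
 * __cfq_forced_dispatch_cfqq() ends by expiring the queue it drained,
 * activating it first lets that expiry account slice_used against fresh
 * activation state rather than whatever an earlier activation left
 * behind.
 */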
2217 2220
2218 static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, 2221 static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
2219 struct cfq_queue *cfqq) 2222 struct cfq_queue *cfqq)
2220 { 2223 {
2221 /* the queue hasn't finished any request, can't estimate */ 2224 /* the queue hasn't finished any request, can't estimate */
2222 if (cfq_cfqq_slice_new(cfqq)) 2225 if (cfq_cfqq_slice_new(cfqq))
2223 return 1; 2226 return 1;
2224 if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, 2227 if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
2225 cfqq->slice_end)) 2228 cfqq->slice_end))
2226 return 1; 2229 return 1;
2227 2230
2228 return 0; 2231 return 0;
2229 } 2232 }
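/*
 * In other words: the slice is considered about to run out if the
 * requests already dispatched, at an estimated cfq_slice_idle jiffies of
 * service each, would carry us past slice_end. A brand new slice has no
 * completed requests to base an estimate on, so it is reported as "used
 * soon" as well.
 */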
2230 2233
2231 static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) 2234 static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2232 { 2235 {
2233 unsigned int max_dispatch; 2236 unsigned int max_dispatch;
2234 2237
2235 /* 2238 /*
2236 * Drain async requests before we start sync IO 2239 * Drain async requests before we start sync IO
2237 */ 2240 */
2238 if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_flight[BLK_RW_ASYNC]) 2241 if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_flight[BLK_RW_ASYNC])
2239 return false; 2242 return false;
2240 2243
2241 /* 2244 /*
2242 * If this is an async queue and we have sync IO in flight, let it wait 2245 * If this is an async queue and we have sync IO in flight, let it wait
2243 */ 2246 */
2244 if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq)) 2247 if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq))
2245 return false; 2248 return false;
2246 2249
2247 max_dispatch = max_t(unsigned int, cfqd->cfq_quantum / 2, 1); 2250 max_dispatch = max_t(unsigned int, cfqd->cfq_quantum / 2, 1);
2248 if (cfq_class_idle(cfqq)) 2251 if (cfq_class_idle(cfqq))
2249 max_dispatch = 1; 2252 max_dispatch = 1;
2250 2253
2251 /* 2254 /*
2252 * Does this cfqq already have too much IO in flight? 2255 * Does this cfqq already have too much IO in flight?
2253 */ 2256 */
2254 if (cfqq->dispatched >= max_dispatch) { 2257 if (cfqq->dispatched >= max_dispatch) {
2255 /* 2258 /*
2256 * idle queue must always only have a single IO in flight 2259 * idle queue must always only have a single IO in flight
2257 */ 2260 */
2258 if (cfq_class_idle(cfqq)) 2261 if (cfq_class_idle(cfqq))
2259 return false; 2262 return false;
2260 2263
2261 /* 2264 /*
2262 * We have other queues, don't allow more IO from this one 2265 * We have other queues, don't allow more IO from this one
2263 */ 2266 */
2264 if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq)) 2267 if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq))
2265 return false; 2268 return false;
2266 2269
2267 /* 2270 /*
2268 * Sole queue user, no limit 2271 * Sole queue user, no limit
2269 */ 2272 */
2270 if (cfqd->busy_queues == 1) 2273 if (cfqd->busy_queues == 1)
2271 max_dispatch = -1; 2274 max_dispatch = -1;
2272 else 2275 else
2273 /* 2276 /*
2274 * Normally we start throttling cfqq when cfq_quantum/2 2277 * Normally we start throttling cfqq when cfq_quantum/2
2275 * requests have been dispatched. But we can drive 2278 * requests have been dispatched. But we can drive
2276 * deeper queue depths at the beginning of slice 2279 * deeper queue depths at the beginning of slice
2277 * subjected to upper limit of cfq_quantum. 2280 * subjected to upper limit of cfq_quantum.
2278 * */ 2281 * */
2279 max_dispatch = cfqd->cfq_quantum; 2282 max_dispatch = cfqd->cfq_quantum;
2280 } 2283 }
2281 2284
2282 /* 2285 /*
2283 * Async queues must wait a bit before being allowed dispatch. 2286 * Async queues must wait a bit before being allowed dispatch.
2284 * We also ramp up the dispatch depth gradually for async IO, 2287 * We also ramp up the dispatch depth gradually for async IO,
2285 * based on the last sync IO we serviced 2288 * based on the last sync IO we serviced
2286 */ 2289 */
2287 if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { 2290 if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
2288 unsigned long last_sync = jiffies - cfqd->last_delayed_sync; 2291 unsigned long last_sync = jiffies - cfqd->last_delayed_sync;
2289 unsigned int depth; 2292 unsigned int depth;
2290 2293
2291 depth = last_sync / cfqd->cfq_slice[1]; 2294 depth = last_sync / cfqd->cfq_slice[1];
2292 if (!depth && !cfqq->dispatched) 2295 if (!depth && !cfqq->dispatched)
2293 depth = 1; 2296 depth = 1;
2294 if (depth < max_dispatch) 2297 if (depth < max_dispatch)
2295 max_dispatch = depth; 2298 max_dispatch = depth;
2296 } 2299 }
2297 2300
2298 /* 2301 /*
2299 * If we're below the current max, allow a dispatch 2302 * If we're below the current max, allow a dispatch
2300 */ 2303 */
2301 return cfqq->dispatched < max_dispatch; 2304 return cfqq->dispatched < max_dispatch;
2302 } 2305 }
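/*
 * Net effect of the checks above: a queue normally keeps cfq_quantum/2
 * requests in flight (at least one, and exactly one for the idle class).
 * Once that is exceeded, dispatch stops if other queues are waiting and
 * this slice looks nearly used; a lone busy queue is effectively
 * unlimited, otherwise the cap may grow up to cfq_quantum. Async queues
 * are further throttled right after recent sync IO, with at least one
 * request allowed when nothing is in flight.
 */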
2303 2306
2304 /* 2307 /*
2305 * Dispatch a request from cfqq, moving them to the request queue 2308 * Dispatch a request from cfqq, moving them to the request queue
2306 * dispatch list. 2309 * dispatch list.
2307 */ 2310 */
2308 static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) 2311 static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2309 { 2312 {
2310 struct request *rq; 2313 struct request *rq;
2311 2314
2312 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); 2315 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
2313 2316
2314 if (!cfq_may_dispatch(cfqd, cfqq)) 2317 if (!cfq_may_dispatch(cfqd, cfqq))
2315 return false; 2318 return false;
2316 2319
2317 /* 2320 /*
2318 * follow expired path, else get first next available 2321 * follow expired path, else get first next available
2319 */ 2322 */
2320 rq = cfq_check_fifo(cfqq); 2323 rq = cfq_check_fifo(cfqq);
2321 if (!rq) 2324 if (!rq)
2322 rq = cfqq->next_rq; 2325 rq = cfqq->next_rq;
2323 2326
2324 /* 2327 /*
2325 * insert request into driver dispatch list 2328 * insert request into driver dispatch list
2326 */ 2329 */
2327 cfq_dispatch_insert(cfqd->queue, rq); 2330 cfq_dispatch_insert(cfqd->queue, rq);
2328 2331
2329 if (!cfqd->active_cic) { 2332 if (!cfqd->active_cic) {
2330 struct cfq_io_context *cic = RQ_CIC(rq); 2333 struct cfq_io_context *cic = RQ_CIC(rq);
2331 2334
2332 atomic_long_inc(&cic->ioc->refcount); 2335 atomic_long_inc(&cic->ioc->refcount);
2333 cfqd->active_cic = cic; 2336 cfqd->active_cic = cic;
2334 } 2337 }
2335 2338
2336 return true; 2339 return true;
2337 } 2340 }
2338 2341
2339 /* 2342 /*
2340 * Find the cfqq that we need to service and move a request from that to the 2343 * Find the cfqq that we need to service and move a request from that to the
2341 * dispatch list 2344 * dispatch list
2342 */ 2345 */
2343 static int cfq_dispatch_requests(struct request_queue *q, int force) 2346 static int cfq_dispatch_requests(struct request_queue *q, int force)
2344 { 2347 {
2345 struct cfq_data *cfqd = q->elevator->elevator_data; 2348 struct cfq_data *cfqd = q->elevator->elevator_data;
2346 struct cfq_queue *cfqq; 2349 struct cfq_queue *cfqq;
2347 2350
2348 if (!cfqd->busy_queues) 2351 if (!cfqd->busy_queues)
2349 return 0; 2352 return 0;
2350 2353
2351 if (unlikely(force)) 2354 if (unlikely(force))
2352 return cfq_forced_dispatch(cfqd); 2355 return cfq_forced_dispatch(cfqd);
2353 2356
2354 cfqq = cfq_select_queue(cfqd); 2357 cfqq = cfq_select_queue(cfqd);
2355 if (!cfqq) 2358 if (!cfqq)
2356 return 0; 2359 return 0;
2357 2360
2358 /* 2361 /*
2359 * Dispatch a request from this cfqq, if it is allowed 2362 * Dispatch a request from this cfqq, if it is allowed
2360 */ 2363 */
2361 if (!cfq_dispatch_request(cfqd, cfqq)) 2364 if (!cfq_dispatch_request(cfqd, cfqq))
2362 return 0; 2365 return 0;
2363 2366
2364 cfqq->slice_dispatch++; 2367 cfqq->slice_dispatch++;
2365 cfq_clear_cfqq_must_dispatch(cfqq); 2368 cfq_clear_cfqq_must_dispatch(cfqq);
2366 2369
2367 /* 2370 /*
2368 * expire an async queue immediately if it has used up its slice. idle 2371 * expire an async queue immediately if it has used up its slice. idle
2369 * queue always expire after 1 dispatch round. 2372 * queue always expire after 1 dispatch round.
2370 */ 2373 */
2371 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && 2374 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
2372 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || 2375 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
2373 cfq_class_idle(cfqq))) { 2376 cfq_class_idle(cfqq))) {
2374 cfqq->slice_end = jiffies + 1; 2377 cfqq->slice_end = jiffies + 1;
2375 cfq_slice_expired(cfqd, 0); 2378 cfq_slice_expired(cfqd, 0);
2376 } 2379 }
2377 2380
2378 cfq_log_cfqq(cfqd, cfqq, "dispatched a request"); 2381 cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
2379 return 1; 2382 return 1;
2380 } 2383 }
2381 2384
2382 /* 2385 /*
2383 * task holds one reference to the queue, dropped when task exits. each rq 2386 * task holds one reference to the queue, dropped when task exits. each rq
2384 * in-flight on this queue also holds a reference, dropped when rq is freed. 2387 * in-flight on this queue also holds a reference, dropped when rq is freed.
2385 * 2388 *
2386 * Each cfq queue took a reference on the parent group. Drop it now. 2389 * Each cfq queue took a reference on the parent group. Drop it now.
2387 * queue lock must be held here. 2390 * queue lock must be held here.
2388 */ 2391 */
2389 static void cfq_put_queue(struct cfq_queue *cfqq) 2392 static void cfq_put_queue(struct cfq_queue *cfqq)
2390 { 2393 {
2391 struct cfq_data *cfqd = cfqq->cfqd; 2394 struct cfq_data *cfqd = cfqq->cfqd;
2392 struct cfq_group *cfqg, *orig_cfqg; 2395 struct cfq_group *cfqg, *orig_cfqg;
2393 2396
2394 BUG_ON(atomic_read(&cfqq->ref) <= 0); 2397 BUG_ON(atomic_read(&cfqq->ref) <= 0);
2395 2398
2396 if (!atomic_dec_and_test(&cfqq->ref)) 2399 if (!atomic_dec_and_test(&cfqq->ref))
2397 return; 2400 return;
2398 2401
2399 cfq_log_cfqq(cfqd, cfqq, "put_queue"); 2402 cfq_log_cfqq(cfqd, cfqq, "put_queue");
2400 BUG_ON(rb_first(&cfqq->sort_list)); 2403 BUG_ON(rb_first(&cfqq->sort_list));
2401 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); 2404 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
2402 cfqg = cfqq->cfqg; 2405 cfqg = cfqq->cfqg;
2403 orig_cfqg = cfqq->orig_cfqg; 2406 orig_cfqg = cfqq->orig_cfqg;
2404 2407
2405 if (unlikely(cfqd->active_queue == cfqq)) { 2408 if (unlikely(cfqd->active_queue == cfqq)) {
2406 __cfq_slice_expired(cfqd, cfqq, 0); 2409 __cfq_slice_expired(cfqd, cfqq, 0);
2407 cfq_schedule_dispatch(cfqd); 2410 cfq_schedule_dispatch(cfqd);
2408 } 2411 }
2409 2412
2410 BUG_ON(cfq_cfqq_on_rr(cfqq)); 2413 BUG_ON(cfq_cfqq_on_rr(cfqq));
2411 kmem_cache_free(cfq_pool, cfqq); 2414 kmem_cache_free(cfq_pool, cfqq);
2412 cfq_put_cfqg(cfqg); 2415 cfq_put_cfqg(cfqg);
2413 if (orig_cfqg) 2416 if (orig_cfqg)
2414 cfq_put_cfqg(orig_cfqg); 2417 cfq_put_cfqg(orig_cfqg);
2415 } 2418 }
2416 2419
2417 /* 2420 /*
2418 * Must always be called with the rcu_read_lock() held 2421 * Must always be called with the rcu_read_lock() held
2419 */ 2422 */
2420 static void 2423 static void
2421 __call_for_each_cic(struct io_context *ioc, 2424 __call_for_each_cic(struct io_context *ioc,
2422 void (*func)(struct io_context *, struct cfq_io_context *)) 2425 void (*func)(struct io_context *, struct cfq_io_context *))
2423 { 2426 {
2424 struct cfq_io_context *cic; 2427 struct cfq_io_context *cic;
2425 struct hlist_node *n; 2428 struct hlist_node *n;
2426 2429
2427 hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) 2430 hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
2428 func(ioc, cic); 2431 func(ioc, cic);
2429 } 2432 }
2430 2433
2431 /* 2434 /*
2432 * Call func for each cic attached to this ioc. 2435 * Call func for each cic attached to this ioc.
2433 */ 2436 */
2434 static void 2437 static void
2435 call_for_each_cic(struct io_context *ioc, 2438 call_for_each_cic(struct io_context *ioc,
2436 void (*func)(struct io_context *, struct cfq_io_context *)) 2439 void (*func)(struct io_context *, struct cfq_io_context *))
2437 { 2440 {
2438 rcu_read_lock(); 2441 rcu_read_lock();
2439 __call_for_each_cic(ioc, func); 2442 __call_for_each_cic(ioc, func);
2440 rcu_read_unlock(); 2443 rcu_read_unlock();
2441 } 2444 }
2442 2445
2443 static void cfq_cic_free_rcu(struct rcu_head *head) 2446 static void cfq_cic_free_rcu(struct rcu_head *head)
2444 { 2447 {
2445 struct cfq_io_context *cic; 2448 struct cfq_io_context *cic;
2446 2449
2447 cic = container_of(head, struct cfq_io_context, rcu_head); 2450 cic = container_of(head, struct cfq_io_context, rcu_head);
2448 2451
2449 kmem_cache_free(cfq_ioc_pool, cic); 2452 kmem_cache_free(cfq_ioc_pool, cic);
2450 elv_ioc_count_dec(cfq_ioc_count); 2453 elv_ioc_count_dec(cfq_ioc_count);
2451 2454
2452 if (ioc_gone) { 2455 if (ioc_gone) {
2453 /* 2456 /*
2454 * CFQ scheduler is exiting, grab exit lock and check 2457 * CFQ scheduler is exiting, grab exit lock and check
2455 * the pending io context count. If it hits zero, 2458 * the pending io context count. If it hits zero,
2456 * complete ioc_gone and set it back to NULL 2459 * complete ioc_gone and set it back to NULL
2457 */ 2460 */
2458 spin_lock(&ioc_gone_lock); 2461 spin_lock(&ioc_gone_lock);
2459 if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) { 2462 if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) {
2460 complete(ioc_gone); 2463 complete(ioc_gone);
2461 ioc_gone = NULL; 2464 ioc_gone = NULL;
2462 } 2465 }
2463 spin_unlock(&ioc_gone_lock); 2466 spin_unlock(&ioc_gone_lock);
2464 } 2467 }
2465 } 2468 }
2466 2469
2467 static void cfq_cic_free(struct cfq_io_context *cic) 2470 static void cfq_cic_free(struct cfq_io_context *cic)
2468 { 2471 {
2469 call_rcu(&cic->rcu_head, cfq_cic_free_rcu); 2472 call_rcu(&cic->rcu_head, cfq_cic_free_rcu);
2470 } 2473 }
2471 2474
2472 static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) 2475 static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
2473 { 2476 {
2474 unsigned long flags; 2477 unsigned long flags;
2475 2478
2476 BUG_ON(!cic->dead_key); 2479 BUG_ON(!cic->dead_key);
2477 2480
2478 spin_lock_irqsave(&ioc->lock, flags); 2481 spin_lock_irqsave(&ioc->lock, flags);
2479 radix_tree_delete(&ioc->radix_root, cic->dead_key); 2482 radix_tree_delete(&ioc->radix_root, cic->dead_key);
2480 hlist_del_rcu(&cic->cic_list); 2483 hlist_del_rcu(&cic->cic_list);
2481 spin_unlock_irqrestore(&ioc->lock, flags); 2484 spin_unlock_irqrestore(&ioc->lock, flags);
2482 2485
2483 cfq_cic_free(cic); 2486 cfq_cic_free(cic);
2484 } 2487 }
2485 2488
2486 /* 2489 /*
2487 * Must be called with rcu_read_lock() held or preemption otherwise disabled. 2490 * Must be called with rcu_read_lock() held or preemption otherwise disabled.
2488 * Only two callers of this - ->dtor() which is called with the rcu_read_lock(), 2491 * Only two callers of this - ->dtor() which is called with the rcu_read_lock(),
2489 * and ->trim() which is called with the task lock held 2492 * and ->trim() which is called with the task lock held
2490 */ 2493 */
2491 static void cfq_free_io_context(struct io_context *ioc) 2494 static void cfq_free_io_context(struct io_context *ioc)
2492 { 2495 {
2493 /* 2496 /*
2494 * ioc->refcount is zero here, or we are called from elv_unregister(), 2497 * ioc->refcount is zero here, or we are called from elv_unregister(),
2495 * so no more cic's are allowed to be linked into this ioc. So it 2498 * so no more cic's are allowed to be linked into this ioc. So it
2496 * should be ok to iterate over the known list, we will see all cic's 2499 * should be ok to iterate over the known list, we will see all cic's
2497 * since no new ones are added. 2500 * since no new ones are added.
2498 */ 2501 */
2499 __call_for_each_cic(ioc, cic_free_func); 2502 __call_for_each_cic(ioc, cic_free_func);
2500 } 2503 }
2501 2504
2502 static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) 2505 static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2503 { 2506 {
2504 struct cfq_queue *__cfqq, *next; 2507 struct cfq_queue *__cfqq, *next;
2505 2508
2506 if (unlikely(cfqq == cfqd->active_queue)) { 2509 if (unlikely(cfqq == cfqd->active_queue)) {
2507 __cfq_slice_expired(cfqd, cfqq, 0); 2510 __cfq_slice_expired(cfqd, cfqq, 0);
2508 cfq_schedule_dispatch(cfqd); 2511 cfq_schedule_dispatch(cfqd);
2509 } 2512 }
2510 2513
2511 /* 2514 /*
2512 * If this queue was scheduled to merge with another queue, be 2515 * If this queue was scheduled to merge with another queue, be
2513 * sure to drop the reference taken on that queue (and others in 2516 * sure to drop the reference taken on that queue (and others in
2514 * the merge chain). See cfq_setup_merge and cfq_merge_cfqqs. 2517 * the merge chain). See cfq_setup_merge and cfq_merge_cfqqs.
2515 */ 2518 */
2516 __cfqq = cfqq->new_cfqq; 2519 __cfqq = cfqq->new_cfqq;
2517 while (__cfqq) { 2520 while (__cfqq) {
2518 if (__cfqq == cfqq) { 2521 if (__cfqq == cfqq) {
2519 WARN(1, "cfqq->new_cfqq loop detected\n"); 2522 WARN(1, "cfqq->new_cfqq loop detected\n");
2520 break; 2523 break;
2521 } 2524 }
2522 next = __cfqq->new_cfqq; 2525 next = __cfqq->new_cfqq;
2523 cfq_put_queue(__cfqq); 2526 cfq_put_queue(__cfqq);
2524 __cfqq = next; 2527 __cfqq = next;
2525 } 2528 }
2526 2529
2527 cfq_put_queue(cfqq); 2530 cfq_put_queue(cfqq);
2528 } 2531 }
2529 2532
2530 static void __cfq_exit_single_io_context(struct cfq_data *cfqd, 2533 static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
2531 struct cfq_io_context *cic) 2534 struct cfq_io_context *cic)
2532 { 2535 {
2533 struct io_context *ioc = cic->ioc; 2536 struct io_context *ioc = cic->ioc;
2534 2537
2535 list_del_init(&cic->queue_list); 2538 list_del_init(&cic->queue_list);
2536 2539
2537 /* 2540 /*
2538 * Make sure key == NULL is seen for dead queues 2541 * Make sure key == NULL is seen for dead queues
2539 */ 2542 */
2540 smp_wmb(); 2543 smp_wmb();
2541 cic->dead_key = (unsigned long) cic->key; 2544 cic->dead_key = (unsigned long) cic->key;
2542 cic->key = NULL; 2545 cic->key = NULL;
2543 2546
2544 if (ioc->ioc_data == cic) 2547 if (ioc->ioc_data == cic)
2545 rcu_assign_pointer(ioc->ioc_data, NULL); 2548 rcu_assign_pointer(ioc->ioc_data, NULL);
2546 2549
2547 if (cic->cfqq[BLK_RW_ASYNC]) { 2550 if (cic->cfqq[BLK_RW_ASYNC]) {
2548 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]); 2551 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
2549 cic->cfqq[BLK_RW_ASYNC] = NULL; 2552 cic->cfqq[BLK_RW_ASYNC] = NULL;
2550 } 2553 }
2551 2554
2552 if (cic->cfqq[BLK_RW_SYNC]) { 2555 if (cic->cfqq[BLK_RW_SYNC]) {
2553 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]); 2556 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]);
2554 cic->cfqq[BLK_RW_SYNC] = NULL; 2557 cic->cfqq[BLK_RW_SYNC] = NULL;
2555 } 2558 }
2556 } 2559 }
2557 2560
2558 static void cfq_exit_single_io_context(struct io_context *ioc, 2561 static void cfq_exit_single_io_context(struct io_context *ioc,
2559 struct cfq_io_context *cic) 2562 struct cfq_io_context *cic)
2560 { 2563 {
2561 struct cfq_data *cfqd = cic->key; 2564 struct cfq_data *cfqd = cic->key;
2562 2565
2563 if (cfqd) { 2566 if (cfqd) {
2564 struct request_queue *q = cfqd->queue; 2567 struct request_queue *q = cfqd->queue;
2565 unsigned long flags; 2568 unsigned long flags;
2566 2569
2567 spin_lock_irqsave(q->queue_lock, flags); 2570 spin_lock_irqsave(q->queue_lock, flags);
2568 2571
2569 /* 2572 /*
2570 * Ensure we get a fresh copy of the ->key to prevent 2573 * Ensure we get a fresh copy of the ->key to prevent
2571 * race between exiting task and queue 2574 * race between exiting task and queue
2572 */ 2575 */
2573 smp_read_barrier_depends(); 2576 smp_read_barrier_depends();
2574 if (cic->key) 2577 if (cic->key)
2575 __cfq_exit_single_io_context(cfqd, cic); 2578 __cfq_exit_single_io_context(cfqd, cic);
2576 2579
2577 spin_unlock_irqrestore(q->queue_lock, flags); 2580 spin_unlock_irqrestore(q->queue_lock, flags);
2578 } 2581 }
2579 } 2582 }
2580 2583
2581 /* 2584 /*
2582 * The process that ioc belongs to has exited, we need to clean up 2585 * The process that ioc belongs to has exited, we need to clean up
2583 * and put the internal structures we have that belongs to that process. 2586 * and put the internal structures we have that belongs to that process.
2584 */ 2587 */
2585 static void cfq_exit_io_context(struct io_context *ioc) 2588 static void cfq_exit_io_context(struct io_context *ioc)
2586 { 2589 {
2587 call_for_each_cic(ioc, cfq_exit_single_io_context); 2590 call_for_each_cic(ioc, cfq_exit_single_io_context);
2588 } 2591 }
2589 2592
2590 static struct cfq_io_context * 2593 static struct cfq_io_context *
2591 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 2594 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
2592 { 2595 {
2593 struct cfq_io_context *cic; 2596 struct cfq_io_context *cic;
2594 2597
2595 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO, 2598 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
2596 cfqd->queue->node); 2599 cfqd->queue->node);
2597 if (cic) { 2600 if (cic) {
2598 cic->last_end_request = jiffies; 2601 cic->last_end_request = jiffies;
2599 INIT_LIST_HEAD(&cic->queue_list); 2602 INIT_LIST_HEAD(&cic->queue_list);
2600 INIT_HLIST_NODE(&cic->cic_list); 2603 INIT_HLIST_NODE(&cic->cic_list);
2601 cic->dtor = cfq_free_io_context; 2604 cic->dtor = cfq_free_io_context;
2602 cic->exit = cfq_exit_io_context; 2605 cic->exit = cfq_exit_io_context;
2603 elv_ioc_count_inc(cfq_ioc_count); 2606 elv_ioc_count_inc(cfq_ioc_count);
2604 } 2607 }
2605 2608
2606 return cic; 2609 return cic;
2607 } 2610 }
2608 2611
2609 static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) 2612 static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
2610 { 2613 {
2611 struct task_struct *tsk = current; 2614 struct task_struct *tsk = current;
2612 int ioprio_class; 2615 int ioprio_class;
2613 2616
2614 if (!cfq_cfqq_prio_changed(cfqq)) 2617 if (!cfq_cfqq_prio_changed(cfqq))
2615 return; 2618 return;
2616 2619
2617 ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio); 2620 ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio);
2618 switch (ioprio_class) { 2621 switch (ioprio_class) {
2619 default: 2622 default:
2620 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); 2623 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
2621 case IOPRIO_CLASS_NONE: 2624 case IOPRIO_CLASS_NONE:
2622 /* 2625 /*
2623 * no prio set, inherit CPU scheduling settings 2626 * no prio set, inherit CPU scheduling settings
2624 */ 2627 */
2625 cfqq->ioprio = task_nice_ioprio(tsk); 2628 cfqq->ioprio = task_nice_ioprio(tsk);
2626 cfqq->ioprio_class = task_nice_ioclass(tsk); 2629 cfqq->ioprio_class = task_nice_ioclass(tsk);
2627 break; 2630 break;
2628 case IOPRIO_CLASS_RT: 2631 case IOPRIO_CLASS_RT:
2629 cfqq->ioprio = task_ioprio(ioc); 2632 cfqq->ioprio = task_ioprio(ioc);
2630 cfqq->ioprio_class = IOPRIO_CLASS_RT; 2633 cfqq->ioprio_class = IOPRIO_CLASS_RT;
2631 break; 2634 break;
2632 case IOPRIO_CLASS_BE: 2635 case IOPRIO_CLASS_BE:
2633 cfqq->ioprio = task_ioprio(ioc); 2636 cfqq->ioprio = task_ioprio(ioc);
2634 cfqq->ioprio_class = IOPRIO_CLASS_BE; 2637 cfqq->ioprio_class = IOPRIO_CLASS_BE;
2635 break; 2638 break;
2636 case IOPRIO_CLASS_IDLE: 2639 case IOPRIO_CLASS_IDLE:
2637 cfqq->ioprio_class = IOPRIO_CLASS_IDLE; 2640 cfqq->ioprio_class = IOPRIO_CLASS_IDLE;
2638 cfqq->ioprio = 7; 2641 cfqq->ioprio = 7;
2639 cfq_clear_cfqq_idle_window(cfqq); 2642 cfq_clear_cfqq_idle_window(cfqq);
2640 break; 2643 break;
2641 } 2644 }
2642 2645
2643 /* 2646 /*
2644 * keep track of original prio settings in case we have to temporarily 2647 * keep track of original prio settings in case we have to temporarily
2645 * elevate the priority of this queue 2648 * elevate the priority of this queue
2646 */ 2649 */
2647 cfqq->org_ioprio = cfqq->ioprio; 2650 cfqq->org_ioprio = cfqq->ioprio;
2648 cfqq->org_ioprio_class = cfqq->ioprio_class; 2651 cfqq->org_ioprio_class = cfqq->ioprio_class;
2649 cfq_clear_cfqq_prio_changed(cfqq); 2652 cfq_clear_cfqq_prio_changed(cfqq);
2650 } 2653 }
2651 2654
2652 static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) 2655 static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
2653 { 2656 {
2654 struct cfq_data *cfqd = cic->key; 2657 struct cfq_data *cfqd = cic->key;
2655 struct cfq_queue *cfqq; 2658 struct cfq_queue *cfqq;
2656 unsigned long flags; 2659 unsigned long flags;
2657 2660
2658 if (unlikely(!cfqd)) 2661 if (unlikely(!cfqd))
2659 return; 2662 return;
2660 2663
2661 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 2664 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
2662 2665
2663 cfqq = cic->cfqq[BLK_RW_ASYNC]; 2666 cfqq = cic->cfqq[BLK_RW_ASYNC];
2664 if (cfqq) { 2667 if (cfqq) {
2665 struct cfq_queue *new_cfqq; 2668 struct cfq_queue *new_cfqq;
2666 new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc, 2669 new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
2667 GFP_ATOMIC); 2670 GFP_ATOMIC);
2668 if (new_cfqq) { 2671 if (new_cfqq) {
2669 cic->cfqq[BLK_RW_ASYNC] = new_cfqq; 2672 cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
2670 cfq_put_queue(cfqq); 2673 cfq_put_queue(cfqq);
2671 } 2674 }
2672 } 2675 }
2673 2676
2674 cfqq = cic->cfqq[BLK_RW_SYNC]; 2677 cfqq = cic->cfqq[BLK_RW_SYNC];
2675 if (cfqq) 2678 if (cfqq)
2676 cfq_mark_cfqq_prio_changed(cfqq); 2679 cfq_mark_cfqq_prio_changed(cfqq);
2677 2680
2678 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 2681 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
2679 } 2682 }
2680 2683
2681 static void cfq_ioc_set_ioprio(struct io_context *ioc) 2684 static void cfq_ioc_set_ioprio(struct io_context *ioc)
2682 { 2685 {
2683 call_for_each_cic(ioc, changed_ioprio); 2686 call_for_each_cic(ioc, changed_ioprio);
2684 ioc->ioprio_changed = 0; 2687 ioc->ioprio_changed = 0;
2685 } 2688 }
2686 2689
2687 static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, 2690 static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2688 pid_t pid, bool is_sync) 2691 pid_t pid, bool is_sync)
2689 { 2692 {
2690 RB_CLEAR_NODE(&cfqq->rb_node); 2693 RB_CLEAR_NODE(&cfqq->rb_node);
2691 RB_CLEAR_NODE(&cfqq->p_node); 2694 RB_CLEAR_NODE(&cfqq->p_node);
2692 INIT_LIST_HEAD(&cfqq->fifo); 2695 INIT_LIST_HEAD(&cfqq->fifo);
2693 2696
2694 atomic_set(&cfqq->ref, 0); 2697 atomic_set(&cfqq->ref, 0);
2695 cfqq->cfqd = cfqd; 2698 cfqq->cfqd = cfqd;
2696 2699
2697 cfq_mark_cfqq_prio_changed(cfqq); 2700 cfq_mark_cfqq_prio_changed(cfqq);
2698 2701
2699 if (is_sync) { 2702 if (is_sync) {
2700 if (!cfq_class_idle(cfqq)) 2703 if (!cfq_class_idle(cfqq))
2701 cfq_mark_cfqq_idle_window(cfqq); 2704 cfq_mark_cfqq_idle_window(cfqq);
2702 cfq_mark_cfqq_sync(cfqq); 2705 cfq_mark_cfqq_sync(cfqq);
2703 } 2706 }
2704 cfqq->pid = pid; 2707 cfqq->pid = pid;
2705 } 2708 }
2706 2709
2707 #ifdef CONFIG_CFQ_GROUP_IOSCHED 2710 #ifdef CONFIG_CFQ_GROUP_IOSCHED
2708 static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic) 2711 static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
2709 { 2712 {
2710 struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1); 2713 struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
2711 struct cfq_data *cfqd = cic->key; 2714 struct cfq_data *cfqd = cic->key;
2712 unsigned long flags; 2715 unsigned long flags;
2713 struct request_queue *q; 2716 struct request_queue *q;
2714 2717
2715 if (unlikely(!cfqd)) 2718 if (unlikely(!cfqd))
2716 return; 2719 return;
2717 2720
2718 q = cfqd->queue; 2721 q = cfqd->queue;
2719 2722
2720 spin_lock_irqsave(q->queue_lock, flags); 2723 spin_lock_irqsave(q->queue_lock, flags);
2721 2724
2722 if (sync_cfqq) { 2725 if (sync_cfqq) {
2723 /* 2726 /*
2724 * Drop reference to sync queue. A new sync queue will be 2727 * Drop reference to sync queue. A new sync queue will be
2725 * assigned in new group upon arrival of a fresh request. 2728 * assigned in new group upon arrival of a fresh request.
2726 */ 2729 */
2727 cfq_log_cfqq(cfqd, sync_cfqq, "changed cgroup"); 2730 cfq_log_cfqq(cfqd, sync_cfqq, "changed cgroup");
2728 cic_set_cfqq(cic, NULL, 1); 2731 cic_set_cfqq(cic, NULL, 1);
2729 cfq_put_queue(sync_cfqq); 2732 cfq_put_queue(sync_cfqq);
2730 } 2733 }
2731 2734
2732 spin_unlock_irqrestore(q->queue_lock, flags); 2735 spin_unlock_irqrestore(q->queue_lock, flags);
2733 } 2736 }
2734 2737
2735 static void cfq_ioc_set_cgroup(struct io_context *ioc) 2738 static void cfq_ioc_set_cgroup(struct io_context *ioc)
2736 { 2739 {
2737 call_for_each_cic(ioc, changed_cgroup); 2740 call_for_each_cic(ioc, changed_cgroup);
2738 ioc->cgroup_changed = 0; 2741 ioc->cgroup_changed = 0;
2739 } 2742 }
2740 #endif /* CONFIG_CFQ_GROUP_IOSCHED */ 2743 #endif /* CONFIG_CFQ_GROUP_IOSCHED */
2741 2744
2742 static struct cfq_queue * 2745 static struct cfq_queue *
2743 cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, 2746 cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
2744 struct io_context *ioc, gfp_t gfp_mask) 2747 struct io_context *ioc, gfp_t gfp_mask)
2745 { 2748 {
2746 struct cfq_queue *cfqq, *new_cfqq = NULL; 2749 struct cfq_queue *cfqq, *new_cfqq = NULL;
2747 struct cfq_io_context *cic; 2750 struct cfq_io_context *cic;
2748 struct cfq_group *cfqg; 2751 struct cfq_group *cfqg;
2749 2752
2750 retry: 2753 retry:
2751 cfqg = cfq_get_cfqg(cfqd, 1); 2754 cfqg = cfq_get_cfqg(cfqd, 1);
2752 cic = cfq_cic_lookup(cfqd, ioc); 2755 cic = cfq_cic_lookup(cfqd, ioc);
2753 /* cic always exists here */ 2756 /* cic always exists here */
2754 cfqq = cic_to_cfqq(cic, is_sync); 2757 cfqq = cic_to_cfqq(cic, is_sync);
2755 2758
2756 /* 2759 /*
2757 * Always try a new alloc if we fell back to the OOM cfqq 2760 * Always try a new alloc if we fell back to the OOM cfqq
2758 * originally, since it should just be a temporary situation. 2761 * originally, since it should just be a temporary situation.
2759 */ 2762 */
2760 if (!cfqq || cfqq == &cfqd->oom_cfqq) { 2763 if (!cfqq || cfqq == &cfqd->oom_cfqq) {
2761 cfqq = NULL; 2764 cfqq = NULL;
2762 if (new_cfqq) { 2765 if (new_cfqq) {
2763 cfqq = new_cfqq; 2766 cfqq = new_cfqq;
2764 new_cfqq = NULL; 2767 new_cfqq = NULL;
2765 } else if (gfp_mask & __GFP_WAIT) { 2768 } else if (gfp_mask & __GFP_WAIT) {
2766 spin_unlock_irq(cfqd->queue->queue_lock); 2769 spin_unlock_irq(cfqd->queue->queue_lock);
2767 new_cfqq = kmem_cache_alloc_node(cfq_pool, 2770 new_cfqq = kmem_cache_alloc_node(cfq_pool,
2768 gfp_mask | __GFP_ZERO, 2771 gfp_mask | __GFP_ZERO,
2769 cfqd->queue->node); 2772 cfqd->queue->node);
2770 spin_lock_irq(cfqd->queue->queue_lock); 2773 spin_lock_irq(cfqd->queue->queue_lock);
2771 if (new_cfqq) 2774 if (new_cfqq)
2772 goto retry; 2775 goto retry;
2773 } else { 2776 } else {
2774 cfqq = kmem_cache_alloc_node(cfq_pool, 2777 cfqq = kmem_cache_alloc_node(cfq_pool,
2775 gfp_mask | __GFP_ZERO, 2778 gfp_mask | __GFP_ZERO,
2776 cfqd->queue->node); 2779 cfqd->queue->node);
2777 } 2780 }
2778 2781
2779 if (cfqq) { 2782 if (cfqq) {
2780 cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); 2783 cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
2781 cfq_init_prio_data(cfqq, ioc); 2784 cfq_init_prio_data(cfqq, ioc);
2782 cfq_link_cfqq_cfqg(cfqq, cfqg); 2785 cfq_link_cfqq_cfqg(cfqq, cfqg);
2783 cfq_log_cfqq(cfqd, cfqq, "alloced"); 2786 cfq_log_cfqq(cfqd, cfqq, "alloced");
2784 } else 2787 } else
2785 cfqq = &cfqd->oom_cfqq; 2788 cfqq = &cfqd->oom_cfqq;
2786 } 2789 }
2787 2790
2788 if (new_cfqq) 2791 if (new_cfqq)
2789 kmem_cache_free(cfq_pool, new_cfqq); 2792 kmem_cache_free(cfq_pool, new_cfqq);
2790 2793
2791 return cfqq; 2794 return cfqq;
2792 } 2795 }
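/*
 * Note on the retry dance above: when the caller may sleep (__GFP_WAIT),
 * the queue_lock is dropped around the slab allocation and the lookup is
 * redone from the retry label, since the cic/cfqq state can change while
 * the lock is released. If allocation fails, the embedded oom_cfqq is
 * used instead of failing the request, and a speculatively allocated
 * cfqq that turned out to be unneeded is freed at the end.
 */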
2793 2796
2794 static struct cfq_queue ** 2797 static struct cfq_queue **
2795 cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio) 2798 cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
2796 { 2799 {
2797 switch (ioprio_class) { 2800 switch (ioprio_class) {
2798 case IOPRIO_CLASS_RT: 2801 case IOPRIO_CLASS_RT:
2799 return &cfqd->async_cfqq[0][ioprio]; 2802 return &cfqd->async_cfqq[0][ioprio];
2800 case IOPRIO_CLASS_BE: 2803 case IOPRIO_CLASS_BE:
2801 return &cfqd->async_cfqq[1][ioprio]; 2804 return &cfqd->async_cfqq[1][ioprio];
2802 case IOPRIO_CLASS_IDLE: 2805 case IOPRIO_CLASS_IDLE:
2803 return &cfqd->async_idle_cfqq; 2806 return &cfqd->async_idle_cfqq;
2804 default: 2807 default:
2805 BUG(); 2808 BUG();
2806 } 2809 }
2807 } 2810 }
2808 2811
2809 static struct cfq_queue * 2812 static struct cfq_queue *
2810 cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc, 2813 cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
2811 gfp_t gfp_mask) 2814 gfp_t gfp_mask)
2812 { 2815 {
2813 const int ioprio = task_ioprio(ioc); 2816 const int ioprio = task_ioprio(ioc);
2814 const int ioprio_class = task_ioprio_class(ioc); 2817 const int ioprio_class = task_ioprio_class(ioc);
2815 struct cfq_queue **async_cfqq = NULL; 2818 struct cfq_queue **async_cfqq = NULL;
2816 struct cfq_queue *cfqq = NULL; 2819 struct cfq_queue *cfqq = NULL;
2817 2820
2818 if (!is_sync) { 2821 if (!is_sync) {
2819 async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio); 2822 async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
2820 cfqq = *async_cfqq; 2823 cfqq = *async_cfqq;
2821 } 2824 }
2822 2825
2823 if (!cfqq) 2826 if (!cfqq)
2824 cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask); 2827 cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
2825 2828
2826 /* 2829 /*
2827 * pin the queue now that it's allocated, scheduler exit will prune it 2830 * pin the queue now that it's allocated, scheduler exit will prune it
2828 */ 2831 */
2829 if (!is_sync && !(*async_cfqq)) { 2832 if (!is_sync && !(*async_cfqq)) {
2830 atomic_inc(&cfqq->ref); 2833 atomic_inc(&cfqq->ref);
2831 *async_cfqq = cfqq; 2834 *async_cfqq = cfqq;
2832 } 2835 }
2833 2836
2834 atomic_inc(&cfqq->ref); 2837 atomic_inc(&cfqq->ref);
2835 return cfqq; 2838 return cfqq;
2836 } 2839 }
2837 2840
2838 /* 2841 /*
2839 * We drop cfq io contexts lazily, so we may find a dead one. 2842 * We drop cfq io contexts lazily, so we may find a dead one.
2840 */ 2843 */
2841 static void 2844 static void
2842 cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, 2845 cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
2843 struct cfq_io_context *cic) 2846 struct cfq_io_context *cic)
2844 { 2847 {
2845 unsigned long flags; 2848 unsigned long flags;
2846 2849
2847 WARN_ON(!list_empty(&cic->queue_list)); 2850 WARN_ON(!list_empty(&cic->queue_list));
2848 2851
2849 spin_lock_irqsave(&ioc->lock, flags); 2852 spin_lock_irqsave(&ioc->lock, flags);
2850 2853
2851 BUG_ON(ioc->ioc_data == cic); 2854 BUG_ON(ioc->ioc_data == cic);
2852 2855
2853 radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd); 2856 radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd);
2854 hlist_del_rcu(&cic->cic_list); 2857 hlist_del_rcu(&cic->cic_list);
2855 spin_unlock_irqrestore(&ioc->lock, flags); 2858 spin_unlock_irqrestore(&ioc->lock, flags);
2856 2859
2857 cfq_cic_free(cic); 2860 cfq_cic_free(cic);
2858 } 2861 }
2859 2862
2860 static struct cfq_io_context * 2863 static struct cfq_io_context *
2861 cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) 2864 cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
2862 { 2865 {
2863 struct cfq_io_context *cic; 2866 struct cfq_io_context *cic;
2864 unsigned long flags; 2867 unsigned long flags;
2865 void *k; 2868 void *k;
2866 2869
2867 if (unlikely(!ioc)) 2870 if (unlikely(!ioc))
2868 return NULL; 2871 return NULL;
2869 2872
2870 rcu_read_lock(); 2873 rcu_read_lock();
2871 2874
2872 /* 2875 /*
2873 * we maintain a last-hit cache, to avoid browsing over the tree 2876 * we maintain a last-hit cache, to avoid browsing over the tree
2874 */ 2877 */
2875 cic = rcu_dereference(ioc->ioc_data); 2878 cic = rcu_dereference(ioc->ioc_data);
2876 if (cic && cic->key == cfqd) { 2879 if (cic && cic->key == cfqd) {
2877 rcu_read_unlock(); 2880 rcu_read_unlock();
2878 return cic; 2881 return cic;
2879 } 2882 }
2880 2883
2881 do { 2884 do {
2882 cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd); 2885 cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd);
2883 rcu_read_unlock(); 2886 rcu_read_unlock();
2884 if (!cic) 2887 if (!cic)
2885 break; 2888 break;
2886 /* ->key must be copied to avoid race with cfq_exit_queue() */ 2889 /* ->key must be copied to avoid race with cfq_exit_queue() */
2887 k = cic->key; 2890 k = cic->key;
2888 if (unlikely(!k)) { 2891 if (unlikely(!k)) {
2889 cfq_drop_dead_cic(cfqd, ioc, cic); 2892 cfq_drop_dead_cic(cfqd, ioc, cic);
2890 rcu_read_lock(); 2893 rcu_read_lock();
2891 continue; 2894 continue;
2892 } 2895 }
2893 2896
2894 spin_lock_irqsave(&ioc->lock, flags); 2897 spin_lock_irqsave(&ioc->lock, flags);
2895 rcu_assign_pointer(ioc->ioc_data, cic); 2898 rcu_assign_pointer(ioc->ioc_data, cic);
2896 spin_unlock_irqrestore(&ioc->lock, flags); 2899 spin_unlock_irqrestore(&ioc->lock, flags);
2897 break; 2900 break;
2898 } while (1); 2901 } while (1);
2899 2902
2900 return cic; 2903 return cic;
2901 } 2904 }
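/*
 * Lookup strategy above, roughly: try the one-entry per-ioc cache
 * (ioc->ioc_data) under rcu_read_lock() first; on a miss, fall back to
 * the radix tree keyed by the cfqd pointer, lazily dropping any cic
 * whose ->key was cleared by queue teardown, and promote a hit into the
 * one-entry cache for the next lookup.
 */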
2902 2905
2903 /* 2906 /*
2904 * Add cic into ioc, using cfqd as the search key. This enables us to lookup 2907 * Add cic into ioc, using cfqd as the search key. This enables us to lookup
2905 * the process specific cfq io context when entered from the block layer. 2908 * the process specific cfq io context when entered from the block layer.
2906 * Also adds the cic to a per-cfqd list, used when this queue is removed. 2909 * Also adds the cic to a per-cfqd list, used when this queue is removed.
2907 */ 2910 */
2908 static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, 2911 static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
2909 struct cfq_io_context *cic, gfp_t gfp_mask) 2912 struct cfq_io_context *cic, gfp_t gfp_mask)
2910 { 2913 {
2911 unsigned long flags; 2914 unsigned long flags;
2912 int ret; 2915 int ret;
2913 2916
2914 ret = radix_tree_preload(gfp_mask); 2917 ret = radix_tree_preload(gfp_mask);
2915 if (!ret) { 2918 if (!ret) {
2916 cic->ioc = ioc; 2919 cic->ioc = ioc;
2917 cic->key = cfqd; 2920 cic->key = cfqd;
2918 2921
2919 spin_lock_irqsave(&ioc->lock, flags); 2922 spin_lock_irqsave(&ioc->lock, flags);
2920 ret = radix_tree_insert(&ioc->radix_root, 2923 ret = radix_tree_insert(&ioc->radix_root,
2921 (unsigned long) cfqd, cic); 2924 (unsigned long) cfqd, cic);
2922 if (!ret) 2925 if (!ret)
2923 hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list); 2926 hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
2924 spin_unlock_irqrestore(&ioc->lock, flags); 2927 spin_unlock_irqrestore(&ioc->lock, flags);
2925 2928
2926 radix_tree_preload_end(); 2929 radix_tree_preload_end();
2927 2930
2928 if (!ret) { 2931 if (!ret) {
2929 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 2932 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
2930 list_add(&cic->queue_list, &cfqd->cic_list); 2933 list_add(&cic->queue_list, &cfqd->cic_list);
2931 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 2934 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
2932 } 2935 }
2933 } 2936 }
2934 2937
2935 if (ret) 2938 if (ret)
2936 printk(KERN_ERR "cfq: cic link failed!\n"); 2939 printk(KERN_ERR "cfq: cic link failed!\n");
2937 2940
2938 return ret; 2941 return ret;
2939 } 2942 }
2940 2943
2941 /* 2944 /*
2942 * Setup general io context and cfq io context. There can be several cfq 2945 * Setup general io context and cfq io context. There can be several cfq
2943 * io contexts per general io context, if this process is doing io to more 2946 * io contexts per general io context, if this process is doing io to more
2944 * than one device managed by cfq. 2947 * than one device managed by cfq.
2945 */ 2948 */
2946 static struct cfq_io_context * 2949 static struct cfq_io_context *
2947 cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 2950 cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
2948 { 2951 {
2949 struct io_context *ioc = NULL; 2952 struct io_context *ioc = NULL;
2950 struct cfq_io_context *cic; 2953 struct cfq_io_context *cic;
2951 2954
2952 might_sleep_if(gfp_mask & __GFP_WAIT); 2955 might_sleep_if(gfp_mask & __GFP_WAIT);
2953 2956
2954 ioc = get_io_context(gfp_mask, cfqd->queue->node); 2957 ioc = get_io_context(gfp_mask, cfqd->queue->node);
2955 if (!ioc) 2958 if (!ioc)
2956 return NULL; 2959 return NULL;
2957 2960
2958 cic = cfq_cic_lookup(cfqd, ioc); 2961 cic = cfq_cic_lookup(cfqd, ioc);
2959 if (cic) 2962 if (cic)
2960 goto out; 2963 goto out;
2961 2964
2962 cic = cfq_alloc_io_context(cfqd, gfp_mask); 2965 cic = cfq_alloc_io_context(cfqd, gfp_mask);
2963 if (cic == NULL) 2966 if (cic == NULL)
2964 goto err; 2967 goto err;
2965 2968
2966 if (cfq_cic_link(cfqd, ioc, cic, gfp_mask)) 2969 if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
2967 goto err_free; 2970 goto err_free;
2968 2971
2969 out: 2972 out:
2970 smp_read_barrier_depends(); 2973 smp_read_barrier_depends();
2971 if (unlikely(ioc->ioprio_changed)) 2974 if (unlikely(ioc->ioprio_changed))
2972 cfq_ioc_set_ioprio(ioc); 2975 cfq_ioc_set_ioprio(ioc);
2973 2976
2974 #ifdef CONFIG_CFQ_GROUP_IOSCHED 2977 #ifdef CONFIG_CFQ_GROUP_IOSCHED
2975 if (unlikely(ioc->cgroup_changed)) 2978 if (unlikely(ioc->cgroup_changed))
2976 cfq_ioc_set_cgroup(ioc); 2979 cfq_ioc_set_cgroup(ioc);
2977 #endif 2980 #endif
2978 return cic; 2981 return cic;
2979 err_free: 2982 err_free:
2980 cfq_cic_free(cic); 2983 cfq_cic_free(cic);
2981 err: 2984 err:
2982 put_io_context(ioc); 2985 put_io_context(ioc);
2983 return NULL; 2986 return NULL;
2984 } 2987 }
2985 2988
2986 static void 2989 static void
2987 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) 2990 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
2988 { 2991 {
2989 unsigned long elapsed = jiffies - cic->last_end_request; 2992 unsigned long elapsed = jiffies - cic->last_end_request;
2990 unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); 2993 unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
2991 2994
2992 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; 2995 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
2993 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; 2996 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
2994 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; 2997 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
2995 } 2998 }
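/*
 * A minimal user-space sketch (not kernel code) of the fixed-point EWMA
 * used in cfq_update_io_thinktime() above: the sample count and running
 * total both decay by 7/8 per update and are scaled by 256 so the mean
 * falls out of pure integer arithmetic. The sample values below are made
 * up purely for illustration.
 */
#include <stdio.h>

struct ttime_est {
	unsigned long samples;	/* scaled sample count, converges to 256 */
	unsigned long total;	/* scaled, decayed sum of think times */
	unsigned long mean;	/* estimated mean think time */
};

static void ttime_update(struct ttime_est *t, unsigned long ttime)
{
	t->samples = (7 * t->samples + 256) / 8;
	t->total = (7 * t->total + 256 * ttime) / 8;
	t->mean = (t->total + 128) / t->samples;
}

int main(void)
{
	struct ttime_est t = { 0, 0, 0 };
	unsigned long think[] = { 4, 4, 4, 40, 4 };	/* "jiffies" */
	unsigned int i;

	for (i = 0; i < sizeof(think) / sizeof(think[0]); i++) {
		ttime_update(&t, think[i]);
		printf("sample %u: mean ~= %lu\n", i, t.mean);
	}
	return 0;
}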
2996 2999
2997 static void 3000 static void
2998 cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq, 3001 cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2999 struct request *rq) 3002 struct request *rq)
3000 { 3003 {
3001 sector_t sdist = 0; 3004 sector_t sdist = 0;
3002 sector_t n_sec = blk_rq_sectors(rq); 3005 sector_t n_sec = blk_rq_sectors(rq);
3003 if (cfqq->last_request_pos) { 3006 if (cfqq->last_request_pos) {
3004 if (cfqq->last_request_pos < blk_rq_pos(rq)) 3007 if (cfqq->last_request_pos < blk_rq_pos(rq))
3005 sdist = blk_rq_pos(rq) - cfqq->last_request_pos; 3008 sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
3006 else 3009 else
3007 sdist = cfqq->last_request_pos - blk_rq_pos(rq); 3010 sdist = cfqq->last_request_pos - blk_rq_pos(rq);
3008 } 3011 }
3009 3012
3010 cfqq->seek_history <<= 1; 3013 cfqq->seek_history <<= 1;
3011 if (blk_queue_nonrot(cfqd->queue)) 3014 if (blk_queue_nonrot(cfqd->queue))
3012 cfqq->seek_history |= (n_sec < CFQQ_SECT_THR_NONROT); 3015 cfqq->seek_history |= (n_sec < CFQQ_SECT_THR_NONROT);
3013 else 3016 else
3014 cfqq->seek_history |= (sdist > CFQQ_SEEK_THR); 3017 cfqq->seek_history |= (sdist > CFQQ_SEEK_THR);
3015 } 3018 }
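/*
 * A standalone sketch (not kernel code) of the seek_history window updated
 * in cfq_update_io_seektime() above: each request shifts the 32-bit window
 * left and ORs in one bit marking the request as seeky. The popcount test
 * and the 1/8 cutoff below only illustrate how a CFQQ_SEEKY()-style check
 * can consume the window; they are assumptions, not the in-tree definition.
 */
#include <stdio.h>

#define SEEK_THR_SECTORS	(8 * 1024)	/* illustrative threshold */

static unsigned int seek_history;

static void update_seek_history(unsigned long long last_pos,
				unsigned long long pos)
{
	unsigned long long sdist = pos > last_pos ? pos - last_pos
						  : last_pos - pos;

	seek_history <<= 1;
	seek_history |= (sdist > SEEK_THR_SECTORS);
}

static int queue_is_seeky(void)
{
	/* seeky if more than 1/8 of the last 32 requests were seeks */
	return __builtin_popcount(seek_history) > 32 / 8;
}

int main(void)
{
	unsigned long long last = 0, pos;
	int i;

	for (i = 0; i < 32; i++) {
		pos = last + (i % 2 ? 64 : 1000000);	/* alternate near/far */
		update_seek_history(last, pos);
		last = pos;
	}
	printf("seeky: %d\n", queue_is_seeky());
	return 0;
}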
3016 3019
3017 /* 3020 /*
3018 * Disable idle window if the process thinks too long or seeks so much that 3021 * Disable idle window if the process thinks too long or seeks so much that
3019 * it doesn't matter 3022 * it doesn't matter
3020 */ 3023 */
3021 static void 3024 static void
3022 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, 3025 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3023 struct cfq_io_context *cic) 3026 struct cfq_io_context *cic)
3024 { 3027 {
3025 int old_idle, enable_idle; 3028 int old_idle, enable_idle;
3026 3029
3027 /* 3030 /*
3028 * Don't idle for async or idle io prio class 3031 * Don't idle for async or idle io prio class
3029 */ 3032 */
3030 if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq)) 3033 if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq))
3031 return; 3034 return;
3032 3035
3033 enable_idle = old_idle = cfq_cfqq_idle_window(cfqq); 3036 enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
3034 3037
3035 if (cfqq->queued[0] + cfqq->queued[1] >= 4) 3038 if (cfqq->queued[0] + cfqq->queued[1] >= 4)
3036 cfq_mark_cfqq_deep(cfqq); 3039 cfq_mark_cfqq_deep(cfqq);
3037 3040
3038 if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || 3041 if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
3039 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) 3042 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
3040 enable_idle = 0; 3043 enable_idle = 0;
3041 else if (sample_valid(cic->ttime_samples)) { 3044 else if (sample_valid(cic->ttime_samples)) {
3042 if (cic->ttime_mean > cfqd->cfq_slice_idle) 3045 if (cic->ttime_mean > cfqd->cfq_slice_idle)
3043 enable_idle = 0; 3046 enable_idle = 0;
3044 else 3047 else
3045 enable_idle = 1; 3048 enable_idle = 1;
3046 } 3049 }
3047 3050
3048 if (old_idle != enable_idle) { 3051 if (old_idle != enable_idle) {
3049 cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle); 3052 cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle);
3050 if (enable_idle) 3053 if (enable_idle)
3051 cfq_mark_cfqq_idle_window(cfqq); 3054 cfq_mark_cfqq_idle_window(cfqq);
3052 else 3055 else
3053 cfq_clear_cfqq_idle_window(cfqq); 3056 cfq_clear_cfqq_idle_window(cfqq);
3054 } 3057 }
3055 } 3058 }
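/*
 * A simplified standalone sketch (not kernel code) of the decision made in
 * cfq_update_idle_window() above, with the marking/clearing bookkeeping
 * stripped out: idling is only kept when the queue's owner either drives a
 * deep queue or submits nearby I/O, and its mean think time fits inside
 * the configured idle slice.
 */
#include <stdbool.h>
#include <stdio.h>

static bool should_idle(bool seeky, bool deep, unsigned long ttime_mean,
			unsigned long slice_idle)
{
	if (!slice_idle || (seeky && !deep))
		return false;			/* idling would waste the disk */
	return ttime_mean <= slice_idle;	/* owner comes back quickly */
}

int main(void)
{
	printf("%d\n", should_idle(false, false, 2, 8));	/* 1: keep idling */
	printf("%d\n", should_idle(true, false, 2, 8));		/* 0: too seeky */
	printf("%d\n", should_idle(false, false, 20, 8));	/* 0: slow thinker */
	return 0;
}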
3056 3059
3057 /* 3060 /*
3058 * Check if new_cfqq should preempt the currently active queue. Return 0 for 3061 * Check if new_cfqq should preempt the currently active queue. Return 0 for
3059 * no (or if we aren't sure); a 1 will cause a preempt. 3062 * no (or if we aren't sure); a 1 will cause a preempt.
3060 */ 3063 */
3061 static bool 3064 static bool
3062 cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, 3065 cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
3063 struct request *rq) 3066 struct request *rq)
3064 { 3067 {
3065 struct cfq_queue *cfqq; 3068 struct cfq_queue *cfqq;
3066 3069
3067 cfqq = cfqd->active_queue; 3070 cfqq = cfqd->active_queue;
3068 if (!cfqq) 3071 if (!cfqq)
3069 return false; 3072 return false;
3070 3073
3071 if (cfq_class_idle(new_cfqq)) 3074 if (cfq_class_idle(new_cfqq))
3072 return false; 3075 return false;
3073 3076
3074 if (cfq_class_idle(cfqq)) 3077 if (cfq_class_idle(cfqq))
3075 return true; 3078 return true;
3076 3079
3077 /* 3080 /*
3078 * Don't allow a non-RT request to preempt an ongoing RT cfqq timeslice. 3081 * Don't allow a non-RT request to preempt an ongoing RT cfqq timeslice.
3079 */ 3082 */
3080 if (cfq_class_rt(cfqq) && !cfq_class_rt(new_cfqq)) 3083 if (cfq_class_rt(cfqq) && !cfq_class_rt(new_cfqq))
3081 return false; 3084 return false;
3082 3085
3083 /* 3086 /*
3084 * if the new request is sync, but the currently running queue is 3087 * if the new request is sync, but the currently running queue is
3085 * not, let the sync request have priority. 3088 * not, let the sync request have priority.
3086 */ 3089 */
3087 if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq)) 3090 if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
3088 return true; 3091 return true;
3089 3092
3090 if (new_cfqq->cfqg != cfqq->cfqg) 3093 if (new_cfqq->cfqg != cfqq->cfqg)
3091 return false; 3094 return false;
3092 3095
3093 if (cfq_slice_used(cfqq)) 3096 if (cfq_slice_used(cfqq))
3094 return true; 3097 return true;
3095 3098
3096 /* Allow preemption only if we are idling on sync-noidle tree */ 3099 /* Allow preemption only if we are idling on sync-noidle tree */
3097 if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD && 3100 if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD &&
3098 cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD && 3101 cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD &&
3099 new_cfqq->service_tree->count == 2 && 3102 new_cfqq->service_tree->count == 2 &&
3100 RB_EMPTY_ROOT(&cfqq->sort_list)) 3103 RB_EMPTY_ROOT(&cfqq->sort_list))
3101 return true; 3104 return true;
3102 3105
3103 /* 3106 /*
3104 * So both queues are sync. Let the new request get disk time if 3107 * So both queues are sync. Let the new request get disk time if
3105 * it's a metadata request and the current queue is doing regular IO. 3108 * it's a metadata request and the current queue is doing regular IO.
3106 */ 3109 */
3107 if (rq_is_meta(rq) && !cfqq->meta_pending) 3110 if (rq_is_meta(rq) && !cfqq->meta_pending)
3108 return true; 3111 return true;
3109 3112
3110 /* 3113 /*
3111 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. 3114 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
3112 */ 3115 */
3113 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) 3116 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
3114 return true; 3117 return true;
3115 3118
3116 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) 3119 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
3117 return false; 3120 return false;
3118 3121
3119 /* 3122 /*
3120 * if this request is as-good as one we would expect from the 3123 * if this request is as-good as one we would expect from the
3121 * current cfqq, let it preempt 3124 * current cfqq, let it preempt
3122 */ 3125 */
3123 if (cfq_rq_close(cfqd, cfqq, rq)) 3126 if (cfq_rq_close(cfqd, cfqq, rq))
3124 return true; 3127 return true;
3125 3128
3126 return false; 3129 return false;
3127 } 3130 }
3128 3131
3129 /* 3132 /*
3130 * cfqq preempts the active queue. if we allowed preempt with no slice left, 3133 * cfqq preempts the active queue. if we allowed preempt with no slice left,
3131 * let it have half of its nominal slice. 3134 * let it have half of its nominal slice.
3132 */ 3135 */
3133 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 3136 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3134 { 3137 {
3135 cfq_log_cfqq(cfqd, cfqq, "preempt"); 3138 cfq_log_cfqq(cfqd, cfqq, "preempt");
3136 cfq_slice_expired(cfqd, 1); 3139 cfq_slice_expired(cfqd, 1);
3137 3140
3138 /* 3141 /*
3139 * Put the new queue at the front of the current list, 3142 * Put the new queue at the front of the current list,
3140 * so we know that it will be selected next. 3143 * so we know that it will be selected next.
3141 */ 3144 */
3142 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 3145 BUG_ON(!cfq_cfqq_on_rr(cfqq));
3143 3146
3144 cfq_service_tree_add(cfqd, cfqq, 1); 3147 cfq_service_tree_add(cfqd, cfqq, 1);
3145 3148
3146 cfqq->slice_end = 0; 3149 cfqq->slice_end = 0;
3147 cfq_mark_cfqq_slice_new(cfqq); 3150 cfq_mark_cfqq_slice_new(cfqq);
3148 } 3151 }
3149 3152
3150 /* 3153 /*
3151 * Called when a new fs request (rq) is added (to cfqq). Check if there's 3154 * Called when a new fs request (rq) is added (to cfqq). Check if there's
3152 * something we should do about it 3155 * something we should do about it
3153 */ 3156 */
3154 static void 3157 static void
3155 cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, 3158 cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3156 struct request *rq) 3159 struct request *rq)
3157 { 3160 {
3158 struct cfq_io_context *cic = RQ_CIC(rq); 3161 struct cfq_io_context *cic = RQ_CIC(rq);
3159 3162
3160 cfqd->rq_queued++; 3163 cfqd->rq_queued++;
3161 if (rq_is_meta(rq)) 3164 if (rq_is_meta(rq))
3162 cfqq->meta_pending++; 3165 cfqq->meta_pending++;
3163 3166
3164 cfq_update_io_thinktime(cfqd, cic); 3167 cfq_update_io_thinktime(cfqd, cic);
3165 cfq_update_io_seektime(cfqd, cfqq, rq); 3168 cfq_update_io_seektime(cfqd, cfqq, rq);
3166 cfq_update_idle_window(cfqd, cfqq, cic); 3169 cfq_update_idle_window(cfqd, cfqq, cic);
3167 3170
3168 cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); 3171 cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
3169 3172
3170 if (cfqq == cfqd->active_queue) { 3173 if (cfqq == cfqd->active_queue) {
3171 /* 3174 /*
3172 * Remember that we saw a request from this process, but 3175 * Remember that we saw a request from this process, but
3173 * don't start queuing just yet. Otherwise we risk seeing lots 3176 * don't start queuing just yet. Otherwise we risk seeing lots
3174 * of tiny requests, because we disrupt the normal plugging 3177 * of tiny requests, because we disrupt the normal plugging
3175 * and merging. If the request is already larger than a single 3178 * and merging. If the request is already larger than a single
3176 * page, let it rip immediately. For that case we assume that 3179 * page, let it rip immediately. For that case we assume that
3177 * merging is already done. Ditto for a busy system that 3180 * merging is already done. Ditto for a busy system that
3178 * has other work pending, don't risk delaying until the 3181 * has other work pending, don't risk delaying until the
3179 * idle timer unplug to continue working. 3182 * idle timer unplug to continue working.
3180 */ 3183 */
3181 if (cfq_cfqq_wait_request(cfqq)) { 3184 if (cfq_cfqq_wait_request(cfqq)) {
3182 if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || 3185 if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
3183 cfqd->busy_queues > 1) { 3186 cfqd->busy_queues > 1) {
3184 del_timer(&cfqd->idle_slice_timer); 3187 del_timer(&cfqd->idle_slice_timer);
3185 cfq_clear_cfqq_wait_request(cfqq); 3188 cfq_clear_cfqq_wait_request(cfqq);
3186 __blk_run_queue(cfqd->queue); 3189 __blk_run_queue(cfqd->queue);
3187 } else 3190 } else
3188 cfq_mark_cfqq_must_dispatch(cfqq); 3191 cfq_mark_cfqq_must_dispatch(cfqq);
3189 } 3192 }
3190 } else if (cfq_should_preempt(cfqd, cfqq, rq)) { 3193 } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
3191 /* 3194 /*
3192 * not the active queue - expire current slice if it is 3195 * not the active queue - expire current slice if it is
3193 * idle and has expired its mean thinktime or this new queue 3196 * idle and has expired its mean thinktime or this new queue
3194 * has some old slice time left and is of higher priority or 3197 * has some old slice time left and is of higher priority or
3195 * this new queue is RT and the current one is BE 3198 * this new queue is RT and the current one is BE
3196 */ 3199 */
3197 cfq_preempt_queue(cfqd, cfqq); 3200 cfq_preempt_queue(cfqd, cfqq);
3198 __blk_run_queue(cfqd->queue); 3201 __blk_run_queue(cfqd->queue);
3199 } 3202 }
3200 } 3203 }
3201 3204
3202 static void cfq_insert_request(struct request_queue *q, struct request *rq) 3205 static void cfq_insert_request(struct request_queue *q, struct request *rq)
3203 { 3206 {
3204 struct cfq_data *cfqd = q->elevator->elevator_data; 3207 struct cfq_data *cfqd = q->elevator->elevator_data;
3205 struct cfq_queue *cfqq = RQ_CFQQ(rq); 3208 struct cfq_queue *cfqq = RQ_CFQQ(rq);
3206 3209
3207 cfq_log_cfqq(cfqd, cfqq, "insert_request"); 3210 cfq_log_cfqq(cfqd, cfqq, "insert_request");
3208 cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc); 3211 cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
3209 3212
3210 rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); 3213 rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
3211 list_add_tail(&rq->queuelist, &cfqq->fifo); 3214 list_add_tail(&rq->queuelist, &cfqq->fifo);
3212 cfq_add_rq_rb(rq); 3215 cfq_add_rq_rb(rq);
3213 3216
3214 cfq_rq_enqueued(cfqd, cfqq, rq); 3217 cfq_rq_enqueued(cfqd, cfqq, rq);
3215 } 3218 }
3216 3219
3217 /* 3220 /*
3218 * Update hw_tag based on peak queue depth over 50 samples under 3221 * Update hw_tag based on peak queue depth over 50 samples under
3219 * sufficient load. 3222 * sufficient load.
3220 */ 3223 */
3221 static void cfq_update_hw_tag(struct cfq_data *cfqd) 3224 static void cfq_update_hw_tag(struct cfq_data *cfqd)
3222 { 3225 {
3223 struct cfq_queue *cfqq = cfqd->active_queue; 3226 struct cfq_queue *cfqq = cfqd->active_queue;
3224 3227
3225 if (cfqd->rq_in_driver > cfqd->hw_tag_est_depth) 3228 if (cfqd->rq_in_driver > cfqd->hw_tag_est_depth)
3226 cfqd->hw_tag_est_depth = cfqd->rq_in_driver; 3229 cfqd->hw_tag_est_depth = cfqd->rq_in_driver;
3227 3230
3228 if (cfqd->hw_tag == 1) 3231 if (cfqd->hw_tag == 1)
3229 return; 3232 return;
3230 3233
3231 if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN && 3234 if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
3232 cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN) 3235 cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
3233 return; 3236 return;
3234 3237
3235 /* 3238 /*
3236 * If the active queue doesn't have enough requests and can idle, cfq might not 3239 * If the active queue doesn't have enough requests and can idle, cfq might not
3237 * dispatch sufficient requests to hardware. Don't zero hw_tag in this 3240 * dispatch sufficient requests to hardware. Don't zero hw_tag in this
3238 * case 3241 * case
3239 */ 3242 */
3240 if (cfqq && cfq_cfqq_idle_window(cfqq) && 3243 if (cfqq && cfq_cfqq_idle_window(cfqq) &&
3241 cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] < 3244 cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] <
3242 CFQ_HW_QUEUE_MIN && cfqd->rq_in_driver < CFQ_HW_QUEUE_MIN) 3245 CFQ_HW_QUEUE_MIN && cfqd->rq_in_driver < CFQ_HW_QUEUE_MIN)
3243 return; 3246 return;
3244 3247
3245 if (cfqd->hw_tag_samples++ < 50) 3248 if (cfqd->hw_tag_samples++ < 50)
3246 return; 3249 return;
3247 3250
3248 if (cfqd->hw_tag_est_depth >= CFQ_HW_QUEUE_MIN) 3251 if (cfqd->hw_tag_est_depth >= CFQ_HW_QUEUE_MIN)
3249 cfqd->hw_tag = 1; 3252 cfqd->hw_tag = 1;
3250 else 3253 else
3251 cfqd->hw_tag = 0; 3254 cfqd->hw_tag = 0;
3252 } 3255 }
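/*
 * A simplified standalone sketch (not kernel code) of the hw_tag estimate
 * maintained in cfq_update_hw_tag() above: track the peak number of
 * requests the driver holds, and only after 50 loaded samples decide
 * whether the device appears to queue commands internally. The
 * active-queue special case is dropped here, and HW_QUEUE_MIN is an
 * assumed stand-in for CFQ_HW_QUEUE_MIN.
 */
#include <stdio.h>

#define HW_QUEUE_MIN	5	/* assumed value, for illustration only */

struct hw_tag_est {
	int est_depth;	/* peak requests seen in the driver */
	int samples;	/* qualifying samples taken so far */
	int hw_tag;	/* -1 unknown, 0 no internal queueing, 1 queueing */
};

static void hw_tag_sample(struct hw_tag_est *e, int rq_in_driver)
{
	if (rq_in_driver > e->est_depth)
		e->est_depth = rq_in_driver;

	if (e->hw_tag == 1 || rq_in_driver <= HW_QUEUE_MIN)
		return;		/* already decided, or not enough load to judge */

	if (e->samples++ < 50)
		return;

	e->hw_tag = (e->est_depth >= HW_QUEUE_MIN);
}

int main(void)
{
	struct hw_tag_est e = { 0, 0, -1 };
	int i;

	for (i = 0; i < 60; i++)
		hw_tag_sample(&e, 16);	/* pretend 16 requests stay in flight */
	printf("hw_tag = %d (est_depth %d)\n", e.hw_tag, e.est_depth);
	return 0;
}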
3253 3256
3254 static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) 3257 static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3255 { 3258 {
3256 struct cfq_io_context *cic = cfqd->active_cic; 3259 struct cfq_io_context *cic = cfqd->active_cic;
3257 3260
3258 /* If there are other queues in the group, don't wait */ 3261 /* If there are other queues in the group, don't wait */
3259 if (cfqq->cfqg->nr_cfqq > 1) 3262 if (cfqq->cfqg->nr_cfqq > 1)
3260 return false; 3263 return false;
3261 3264
3262 if (cfq_slice_used(cfqq)) 3265 if (cfq_slice_used(cfqq))
3263 return true; 3266 return true;
3264 3267
3265 /* if slice left is less than think time, wait busy */ 3268 /* if slice left is less than think time, wait busy */
3266 if (cic && sample_valid(cic->ttime_samples) 3269 if (cic && sample_valid(cic->ttime_samples)
3267 && (cfqq->slice_end - jiffies < cic->ttime_mean)) 3270 && (cfqq->slice_end - jiffies < cic->ttime_mean))
3268 return true; 3271 return true;
3269 3272
3270 /* 3273 /*
3271 * If the think time is less than a jiffy then ttime_mean=0 and the above 3274 * If the think time is less than a jiffy then ttime_mean=0 and the above
3272 * will not be true. It might happen that slice has not expired yet 3275 * will not be true. It might happen that slice has not expired yet
3273 * but will expire soon (4-5 ns) during select_queue(). To cover the 3276 * but will expire soon (4-5 ns) during select_queue(). To cover the
3274 * case where think time is less than a jiffy, mark the queue wait 3277 * case where think time is less than a jiffy, mark the queue wait
3275 * busy if only 1 jiffy is left in the slice. 3278 * busy if only 1 jiffy is left in the slice.
3276 */ 3279 */
3277 if (cfqq->slice_end - jiffies == 1) 3280 if (cfqq->slice_end - jiffies == 1)
3278 return true; 3281 return true;
3279 3282
3280 return false; 3283 return false;
3281 } 3284 }
3282 3285
3283 static void cfq_completed_request(struct request_queue *q, struct request *rq) 3286 static void cfq_completed_request(struct request_queue *q, struct request *rq)
3284 { 3287 {
3285 struct cfq_queue *cfqq = RQ_CFQQ(rq); 3288 struct cfq_queue *cfqq = RQ_CFQQ(rq);
3286 struct cfq_data *cfqd = cfqq->cfqd; 3289 struct cfq_data *cfqd = cfqq->cfqd;
3287 const int sync = rq_is_sync(rq); 3290 const int sync = rq_is_sync(rq);
3288 unsigned long now; 3291 unsigned long now;
3289 3292
3290 now = jiffies; 3293 now = jiffies;
3291 cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", !!rq_noidle(rq)); 3294 cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", !!rq_noidle(rq));
3292 3295
3293 cfq_update_hw_tag(cfqd); 3296 cfq_update_hw_tag(cfqd);
3294 3297
3295 WARN_ON(!cfqd->rq_in_driver); 3298 WARN_ON(!cfqd->rq_in_driver);
3296 WARN_ON(!cfqq->dispatched); 3299 WARN_ON(!cfqq->dispatched);
3297 cfqd->rq_in_driver--; 3300 cfqd->rq_in_driver--;
3298 cfqq->dispatched--; 3301 cfqq->dispatched--;
3299 3302
3300 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; 3303 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
3301 3304
3302 if (sync) { 3305 if (sync) {
3303 RQ_CIC(rq)->last_end_request = now; 3306 RQ_CIC(rq)->last_end_request = now;
3304 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) 3307 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
3305 cfqd->last_delayed_sync = now; 3308 cfqd->last_delayed_sync = now;
3306 } 3309 }
3307 3310
3308 /* 3311 /*
3309 * If this is the active queue, check if it needs to be expired, 3312 * If this is the active queue, check if it needs to be expired,
3310 * or if we want to idle in case it has no pending requests. 3313 * or if we want to idle in case it has no pending requests.
3311 */ 3314 */
3312 if (cfqd->active_queue == cfqq) { 3315 if (cfqd->active_queue == cfqq) {
3313 const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list); 3316 const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list);
3314 3317
3315 if (cfq_cfqq_slice_new(cfqq)) { 3318 if (cfq_cfqq_slice_new(cfqq)) {
3316 cfq_set_prio_slice(cfqd, cfqq); 3319 cfq_set_prio_slice(cfqd, cfqq);
3317 cfq_clear_cfqq_slice_new(cfqq); 3320 cfq_clear_cfqq_slice_new(cfqq);
3318 } 3321 }
3319 3322
3320 /* 3323 /*
3321 * Should we wait for the next request to come in before we expire 3324 * Should we wait for the next request to come in before we expire
3322 * the queue? 3325 * the queue?
3323 */ 3326 */
3324 if (cfq_should_wait_busy(cfqd, cfqq)) { 3327 if (cfq_should_wait_busy(cfqd, cfqq)) {
3325 cfqq->slice_end = jiffies + cfqd->cfq_slice_idle; 3328 cfqq->slice_end = jiffies + cfqd->cfq_slice_idle;
3326 cfq_mark_cfqq_wait_busy(cfqq); 3329 cfq_mark_cfqq_wait_busy(cfqq);
3327 cfq_log_cfqq(cfqd, cfqq, "will busy wait"); 3330 cfq_log_cfqq(cfqd, cfqq, "will busy wait");
3328 } 3331 }
3329 3332
3330 /* 3333 /*
3331 * Idling is not enabled on: 3334 * Idling is not enabled on:
3332 * - expired queues 3335 * - expired queues
3333 * - idle-priority queues 3336 * - idle-priority queues
3334 * - async queues 3337 * - async queues
3335 * - queues with still some requests queued 3338 * - queues with still some requests queued
3336 * - when there is a close cooperator 3339 * - when there is a close cooperator
3337 */ 3340 */
3338 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) 3341 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
3339 cfq_slice_expired(cfqd, 1); 3342 cfq_slice_expired(cfqd, 1);
3340 else if (sync && cfqq_empty && 3343 else if (sync && cfqq_empty &&
3341 !cfq_close_cooperator(cfqd, cfqq)) { 3344 !cfq_close_cooperator(cfqd, cfqq)) {
3342 cfqd->noidle_tree_requires_idle |= !rq_noidle(rq); 3345 cfqd->noidle_tree_requires_idle |= !rq_noidle(rq);
3343 /* 3346 /*
3344 * Idling is enabled for SYNC_WORKLOAD. 3347 * Idling is enabled for SYNC_WORKLOAD.
3345 * SYNC_NOIDLE_WORKLOAD idles at the end of the tree 3348 * SYNC_NOIDLE_WORKLOAD idles at the end of the tree
3346 * only if we processed at least one !rq_noidle request 3349 * only if we processed at least one !rq_noidle request
3347 */ 3350 */
3348 if (cfqd->serving_type == SYNC_WORKLOAD 3351 if (cfqd->serving_type == SYNC_WORKLOAD
3349 || cfqd->noidle_tree_requires_idle 3352 || cfqd->noidle_tree_requires_idle
3350 || cfqq->cfqg->nr_cfqq == 1) 3353 || cfqq->cfqg->nr_cfqq == 1)
3351 cfq_arm_slice_timer(cfqd); 3354 cfq_arm_slice_timer(cfqd);
3352 } 3355 }
3353 } 3356 }
3354 3357
3355 if (!cfqd->rq_in_driver) 3358 if (!cfqd->rq_in_driver)
3356 cfq_schedule_dispatch(cfqd); 3359 cfq_schedule_dispatch(cfqd);
3357 } 3360 }
3358 3361
3359 /* 3362 /*
3360 * we temporarily boost lower priority queues if they are holding fs exclusive 3363 * we temporarily boost lower priority queues if they are holding fs exclusive
3361 * resources. they are boosted to normal prio (CLASS_BE/4) 3364 * resources. they are boosted to normal prio (CLASS_BE/4)
3362 */ 3365 */
3363 static void cfq_prio_boost(struct cfq_queue *cfqq) 3366 static void cfq_prio_boost(struct cfq_queue *cfqq)
3364 { 3367 {
3365 if (has_fs_excl()) { 3368 if (has_fs_excl()) {
3366 /* 3369 /*
3367 * boost idle prio on transactions that would lock out other 3370 * boost idle prio on transactions that would lock out other
3368 * users of the filesystem 3371 * users of the filesystem
3369 */ 3372 */
3370 if (cfq_class_idle(cfqq)) 3373 if (cfq_class_idle(cfqq))
3371 cfqq->ioprio_class = IOPRIO_CLASS_BE; 3374 cfqq->ioprio_class = IOPRIO_CLASS_BE;
3372 if (cfqq->ioprio > IOPRIO_NORM) 3375 if (cfqq->ioprio > IOPRIO_NORM)
3373 cfqq->ioprio = IOPRIO_NORM; 3376 cfqq->ioprio = IOPRIO_NORM;
3374 } else { 3377 } else {
3375 /* 3378 /*
3376 * unboost the queue (if needed) 3379 * unboost the queue (if needed)
3377 */ 3380 */
3378 cfqq->ioprio_class = cfqq->org_ioprio_class; 3381 cfqq->ioprio_class = cfqq->org_ioprio_class;
3379 cfqq->ioprio = cfqq->org_ioprio; 3382 cfqq->ioprio = cfqq->org_ioprio;
3380 } 3383 }
3381 } 3384 }
3382 3385
3383 static inline int __cfq_may_queue(struct cfq_queue *cfqq) 3386 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
3384 { 3387 {
3385 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { 3388 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
3386 cfq_mark_cfqq_must_alloc_slice(cfqq); 3389 cfq_mark_cfqq_must_alloc_slice(cfqq);
3387 return ELV_MQUEUE_MUST; 3390 return ELV_MQUEUE_MUST;
3388 } 3391 }
3389 3392
3390 return ELV_MQUEUE_MAY; 3393 return ELV_MQUEUE_MAY;
3391 } 3394 }
3392 3395
3393 static int cfq_may_queue(struct request_queue *q, int rw) 3396 static int cfq_may_queue(struct request_queue *q, int rw)
3394 { 3397 {
3395 struct cfq_data *cfqd = q->elevator->elevator_data; 3398 struct cfq_data *cfqd = q->elevator->elevator_data;
3396 struct task_struct *tsk = current; 3399 struct task_struct *tsk = current;
3397 struct cfq_io_context *cic; 3400 struct cfq_io_context *cic;
3398 struct cfq_queue *cfqq; 3401 struct cfq_queue *cfqq;
3399 3402
3400 /* 3403 /*
3401 * don't force setup of a queue from here, as a call to may_queue 3404 * don't force setup of a queue from here, as a call to may_queue
3402 * does not necessarily imply that a request actually will be queued. 3405 * does not necessarily imply that a request actually will be queued.
3403 * so just lookup a possibly existing queue, or return 'may queue' 3406 * so just lookup a possibly existing queue, or return 'may queue'
3404 * if that fails 3407 * if that fails
3405 */ 3408 */
3406 cic = cfq_cic_lookup(cfqd, tsk->io_context); 3409 cic = cfq_cic_lookup(cfqd, tsk->io_context);
3407 if (!cic) 3410 if (!cic)
3408 return ELV_MQUEUE_MAY; 3411 return ELV_MQUEUE_MAY;
3409 3412
3410 cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); 3413 cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
3411 if (cfqq) { 3414 if (cfqq) {
3412 cfq_init_prio_data(cfqq, cic->ioc); 3415 cfq_init_prio_data(cfqq, cic->ioc);
3413 cfq_prio_boost(cfqq); 3416 cfq_prio_boost(cfqq);
3414 3417
3415 return __cfq_may_queue(cfqq); 3418 return __cfq_may_queue(cfqq);
3416 } 3419 }
3417 3420
3418 return ELV_MQUEUE_MAY; 3421 return ELV_MQUEUE_MAY;
3419 } 3422 }
3420 3423
3421 /* 3424 /*
3422 * queue lock held here 3425 * queue lock held here
3423 */ 3426 */
3424 static void cfq_put_request(struct request *rq) 3427 static void cfq_put_request(struct request *rq)
3425 { 3428 {
3426 struct cfq_queue *cfqq = RQ_CFQQ(rq); 3429 struct cfq_queue *cfqq = RQ_CFQQ(rq);
3427 3430
3428 if (cfqq) { 3431 if (cfqq) {
3429 const int rw = rq_data_dir(rq); 3432 const int rw = rq_data_dir(rq);
3430 3433
3431 BUG_ON(!cfqq->allocated[rw]); 3434 BUG_ON(!cfqq->allocated[rw]);
3432 cfqq->allocated[rw]--; 3435 cfqq->allocated[rw]--;
3433 3436
3434 put_io_context(RQ_CIC(rq)->ioc); 3437 put_io_context(RQ_CIC(rq)->ioc);
3435 3438
3436 rq->elevator_private = NULL; 3439 rq->elevator_private = NULL;
3437 rq->elevator_private2 = NULL; 3440 rq->elevator_private2 = NULL;
3438 3441
3439 cfq_put_queue(cfqq); 3442 cfq_put_queue(cfqq);
3440 } 3443 }
3441 } 3444 }
3442 3445
3443 static struct cfq_queue * 3446 static struct cfq_queue *
3444 cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic, 3447 cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic,
3445 struct cfq_queue *cfqq) 3448 struct cfq_queue *cfqq)
3446 { 3449 {
3447 cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq); 3450 cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq);
3448 cic_set_cfqq(cic, cfqq->new_cfqq, 1); 3451 cic_set_cfqq(cic, cfqq->new_cfqq, 1);
3449 cfq_mark_cfqq_coop(cfqq->new_cfqq); 3452 cfq_mark_cfqq_coop(cfqq->new_cfqq);
3450 cfq_put_queue(cfqq); 3453 cfq_put_queue(cfqq);
3451 return cic_to_cfqq(cic, 1); 3454 return cic_to_cfqq(cic, 1);
3452 } 3455 }
3453 3456
3454 /* 3457 /*
3455 * Returns NULL if a new cfqq should be allocated, or the old cfqq if this 3458 * Returns NULL if a new cfqq should be allocated, or the old cfqq if this
3456 * was the last process referring to said cfqq. 3459 * was the last process referring to said cfqq.
3457 */ 3460 */
3458 static struct cfq_queue * 3461 static struct cfq_queue *
3459 split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq) 3462 split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq)
3460 { 3463 {
3461 if (cfqq_process_refs(cfqq) == 1) { 3464 if (cfqq_process_refs(cfqq) == 1) {
3462 cfqq->pid = current->pid; 3465 cfqq->pid = current->pid;
3463 cfq_clear_cfqq_coop(cfqq); 3466 cfq_clear_cfqq_coop(cfqq);
3464 cfq_clear_cfqq_split_coop(cfqq); 3467 cfq_clear_cfqq_split_coop(cfqq);
3465 return cfqq; 3468 return cfqq;
3466 } 3469 }
3467 3470
3468 cic_set_cfqq(cic, NULL, 1); 3471 cic_set_cfqq(cic, NULL, 1);
3469 cfq_put_queue(cfqq); 3472 cfq_put_queue(cfqq);
3470 return NULL; 3473 return NULL;
3471 } 3474 }
3472 /* 3475 /*
3473 * Allocate cfq data structures associated with this request. 3476 * Allocate cfq data structures associated with this request.
3474 */ 3477 */
3475 static int 3478 static int
3476 cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) 3479 cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
3477 { 3480 {
3478 struct cfq_data *cfqd = q->elevator->elevator_data; 3481 struct cfq_data *cfqd = q->elevator->elevator_data;
3479 struct cfq_io_context *cic; 3482 struct cfq_io_context *cic;
3480 const int rw = rq_data_dir(rq); 3483 const int rw = rq_data_dir(rq);
3481 const bool is_sync = rq_is_sync(rq); 3484 const bool is_sync = rq_is_sync(rq);
3482 struct cfq_queue *cfqq; 3485 struct cfq_queue *cfqq;
3483 unsigned long flags; 3486 unsigned long flags;
3484 3487
3485 might_sleep_if(gfp_mask & __GFP_WAIT); 3488 might_sleep_if(gfp_mask & __GFP_WAIT);
3486 3489
3487 cic = cfq_get_io_context(cfqd, gfp_mask); 3490 cic = cfq_get_io_context(cfqd, gfp_mask);
3488 3491
3489 spin_lock_irqsave(q->queue_lock, flags); 3492 spin_lock_irqsave(q->queue_lock, flags);
3490 3493
3491 if (!cic) 3494 if (!cic)
3492 goto queue_fail; 3495 goto queue_fail;
3493 3496
3494 new_queue: 3497 new_queue:
3495 cfqq = cic_to_cfqq(cic, is_sync); 3498 cfqq = cic_to_cfqq(cic, is_sync);
3496 if (!cfqq || cfqq == &cfqd->oom_cfqq) { 3499 if (!cfqq || cfqq == &cfqd->oom_cfqq) {
3497 cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask); 3500 cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
3498 cic_set_cfqq(cic, cfqq, is_sync); 3501 cic_set_cfqq(cic, cfqq, is_sync);
3499 } else { 3502 } else {
3500 /* 3503 /*
3501 * If the queue was seeky for too long, break it apart. 3504 * If the queue was seeky for too long, break it apart.
3502 */ 3505 */
3503 if (cfq_cfqq_coop(cfqq) && cfq_cfqq_split_coop(cfqq)) { 3506 if (cfq_cfqq_coop(cfqq) && cfq_cfqq_split_coop(cfqq)) {
3504 cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq"); 3507 cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq");
3505 cfqq = split_cfqq(cic, cfqq); 3508 cfqq = split_cfqq(cic, cfqq);
3506 if (!cfqq) 3509 if (!cfqq)
3507 goto new_queue; 3510 goto new_queue;
3508 } 3511 }
3509 3512
3510 /* 3513 /*
3511 * Check to see if this queue is scheduled to merge with 3514 * Check to see if this queue is scheduled to merge with
3512 * another, closely cooperating queue. The merging of 3515 * another, closely cooperating queue. The merging of
3513 * queues happens here as it must be done in process context. 3516 * queues happens here as it must be done in process context.
3514 * The reference on new_cfqq was taken in merge_cfqqs. 3517 * The reference on new_cfqq was taken in merge_cfqqs.
3515 */ 3518 */
3516 if (cfqq->new_cfqq) 3519 if (cfqq->new_cfqq)
3517 cfqq = cfq_merge_cfqqs(cfqd, cic, cfqq); 3520 cfqq = cfq_merge_cfqqs(cfqd, cic, cfqq);
3518 } 3521 }
3519 3522
3520 cfqq->allocated[rw]++; 3523 cfqq->allocated[rw]++;
3521 atomic_inc(&cfqq->ref); 3524 atomic_inc(&cfqq->ref);
3522 3525
3523 spin_unlock_irqrestore(q->queue_lock, flags); 3526 spin_unlock_irqrestore(q->queue_lock, flags);
3524 3527
3525 rq->elevator_private = cic; 3528 rq->elevator_private = cic;
3526 rq->elevator_private2 = cfqq; 3529 rq->elevator_private2 = cfqq;
3527 return 0; 3530 return 0;
3528 3531
3529 queue_fail: 3532 queue_fail:
3530 if (cic) 3533 if (cic)
3531 put_io_context(cic->ioc); 3534 put_io_context(cic->ioc);
3532 3535
3533 cfq_schedule_dispatch(cfqd); 3536 cfq_schedule_dispatch(cfqd);
3534 spin_unlock_irqrestore(q->queue_lock, flags); 3537 spin_unlock_irqrestore(q->queue_lock, flags);
3535 cfq_log(cfqd, "set_request fail"); 3538 cfq_log(cfqd, "set_request fail");
3536 return 1; 3539 return 1;
3537 } 3540 }
3538 3541
3539 static void cfq_kick_queue(struct work_struct *work) 3542 static void cfq_kick_queue(struct work_struct *work)
3540 { 3543 {
3541 struct cfq_data *cfqd = 3544 struct cfq_data *cfqd =
3542 container_of(work, struct cfq_data, unplug_work); 3545 container_of(work, struct cfq_data, unplug_work);
3543 struct request_queue *q = cfqd->queue; 3546 struct request_queue *q = cfqd->queue;
3544 3547
3545 spin_lock_irq(q->queue_lock); 3548 spin_lock_irq(q->queue_lock);
3546 __blk_run_queue(cfqd->queue); 3549 __blk_run_queue(cfqd->queue);
3547 spin_unlock_irq(q->queue_lock); 3550 spin_unlock_irq(q->queue_lock);
3548 } 3551 }
3549 3552
3550 /* 3553 /*
3551 * Timer running if the active_queue is currently idling inside its time slice 3554 * Timer running if the active_queue is currently idling inside its time slice
3552 */ 3555 */
3553 static void cfq_idle_slice_timer(unsigned long data) 3556 static void cfq_idle_slice_timer(unsigned long data)
3554 { 3557 {
3555 struct cfq_data *cfqd = (struct cfq_data *) data; 3558 struct cfq_data *cfqd = (struct cfq_data *) data;
3556 struct cfq_queue *cfqq; 3559 struct cfq_queue *cfqq;
3557 unsigned long flags; 3560 unsigned long flags;
3558 int timed_out = 1; 3561 int timed_out = 1;
3559 3562
3560 cfq_log(cfqd, "idle timer fired"); 3563 cfq_log(cfqd, "idle timer fired");
3561 3564
3562 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 3565 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
3563 3566
3564 cfqq = cfqd->active_queue; 3567 cfqq = cfqd->active_queue;
3565 if (cfqq) { 3568 if (cfqq) {
3566 timed_out = 0; 3569 timed_out = 0;
3567 3570
3568 /* 3571 /*
3569 * We saw a request before the queue expired, let it through 3572 * We saw a request before the queue expired, let it through
3570 */ 3573 */
3571 if (cfq_cfqq_must_dispatch(cfqq)) 3574 if (cfq_cfqq_must_dispatch(cfqq))
3572 goto out_kick; 3575 goto out_kick;
3573 3576
3574 /* 3577 /*
3575 * expired 3578 * expired
3576 */ 3579 */
3577 if (cfq_slice_used(cfqq)) 3580 if (cfq_slice_used(cfqq))
3578 goto expire; 3581 goto expire;
3579 3582
3580 /* 3583 /*
3581 * only expire and reinvoke request handler, if there are 3584 * only expire and reinvoke request handler, if there are
3582 * other queues with pending requests 3585 * other queues with pending requests
3583 */ 3586 */
3584 if (!cfqd->busy_queues) 3587 if (!cfqd->busy_queues)
3585 goto out_cont; 3588 goto out_cont;
3586 3589
3587 /* 3590 /*
3588 * not expired and it has a request pending, let it dispatch 3591 * not expired and it has a request pending, let it dispatch
3589 */ 3592 */
3590 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) 3593 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
3591 goto out_kick; 3594 goto out_kick;
3592 3595
3593 /* 3596 /*
3594 * Queue depth flag is reset only when the idle didn't succeed 3597 * Queue depth flag is reset only when the idle didn't succeed
3595 */ 3598 */
3596 cfq_clear_cfqq_deep(cfqq); 3599 cfq_clear_cfqq_deep(cfqq);
3597 } 3600 }
3598 expire: 3601 expire:
3599 cfq_slice_expired(cfqd, timed_out); 3602 cfq_slice_expired(cfqd, timed_out);
3600 out_kick: 3603 out_kick:
3601 cfq_schedule_dispatch(cfqd); 3604 cfq_schedule_dispatch(cfqd);
3602 out_cont: 3605 out_cont:
3603 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 3606 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
3604 } 3607 }
3605 3608
3606 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) 3609 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
3607 { 3610 {
3608 del_timer_sync(&cfqd->idle_slice_timer); 3611 del_timer_sync(&cfqd->idle_slice_timer);
3609 cancel_work_sync(&cfqd->unplug_work); 3612 cancel_work_sync(&cfqd->unplug_work);
3610 } 3613 }
3611 3614
3612 static void cfq_put_async_queues(struct cfq_data *cfqd) 3615 static void cfq_put_async_queues(struct cfq_data *cfqd)
3613 { 3616 {
3614 int i; 3617 int i;
3615 3618
3616 for (i = 0; i < IOPRIO_BE_NR; i++) { 3619 for (i = 0; i < IOPRIO_BE_NR; i++) {
3617 if (cfqd->async_cfqq[0][i]) 3620 if (cfqd->async_cfqq[0][i])
3618 cfq_put_queue(cfqd->async_cfqq[0][i]); 3621 cfq_put_queue(cfqd->async_cfqq[0][i]);
3619 if (cfqd->async_cfqq[1][i]) 3622 if (cfqd->async_cfqq[1][i])
3620 cfq_put_queue(cfqd->async_cfqq[1][i]); 3623 cfq_put_queue(cfqd->async_cfqq[1][i]);
3621 } 3624 }
3622 3625
3623 if (cfqd->async_idle_cfqq) 3626 if (cfqd->async_idle_cfqq)
3624 cfq_put_queue(cfqd->async_idle_cfqq); 3627 cfq_put_queue(cfqd->async_idle_cfqq);
3625 } 3628 }
3626 3629
3627 static void cfq_cfqd_free(struct rcu_head *head) 3630 static void cfq_cfqd_free(struct rcu_head *head)
3628 { 3631 {
3629 kfree(container_of(head, struct cfq_data, rcu)); 3632 kfree(container_of(head, struct cfq_data, rcu));
3630 } 3633 }
3631 3634
3632 static void cfq_exit_queue(struct elevator_queue *e) 3635 static void cfq_exit_queue(struct elevator_queue *e)
3633 { 3636 {
3634 struct cfq_data *cfqd = e->elevator_data; 3637 struct cfq_data *cfqd = e->elevator_data;
3635 struct request_queue *q = cfqd->queue; 3638 struct request_queue *q = cfqd->queue;
3636 3639
3637 cfq_shutdown_timer_wq(cfqd); 3640 cfq_shutdown_timer_wq(cfqd);
3638 3641
3639 spin_lock_irq(q->queue_lock); 3642 spin_lock_irq(q->queue_lock);
3640 3643
3641 if (cfqd->active_queue) 3644 if (cfqd->active_queue)
3642 __cfq_slice_expired(cfqd, cfqd->active_queue, 0); 3645 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
3643 3646
3644 while (!list_empty(&cfqd->cic_list)) { 3647 while (!list_empty(&cfqd->cic_list)) {
3645 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, 3648 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
3646 struct cfq_io_context, 3649 struct cfq_io_context,
3647 queue_list); 3650 queue_list);
3648 3651
3649 __cfq_exit_single_io_context(cfqd, cic); 3652 __cfq_exit_single_io_context(cfqd, cic);
3650 } 3653 }
3651 3654
3652 cfq_put_async_queues(cfqd); 3655 cfq_put_async_queues(cfqd);
3653 cfq_release_cfq_groups(cfqd); 3656 cfq_release_cfq_groups(cfqd);
3654 blkiocg_del_blkio_group(&cfqd->root_group.blkg); 3657 blkiocg_del_blkio_group(&cfqd->root_group.blkg);
3655 3658
3656 spin_unlock_irq(q->queue_lock); 3659 spin_unlock_irq(q->queue_lock);
3657 3660
3658 cfq_shutdown_timer_wq(cfqd); 3661 cfq_shutdown_timer_wq(cfqd);
3659 3662
3660 /* Wait for cfqg->blkg->key accessors to exit their grace periods. */ 3663 /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
3661 call_rcu(&cfqd->rcu, cfq_cfqd_free); 3664 call_rcu(&cfqd->rcu, cfq_cfqd_free);
3662 } 3665 }
3663 3666
3664 static void *cfq_init_queue(struct request_queue *q) 3667 static void *cfq_init_queue(struct request_queue *q)
3665 { 3668 {
3666 struct cfq_data *cfqd; 3669 struct cfq_data *cfqd;
3667 int i, j; 3670 int i, j;
3668 struct cfq_group *cfqg; 3671 struct cfq_group *cfqg;
3669 struct cfq_rb_root *st; 3672 struct cfq_rb_root *st;
3670 3673
3671 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); 3674 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
3672 if (!cfqd) 3675 if (!cfqd)
3673 return NULL; 3676 return NULL;
3674 3677
3675 /* Init root service tree */ 3678 /* Init root service tree */
3676 cfqd->grp_service_tree = CFQ_RB_ROOT; 3679 cfqd->grp_service_tree = CFQ_RB_ROOT;
3677 3680
3678 /* Init root group */ 3681 /* Init root group */
3679 cfqg = &cfqd->root_group; 3682 cfqg = &cfqd->root_group;
3680 for_each_cfqg_st(cfqg, i, j, st) 3683 for_each_cfqg_st(cfqg, i, j, st)
3681 *st = CFQ_RB_ROOT; 3684 *st = CFQ_RB_ROOT;
3682 RB_CLEAR_NODE(&cfqg->rb_node); 3685 RB_CLEAR_NODE(&cfqg->rb_node);
3683 3686
3684 /* Give preference to root group over other groups */ 3687 /* Give preference to root group over other groups */
3685 cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT; 3688 cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
3686 3689
3687 #ifdef CONFIG_CFQ_GROUP_IOSCHED 3690 #ifdef CONFIG_CFQ_GROUP_IOSCHED
3688 /* 3691 /*
3689 * Take a reference to root group which we never drop. This is just 3692 * Take a reference to root group which we never drop. This is just
3690 * to make sure that cfq_put_cfqg() does not try to kfree root group 3693 * to make sure that cfq_put_cfqg() does not try to kfree root group
3691 */ 3694 */
3692 atomic_set(&cfqg->ref, 1); 3695 atomic_set(&cfqg->ref, 1);
3693 blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, (void *)cfqd, 3696 blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, (void *)cfqd,
3694 0); 3697 0);
3695 #endif 3698 #endif
3696 /* 3699 /*
3697 * Not strictly needed (since RB_ROOT just clears the node and we 3700 * Not strictly needed (since RB_ROOT just clears the node and we
3698 * zeroed cfqd on alloc), but better be safe in case someone decides 3701 * zeroed cfqd on alloc), but better be safe in case someone decides
3699 * to add magic to the rb code 3702 * to add magic to the rb code
3700 */ 3703 */
3701 for (i = 0; i < CFQ_PRIO_LISTS; i++) 3704 for (i = 0; i < CFQ_PRIO_LISTS; i++)
3702 cfqd->prio_trees[i] = RB_ROOT; 3705 cfqd->prio_trees[i] = RB_ROOT;
3703 3706
3704 /* 3707 /*
3705 * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues. 3708 * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
3706 * Grab a permanent reference to it, so that the normal code flow 3709 * Grab a permanent reference to it, so that the normal code flow
3707 * will not attempt to free it. 3710 * will not attempt to free it.
3708 */ 3711 */
3709 cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); 3712 cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
3710 atomic_inc(&cfqd->oom_cfqq.ref); 3713 atomic_inc(&cfqd->oom_cfqq.ref);
3711 cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group); 3714 cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
3712 3715
3713 INIT_LIST_HEAD(&cfqd->cic_list); 3716 INIT_LIST_HEAD(&cfqd->cic_list);
3714 3717
3715 cfqd->queue = q; 3718 cfqd->queue = q;
3716 3719
3717 init_timer(&cfqd->idle_slice_timer); 3720 init_timer(&cfqd->idle_slice_timer);
3718 cfqd->idle_slice_timer.function = cfq_idle_slice_timer; 3721 cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
3719 cfqd->idle_slice_timer.data = (unsigned long) cfqd; 3722 cfqd->idle_slice_timer.data = (unsigned long) cfqd;
3720 3723
3721 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); 3724 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
3722 3725
3723 cfqd->cfq_quantum = cfq_quantum; 3726 cfqd->cfq_quantum = cfq_quantum;
3724 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; 3727 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
3725 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; 3728 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
3726 cfqd->cfq_back_max = cfq_back_max; 3729 cfqd->cfq_back_max = cfq_back_max;
3727 cfqd->cfq_back_penalty = cfq_back_penalty; 3730 cfqd->cfq_back_penalty = cfq_back_penalty;
3728 cfqd->cfq_slice[0] = cfq_slice_async; 3731 cfqd->cfq_slice[0] = cfq_slice_async;
3729 cfqd->cfq_slice[1] = cfq_slice_sync; 3732 cfqd->cfq_slice[1] = cfq_slice_sync;
3730 cfqd->cfq_slice_async_rq = cfq_slice_async_rq; 3733 cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
3731 cfqd->cfq_slice_idle = cfq_slice_idle; 3734 cfqd->cfq_slice_idle = cfq_slice_idle;
3732 cfqd->cfq_latency = 1; 3735 cfqd->cfq_latency = 1;
3733 cfqd->cfq_group_isolation = 0; 3736 cfqd->cfq_group_isolation = 0;
3734 cfqd->hw_tag = -1; 3737 cfqd->hw_tag = -1;
3735 /* 3738 /*
3736 * we optimistically start assuming sync ops weren't delayed in the last 3739 * we optimistically start assuming sync ops weren't delayed in the last
3737 * second, in order to have larger depth for async operations. 3740 * second, in order to have larger depth for async operations.
3738 */ 3741 */
3739 cfqd->last_delayed_sync = jiffies - HZ; 3742 cfqd->last_delayed_sync = jiffies - HZ;
3740 INIT_RCU_HEAD(&cfqd->rcu); 3743 INIT_RCU_HEAD(&cfqd->rcu);
3741 return cfqd; 3744 return cfqd;
3742 } 3745 }
3743 3746
3744 static void cfq_slab_kill(void) 3747 static void cfq_slab_kill(void)
3745 { 3748 {
3746 /* 3749 /*
3747 * Caller already ensured that pending RCU callbacks are completed, 3750 * Caller already ensured that pending RCU callbacks are completed,
3748 * so we should have no busy allocations at this point. 3751 * so we should have no busy allocations at this point.
3749 */ 3752 */
3750 if (cfq_pool) 3753 if (cfq_pool)
3751 kmem_cache_destroy(cfq_pool); 3754 kmem_cache_destroy(cfq_pool);
3752 if (cfq_ioc_pool) 3755 if (cfq_ioc_pool)
3753 kmem_cache_destroy(cfq_ioc_pool); 3756 kmem_cache_destroy(cfq_ioc_pool);
3754 } 3757 }
3755 3758
3756 static int __init cfq_slab_setup(void) 3759 static int __init cfq_slab_setup(void)
3757 { 3760 {
3758 cfq_pool = KMEM_CACHE(cfq_queue, 0); 3761 cfq_pool = KMEM_CACHE(cfq_queue, 0);
3759 if (!cfq_pool) 3762 if (!cfq_pool)
3760 goto fail; 3763 goto fail;
3761 3764
3762 cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0); 3765 cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0);
3763 if (!cfq_ioc_pool) 3766 if (!cfq_ioc_pool)
3764 goto fail; 3767 goto fail;
3765 3768
3766 return 0; 3769 return 0;
3767 fail: 3770 fail:
3768 cfq_slab_kill(); 3771 cfq_slab_kill();
3769 return -ENOMEM; 3772 return -ENOMEM;
3770 } 3773 }
3771 3774
3772 /* 3775 /*
3773 * sysfs parts below --> 3776 * sysfs parts below -->
3774 */ 3777 */
3775 static ssize_t 3778 static ssize_t
3776 cfq_var_show(unsigned int var, char *page) 3779 cfq_var_show(unsigned int var, char *page)
3777 { 3780 {
3778 return sprintf(page, "%d\n", var); 3781 return sprintf(page, "%d\n", var);
3779 } 3782 }
3780 3783
3781 static ssize_t 3784 static ssize_t
3782 cfq_var_store(unsigned int *var, const char *page, size_t count) 3785 cfq_var_store(unsigned int *var, const char *page, size_t count)
3783 { 3786 {
3784 char *p = (char *) page; 3787 char *p = (char *) page;
3785 3788
3786 *var = simple_strtoul(p, &p, 10); 3789 *var = simple_strtoul(p, &p, 10);
3787 return count; 3790 return count;
3788 } 3791 }
3789 3792
3790 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ 3793 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
3791 static ssize_t __FUNC(struct elevator_queue *e, char *page) \ 3794 static ssize_t __FUNC(struct elevator_queue *e, char *page) \
3792 { \ 3795 { \
3793 struct cfq_data *cfqd = e->elevator_data; \ 3796 struct cfq_data *cfqd = e->elevator_data; \
3794 unsigned int __data = __VAR; \ 3797 unsigned int __data = __VAR; \
3795 if (__CONV) \ 3798 if (__CONV) \
3796 __data = jiffies_to_msecs(__data); \ 3799 __data = jiffies_to_msecs(__data); \
3797 return cfq_var_show(__data, (page)); \ 3800 return cfq_var_show(__data, (page)); \
3798 } 3801 }
3799 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); 3802 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
3800 SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); 3803 SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
3801 SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); 3804 SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
3802 SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0); 3805 SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
3803 SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0); 3806 SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0);
3804 SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); 3807 SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
3805 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); 3808 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
3806 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); 3809 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
3807 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); 3810 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
3808 SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); 3811 SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
3809 SHOW_FUNCTION(cfq_group_isolation_show, cfqd->cfq_group_isolation, 0); 3812 SHOW_FUNCTION(cfq_group_isolation_show, cfqd->cfq_group_isolation, 0);
3810 #undef SHOW_FUNCTION 3813 #undef SHOW_FUNCTION
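/*
 * Each SHOW_FUNCTION() line above stamps out one sysfs "show" routine for
 * the elevator. As an illustration of the pattern (not a literal
 * preprocessor dump), SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0)
 * expands to roughly the following; with __CONV == 0 the jiffies-to-msecs
 * conversion is compiled away:
 */
static ssize_t cfq_quantum_show(struct elevator_queue *e, char *page)
{
	struct cfq_data *cfqd = e->elevator_data;
	unsigned int __data = cfqd->cfq_quantum;
	if (0)				/* __CONV == 0: value is not in jiffies */
		__data = jiffies_to_msecs(__data);
	return cfq_var_show(__data, (page));
}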
3811 3814
3812 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ 3815 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
3813 static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ 3816 static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
3814 { \ 3817 { \
3815 struct cfq_data *cfqd = e->elevator_data; \ 3818 struct cfq_data *cfqd = e->elevator_data; \
3816 unsigned int __data; \ 3819 unsigned int __data; \
3817 int ret = cfq_var_store(&__data, (page), count); \ 3820 int ret = cfq_var_store(&__data, (page), count); \
3818 if (__data < (MIN)) \ 3821 if (__data < (MIN)) \
3819 __data = (MIN); \ 3822 __data = (MIN); \
3820 else if (__data > (MAX)) \ 3823 else if (__data > (MAX)) \
3821 __data = (MAX); \ 3824 __data = (MAX); \
3822 if (__CONV) \ 3825 if (__CONV) \
3823 *(__PTR) = msecs_to_jiffies(__data); \ 3826 *(__PTR) = msecs_to_jiffies(__data); \
3824 else \ 3827 else \
3825 *(__PTR) = __data; \ 3828 *(__PTR) = __data; \
3826 return ret; \ 3829 return ret; \
3827 } 3830 }
3828 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); 3831 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
3829 STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, 3832 STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1,
3830 UINT_MAX, 1); 3833 UINT_MAX, 1);
3831 STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, 3834 STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1,
3832 UINT_MAX, 1); 3835 UINT_MAX, 1);
3833 STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); 3836 STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
3834 STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1, 3837 STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1,
3835 UINT_MAX, 0); 3838 UINT_MAX, 0);
3836 STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); 3839 STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
3837 STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); 3840 STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
3838 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); 3841 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
3839 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, 3842 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
3840 UINT_MAX, 0); 3843 UINT_MAX, 0);
3841 STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); 3844 STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
3842 STORE_FUNCTION(cfq_group_isolation_store, &cfqd->cfq_group_isolation, 0, 1, 0); 3845 STORE_FUNCTION(cfq_group_isolation_store, &cfqd->cfq_group_isolation, 0, 1, 0);
3843 #undef STORE_FUNCTION 3846 #undef STORE_FUNCTION
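/*
 * The STORE_FUNCTION() side is symmetrical: parse the written value, clamp
 * it to [MIN, MAX] and, when __CONV is set, treat the user's input as
 * milliseconds and store it as jiffies. As an illustration (again not a
 * literal preprocessor dump), STORE_FUNCTION(cfq_slice_idle_store,
 * &cfqd->cfq_slice_idle, 0, UINT_MAX, 1) expands to roughly:
 */
static ssize_t cfq_slice_idle_store(struct elevator_queue *e,
				    const char *page, size_t count)
{
	struct cfq_data *cfqd = e->elevator_data;
	unsigned int __data;
	int ret = cfq_var_store(&__data, (page), count);
	if (__data < (0))		/* never true for unsigned: MIN is 0 */
		__data = (0);
	else if (__data > (UINT_MAX))	/* never true either: MAX is UINT_MAX */
		__data = (UINT_MAX);
	if (1)				/* __CONV == 1: user writes msecs */
		*(&cfqd->cfq_slice_idle) = msecs_to_jiffies(__data);
	else
		*(&cfqd->cfq_slice_idle) = __data;
	return ret;
}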
3844 3847
3845 #define CFQ_ATTR(name) \ 3848 #define CFQ_ATTR(name) \
3846 __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store) 3849 __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
3847 3850
3848 static struct elv_fs_entry cfq_attrs[] = { 3851 static struct elv_fs_entry cfq_attrs[] = {
3849 CFQ_ATTR(quantum), 3852 CFQ_ATTR(quantum),
3850 CFQ_ATTR(fifo_expire_sync), 3853 CFQ_ATTR(fifo_expire_sync),
3851 CFQ_ATTR(fifo_expire_async), 3854 CFQ_ATTR(fifo_expire_async),
3852 CFQ_ATTR(back_seek_max), 3855 CFQ_ATTR(back_seek_max),
3853 CFQ_ATTR(back_seek_penalty), 3856 CFQ_ATTR(back_seek_penalty),
3854 CFQ_ATTR(slice_sync), 3857 CFQ_ATTR(slice_sync),
3855 CFQ_ATTR(slice_async), 3858 CFQ_ATTR(slice_async),
3856 CFQ_ATTR(slice_async_rq), 3859 CFQ_ATTR(slice_async_rq),
3857 CFQ_ATTR(slice_idle), 3860 CFQ_ATTR(slice_idle),
3858 CFQ_ATTR(low_latency), 3861 CFQ_ATTR(low_latency),
3859 CFQ_ATTR(group_isolation), 3862 CFQ_ATTR(group_isolation),
3860 __ATTR_NULL 3863 __ATTR_NULL
3861 }; 3864 };
3862 3865
3863 static struct elevator_type iosched_cfq = { 3866 static struct elevator_type iosched_cfq = {
3864 .ops = { 3867 .ops = {
3865 .elevator_merge_fn = cfq_merge, 3868 .elevator_merge_fn = cfq_merge,
3866 .elevator_merged_fn = cfq_merged_request, 3869 .elevator_merged_fn = cfq_merged_request,
3867 .elevator_merge_req_fn = cfq_merged_requests, 3870 .elevator_merge_req_fn = cfq_merged_requests,
3868 .elevator_allow_merge_fn = cfq_allow_merge, 3871 .elevator_allow_merge_fn = cfq_allow_merge,
3869 .elevator_dispatch_fn = cfq_dispatch_requests, 3872 .elevator_dispatch_fn = cfq_dispatch_requests,
3870 .elevator_add_req_fn = cfq_insert_request, 3873 .elevator_add_req_fn = cfq_insert_request,
3871 .elevator_activate_req_fn = cfq_activate_request, 3874 .elevator_activate_req_fn = cfq_activate_request,
3872 .elevator_deactivate_req_fn = cfq_deactivate_request, 3875 .elevator_deactivate_req_fn = cfq_deactivate_request,
3873 .elevator_queue_empty_fn = cfq_queue_empty, 3876 .elevator_queue_empty_fn = cfq_queue_empty,
3874 .elevator_completed_req_fn = cfq_completed_request, 3877 .elevator_completed_req_fn = cfq_completed_request,
3875 .elevator_former_req_fn = elv_rb_former_request, 3878 .elevator_former_req_fn = elv_rb_former_request,
3876 .elevator_latter_req_fn = elv_rb_latter_request, 3879 .elevator_latter_req_fn = elv_rb_latter_request,
3877 .elevator_set_req_fn = cfq_set_request, 3880 .elevator_set_req_fn = cfq_set_request,
3878 .elevator_put_req_fn = cfq_put_request, 3881 .elevator_put_req_fn = cfq_put_request,
3879 .elevator_may_queue_fn = cfq_may_queue, 3882 .elevator_may_queue_fn = cfq_may_queue,
3880 .elevator_init_fn = cfq_init_queue, 3883 .elevator_init_fn = cfq_init_queue,
3881 .elevator_exit_fn = cfq_exit_queue, 3884 .elevator_exit_fn = cfq_exit_queue,
3882 .trim = cfq_free_io_context, 3885 .trim = cfq_free_io_context,
3883 }, 3886 },
3884 .elevator_attrs = cfq_attrs, 3887 .elevator_attrs = cfq_attrs,
3885 .elevator_name = "cfq", 3888 .elevator_name = "cfq",
3886 .elevator_owner = THIS_MODULE, 3889 .elevator_owner = THIS_MODULE,
3887 }; 3890 };
3888 3891
3889 #ifdef CONFIG_CFQ_GROUP_IOSCHED 3892 #ifdef CONFIG_CFQ_GROUP_IOSCHED
3890 static struct blkio_policy_type blkio_policy_cfq = { 3893 static struct blkio_policy_type blkio_policy_cfq = {
3891 .ops = { 3894 .ops = {
3892 .blkio_unlink_group_fn = cfq_unlink_blkio_group, 3895 .blkio_unlink_group_fn = cfq_unlink_blkio_group,
3893 .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, 3896 .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
3894 }, 3897 },
3895 }; 3898 };
3896 #else 3899 #else
3897 static struct blkio_policy_type blkio_policy_cfq; 3900 static struct blkio_policy_type blkio_policy_cfq;
3898 #endif 3901 #endif
3899 3902
3900 static int __init cfq_init(void) 3903 static int __init cfq_init(void)
3901 { 3904 {
3902 /* 3905 /*
3903 * could be 0 on HZ < 1000 setups 3906 * could be 0 on HZ < 1000 setups
3904 */ 3907 */
3905 if (!cfq_slice_async) 3908 if (!cfq_slice_async)
3906 cfq_slice_async = 1; 3909 cfq_slice_async = 1;
3907 if (!cfq_slice_idle) 3910 if (!cfq_slice_idle)
3908 cfq_slice_idle = 1; 3911 cfq_slice_idle = 1;
3909 3912
3910 if (cfq_slab_setup()) 3913 if (cfq_slab_setup())
3911 return -ENOMEM; 3914 return -ENOMEM;
3912 3915
3913 elv_register(&iosched_cfq); 3916 elv_register(&iosched_cfq);
3914 blkio_policy_register(&blkio_policy_cfq); 3917 blkio_policy_register(&blkio_policy_cfq);
3915 3918
3916 return 0; 3919 return 0;
3917 } 3920 }
3918 3921
3919 static void __exit cfq_exit(void) 3922 static void __exit cfq_exit(void)
3920 { 3923 {
3921 DECLARE_COMPLETION_ONSTACK(all_gone); 3924 DECLARE_COMPLETION_ONSTACK(all_gone);
3922 blkio_policy_unregister(&blkio_policy_cfq); 3925 blkio_policy_unregister(&blkio_policy_cfq);
3923 elv_unregister(&iosched_cfq); 3926 elv_unregister(&iosched_cfq);
3924 ioc_gone = &all_gone; 3927 ioc_gone = &all_gone;
3925 /* ioc_gone's update must be visible before reading ioc_count */ 3928 /* ioc_gone's update must be visible before reading ioc_count */
3926 smp_wmb(); 3929 smp_wmb();
3927 3930
3928 /* 3931 /*
3929 * this also protects us from entering cfq_slab_kill() with 3932 * this also protects us from entering cfq_slab_kill() with
3930 * pending RCU callbacks 3933 * pending RCU callbacks
3931 */ 3934 */
3932 if (elv_ioc_count_read(cfq_ioc_count)) 3935 if (elv_ioc_count_read(cfq_ioc_count))
3933 wait_for_completion(&all_gone); 3936 wait_for_completion(&all_gone);
3934 cfq_slab_kill(); 3937 cfq_slab_kill();
3935 } 3938 }
3936 3939
3937 module_init(cfq_init); 3940 module_init(cfq_init);
3938 module_exit(cfq_exit); 3941 module_exit(cfq_exit);
3939 3942
3940 MODULE_AUTHOR("Jens Axboe"); 3943 MODULE_AUTHOR("Jens Axboe");
3941 MODULE_LICENSE("GPL"); 3944 MODULE_LICENSE("GPL");
3942 MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler"); 3945 MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");