Commit 02a8f01b5a9f396d0327977af4c232d0f94c45fd

Authored by Justin TerAvest
Committed by Jens Axboe
1 parent be2c6b1990

cfq-iosched: Don't wait if queue already has requests.

Commit 7667aa0630407bc07dc38dcc79d29cc0a65553c1 added logic to wait for
the last queue of the group to become busy (have at least one request),
so that the group does not lose out for not being continuously
backlogged. However, it did not check whether the last queue already has
requests; if it does, wait_busy is set anyway. Later, cfq_select_queue()
sees that the queue has a request and that wait_busy is set, and expires
the queue, ending its time slice early.

This patch fixes the problem by adding a check to see if the queue
already has requests. If it does, wait_busy is not set, so time slices
no longer expire early.

Queues with more than one request are usually buffered writers.
Testing shows improved isolation between buffered writers.

Cc: stable@kernel.org
Signed-off-by: Justin TerAvest <teravest@google.com>
Reviewed-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>

Showing 1 changed file with 4 additions and 0 deletions
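
The four added lines fall in a later hunk that is not part of the truncated
listing below. As a rough sketch of what the check described above amounts to
(hedged: the helper name cfq_should_wait_busy() is taken from the wait_busy
logic introduced by commit 7667aa06, and the exact added lines may differ),
the fix boils down to refusing to set wait_busy when the queue's sort_list
already holds requests; struct cfq_queue and its sort_list member are
declared in the listing below:

        /* Sketch only -- not the verbatim hunk from this commit. */
        static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
        {
                /*
                 * A queue that already has queued requests does not need
                 * wait_busy: marking it would only make cfq_select_queue()
                 * expire its slice early once the flag is seen.
                 */
                if (!RB_EMPTY_ROOT(&cfqq->sort_list))
                        return false;

                /* ... the pre-existing wait_busy conditions follow here ... */
                return true;
        }

With such a check in place a backlogged queue keeps its allocated slice,
which is where the improved isolation between buffered writers comes from.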

1 /* 1 /*
2 * CFQ, or complete fairness queueing, disk scheduler. 2 * CFQ, or complete fairness queueing, disk scheduler.
3 * 3 *
4 * Based on ideas from a previously unfinished io 4 * Based on ideas from a previously unfinished io
5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. 5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
6 * 6 *
7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> 7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
8 */ 8 */
9 #include <linux/module.h> 9 #include <linux/module.h>
10 #include <linux/slab.h> 10 #include <linux/slab.h>
11 #include <linux/blkdev.h> 11 #include <linux/blkdev.h>
12 #include <linux/elevator.h> 12 #include <linux/elevator.h>
13 #include <linux/jiffies.h> 13 #include <linux/jiffies.h>
14 #include <linux/rbtree.h> 14 #include <linux/rbtree.h>
15 #include <linux/ioprio.h> 15 #include <linux/ioprio.h>
16 #include <linux/blktrace_api.h> 16 #include <linux/blktrace_api.h>
17 #include "cfq.h" 17 #include "cfq.h"
18 18
19 /* 19 /*
20 * tunables 20 * tunables
21 */ 21 */
22 /* max queue in one round of service */ 22 /* max queue in one round of service */
23 static const int cfq_quantum = 8; 23 static const int cfq_quantum = 8;
24 static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 24 static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
25 /* maximum backwards seek, in KiB */ 25 /* maximum backwards seek, in KiB */
26 static const int cfq_back_max = 16 * 1024; 26 static const int cfq_back_max = 16 * 1024;
27 /* penalty of a backwards seek */ 27 /* penalty of a backwards seek */
28 static const int cfq_back_penalty = 2; 28 static const int cfq_back_penalty = 2;
29 static const int cfq_slice_sync = HZ / 10; 29 static const int cfq_slice_sync = HZ / 10;
30 static int cfq_slice_async = HZ / 25; 30 static int cfq_slice_async = HZ / 25;
31 static const int cfq_slice_async_rq = 2; 31 static const int cfq_slice_async_rq = 2;
32 static int cfq_slice_idle = HZ / 125; 32 static int cfq_slice_idle = HZ / 125;
33 static int cfq_group_idle = HZ / 125; 33 static int cfq_group_idle = HZ / 125;
34 static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ 34 static const int cfq_target_latency = HZ * 3/10; /* 300 ms */
35 static const int cfq_hist_divisor = 4; 35 static const int cfq_hist_divisor = 4;
36 36
37 /* 37 /*
38 * offset from end of service tree 38 * offset from end of service tree
39 */ 39 */
40 #define CFQ_IDLE_DELAY (HZ / 5) 40 #define CFQ_IDLE_DELAY (HZ / 5)
41 41
42 /* 42 /*
43 * below this threshold, we consider thinktime immediate 43 * below this threshold, we consider thinktime immediate
44 */ 44 */
45 #define CFQ_MIN_TT (2) 45 #define CFQ_MIN_TT (2)
46 46
47 #define CFQ_SLICE_SCALE (5) 47 #define CFQ_SLICE_SCALE (5)
48 #define CFQ_HW_QUEUE_MIN (5) 48 #define CFQ_HW_QUEUE_MIN (5)
49 #define CFQ_SERVICE_SHIFT 12 49 #define CFQ_SERVICE_SHIFT 12
50 50
51 #define CFQQ_SEEK_THR (sector_t)(8 * 100) 51 #define CFQQ_SEEK_THR (sector_t)(8 * 100)
52 #define CFQQ_CLOSE_THR (sector_t)(8 * 1024) 52 #define CFQQ_CLOSE_THR (sector_t)(8 * 1024)
53 #define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32) 53 #define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32)
54 #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) 54 #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8)
55 55
56 #define RQ_CIC(rq) \ 56 #define RQ_CIC(rq) \
57 ((struct cfq_io_context *) (rq)->elevator_private) 57 ((struct cfq_io_context *) (rq)->elevator_private)
58 #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) 58 #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2)
59 #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private3) 59 #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private3)
60 60
61 static struct kmem_cache *cfq_pool; 61 static struct kmem_cache *cfq_pool;
62 static struct kmem_cache *cfq_ioc_pool; 62 static struct kmem_cache *cfq_ioc_pool;
63 63
64 static DEFINE_PER_CPU(unsigned long, cfq_ioc_count); 64 static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
65 static struct completion *ioc_gone; 65 static struct completion *ioc_gone;
66 static DEFINE_SPINLOCK(ioc_gone_lock); 66 static DEFINE_SPINLOCK(ioc_gone_lock);
67 67
68 static DEFINE_SPINLOCK(cic_index_lock); 68 static DEFINE_SPINLOCK(cic_index_lock);
69 static DEFINE_IDA(cic_index_ida); 69 static DEFINE_IDA(cic_index_ida);
70 70
71 #define CFQ_PRIO_LISTS IOPRIO_BE_NR 71 #define CFQ_PRIO_LISTS IOPRIO_BE_NR
72 #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 72 #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
73 #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) 73 #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
74 74
75 #define sample_valid(samples) ((samples) > 80) 75 #define sample_valid(samples) ((samples) > 80)
76 #define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node) 76 #define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node)
77 77
78 /* 78 /*
79 * Most of our rbtree usage is for sorting with min extraction, so 79 * Most of our rbtree usage is for sorting with min extraction, so
80 * if we cache the leftmost node we don't have to walk down the tree 80 * if we cache the leftmost node we don't have to walk down the tree
81 * to find it. Idea borrowed from Ingo Molnars CFS scheduler. We should 81 * to find it. Idea borrowed from Ingo Molnars CFS scheduler. We should
82 * move this into the elevator for the rq sorting as well. 82 * move this into the elevator for the rq sorting as well.
83 */ 83 */
84 struct cfq_rb_root { 84 struct cfq_rb_root {
85 struct rb_root rb; 85 struct rb_root rb;
86 struct rb_node *left; 86 struct rb_node *left;
87 unsigned count; 87 unsigned count;
88 unsigned total_weight; 88 unsigned total_weight;
89 u64 min_vdisktime; 89 u64 min_vdisktime;
90 }; 90 };
91 #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \ 91 #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
92 .count = 0, .min_vdisktime = 0, } 92 .count = 0, .min_vdisktime = 0, }
93 93
94 /* 94 /*
95 * Per process-grouping structure 95 * Per process-grouping structure
96 */ 96 */
97 struct cfq_queue { 97 struct cfq_queue {
98 /* reference count */ 98 /* reference count */
99 int ref; 99 int ref;
100 /* various state flags, see below */ 100 /* various state flags, see below */
101 unsigned int flags; 101 unsigned int flags;
102 /* parent cfq_data */ 102 /* parent cfq_data */
103 struct cfq_data *cfqd; 103 struct cfq_data *cfqd;
104 /* service_tree member */ 104 /* service_tree member */
105 struct rb_node rb_node; 105 struct rb_node rb_node;
106 /* service_tree key */ 106 /* service_tree key */
107 unsigned long rb_key; 107 unsigned long rb_key;
108 /* prio tree member */ 108 /* prio tree member */
109 struct rb_node p_node; 109 struct rb_node p_node;
110 /* prio tree root we belong to, if any */ 110 /* prio tree root we belong to, if any */
111 struct rb_root *p_root; 111 struct rb_root *p_root;
112 /* sorted list of pending requests */ 112 /* sorted list of pending requests */
113 struct rb_root sort_list; 113 struct rb_root sort_list;
114 /* if fifo isn't expired, next request to serve */ 114 /* if fifo isn't expired, next request to serve */
115 struct request *next_rq; 115 struct request *next_rq;
116 /* requests queued in sort_list */ 116 /* requests queued in sort_list */
117 int queued[2]; 117 int queued[2];
118 /* currently allocated requests */ 118 /* currently allocated requests */
119 int allocated[2]; 119 int allocated[2];
120 /* fifo list of requests in sort_list */ 120 /* fifo list of requests in sort_list */
121 struct list_head fifo; 121 struct list_head fifo;
122 122
123 /* time when queue got scheduled in to dispatch first request. */ 123 /* time when queue got scheduled in to dispatch first request. */
124 unsigned long dispatch_start; 124 unsigned long dispatch_start;
125 unsigned int allocated_slice; 125 unsigned int allocated_slice;
126 unsigned int slice_dispatch; 126 unsigned int slice_dispatch;
127 /* time when first request from queue completed and slice started. */ 127 /* time when first request from queue completed and slice started. */
128 unsigned long slice_start; 128 unsigned long slice_start;
129 unsigned long slice_end; 129 unsigned long slice_end;
130 long slice_resid; 130 long slice_resid;
131 131
132 /* pending metadata requests */ 132 /* pending metadata requests */
133 int meta_pending; 133 int meta_pending;
134 /* number of requests that are on the dispatch list or inside driver */ 134 /* number of requests that are on the dispatch list or inside driver */
135 int dispatched; 135 int dispatched;
136 136
137 /* io prio of this group */ 137 /* io prio of this group */
138 unsigned short ioprio, org_ioprio; 138 unsigned short ioprio, org_ioprio;
139 unsigned short ioprio_class, org_ioprio_class; 139 unsigned short ioprio_class, org_ioprio_class;
140 140
141 pid_t pid; 141 pid_t pid;
142 142
143 u32 seek_history; 143 u32 seek_history;
144 sector_t last_request_pos; 144 sector_t last_request_pos;
145 145
146 struct cfq_rb_root *service_tree; 146 struct cfq_rb_root *service_tree;
147 struct cfq_queue *new_cfqq; 147 struct cfq_queue *new_cfqq;
148 struct cfq_group *cfqg; 148 struct cfq_group *cfqg;
149 struct cfq_group *orig_cfqg; 149 struct cfq_group *orig_cfqg;
150 /* Number of sectors dispatched from queue in single dispatch round */ 150 /* Number of sectors dispatched from queue in single dispatch round */
151 unsigned long nr_sectors; 151 unsigned long nr_sectors;
152 }; 152 };
153 153
154 /* 154 /*
155 * First index in the service_trees. 155 * First index in the service_trees.
156 * IDLE is handled separately, so it has negative index 156 * IDLE is handled separately, so it has negative index
157 */ 157 */
158 enum wl_prio_t { 158 enum wl_prio_t {
159 BE_WORKLOAD = 0, 159 BE_WORKLOAD = 0,
160 RT_WORKLOAD = 1, 160 RT_WORKLOAD = 1,
161 IDLE_WORKLOAD = 2, 161 IDLE_WORKLOAD = 2,
162 CFQ_PRIO_NR, 162 CFQ_PRIO_NR,
163 }; 163 };
164 164
165 /* 165 /*
166 * Second index in the service_trees. 166 * Second index in the service_trees.
167 */ 167 */
168 enum wl_type_t { 168 enum wl_type_t {
169 ASYNC_WORKLOAD = 0, 169 ASYNC_WORKLOAD = 0,
170 SYNC_NOIDLE_WORKLOAD = 1, 170 SYNC_NOIDLE_WORKLOAD = 1,
171 SYNC_WORKLOAD = 2 171 SYNC_WORKLOAD = 2
172 }; 172 };
173 173
174 /* This is per cgroup per device grouping structure */ 174 /* This is per cgroup per device grouping structure */
175 struct cfq_group { 175 struct cfq_group {
176 /* group service_tree member */ 176 /* group service_tree member */
177 struct rb_node rb_node; 177 struct rb_node rb_node;
178 178
179 /* group service_tree key */ 179 /* group service_tree key */
180 u64 vdisktime; 180 u64 vdisktime;
181 unsigned int weight; 181 unsigned int weight;
182 182
183 /* number of cfqq currently on this group */ 183 /* number of cfqq currently on this group */
184 int nr_cfqq; 184 int nr_cfqq;
185 185
186 /* 186 /*
187 * Per group busy queus average. Useful for workload slice calc. We 187 * Per group busy queus average. Useful for workload slice calc. We
188 * create the array for each prio class but at run time it is used 188 * create the array for each prio class but at run time it is used
189 * only for RT and BE class and slot for IDLE class remains unused. 189 * only for RT and BE class and slot for IDLE class remains unused.
190 * This is primarily done to avoid confusion and a gcc warning. 190 * This is primarily done to avoid confusion and a gcc warning.
191 */ 191 */
192 unsigned int busy_queues_avg[CFQ_PRIO_NR]; 192 unsigned int busy_queues_avg[CFQ_PRIO_NR];
193 /* 193 /*
194 * rr lists of queues with requests. We maintain service trees for 194 * rr lists of queues with requests. We maintain service trees for
195 * RT and BE classes. These trees are subdivided in subclasses 195 * RT and BE classes. These trees are subdivided in subclasses
196 * of SYNC, SYNC_NOIDLE and ASYNC based on workload type. For IDLE 196 * of SYNC, SYNC_NOIDLE and ASYNC based on workload type. For IDLE
197 * class there is no subclassification and all the cfq queues go on 197 * class there is no subclassification and all the cfq queues go on
198 * a single tree service_tree_idle. 198 * a single tree service_tree_idle.
199 * Counts are embedded in the cfq_rb_root 199 * Counts are embedded in the cfq_rb_root
200 */ 200 */
201 struct cfq_rb_root service_trees[2][3]; 201 struct cfq_rb_root service_trees[2][3];
202 struct cfq_rb_root service_tree_idle; 202 struct cfq_rb_root service_tree_idle;
203 203
204 unsigned long saved_workload_slice; 204 unsigned long saved_workload_slice;
205 enum wl_type_t saved_workload; 205 enum wl_type_t saved_workload;
206 enum wl_prio_t saved_serving_prio; 206 enum wl_prio_t saved_serving_prio;
207 struct blkio_group blkg; 207 struct blkio_group blkg;
208 #ifdef CONFIG_CFQ_GROUP_IOSCHED 208 #ifdef CONFIG_CFQ_GROUP_IOSCHED
209 struct hlist_node cfqd_node; 209 struct hlist_node cfqd_node;
210 int ref; 210 int ref;
211 #endif 211 #endif
212 /* number of requests that are on the dispatch list or inside driver */ 212 /* number of requests that are on the dispatch list or inside driver */
213 int dispatched; 213 int dispatched;
214 }; 214 };
215 215
216 /* 216 /*
217 * Per block device queue structure 217 * Per block device queue structure
218 */ 218 */
219 struct cfq_data { 219 struct cfq_data {
220 struct request_queue *queue; 220 struct request_queue *queue;
221 /* Root service tree for cfq_groups */ 221 /* Root service tree for cfq_groups */
222 struct cfq_rb_root grp_service_tree; 222 struct cfq_rb_root grp_service_tree;
223 struct cfq_group root_group; 223 struct cfq_group root_group;
224 224
225 /* 225 /*
226 * The priority currently being served 226 * The priority currently being served
227 */ 227 */
228 enum wl_prio_t serving_prio; 228 enum wl_prio_t serving_prio;
229 enum wl_type_t serving_type; 229 enum wl_type_t serving_type;
230 unsigned long workload_expires; 230 unsigned long workload_expires;
231 struct cfq_group *serving_group; 231 struct cfq_group *serving_group;
232 232
233 /* 233 /*
234 * Each priority tree is sorted by next_request position. These 234 * Each priority tree is sorted by next_request position. These
235 * trees are used when determining if two or more queues are 235 * trees are used when determining if two or more queues are
236 * interleaving requests (see cfq_close_cooperator). 236 * interleaving requests (see cfq_close_cooperator).
237 */ 237 */
238 struct rb_root prio_trees[CFQ_PRIO_LISTS]; 238 struct rb_root prio_trees[CFQ_PRIO_LISTS];
239 239
240 unsigned int busy_queues; 240 unsigned int busy_queues;
241 241
242 int rq_in_driver; 242 int rq_in_driver;
243 int rq_in_flight[2]; 243 int rq_in_flight[2];
244 244
245 /* 245 /*
246 * queue-depth detection 246 * queue-depth detection
247 */ 247 */
248 int rq_queued; 248 int rq_queued;
249 int hw_tag; 249 int hw_tag;
250 /* 250 /*
251 * hw_tag can be 251 * hw_tag can be
252 * -1 => indeterminate, (cfq will behave as if NCQ is present, to allow better detection) 252 * -1 => indeterminate, (cfq will behave as if NCQ is present, to allow better detection)
253 * 1 => NCQ is present (hw_tag_est_depth is the estimated max depth) 253 * 1 => NCQ is present (hw_tag_est_depth is the estimated max depth)
254 * 0 => no NCQ 254 * 0 => no NCQ
255 */ 255 */
256 int hw_tag_est_depth; 256 int hw_tag_est_depth;
257 unsigned int hw_tag_samples; 257 unsigned int hw_tag_samples;
258 258
259 /* 259 /*
260 * idle window management 260 * idle window management
261 */ 261 */
262 struct timer_list idle_slice_timer; 262 struct timer_list idle_slice_timer;
263 struct work_struct unplug_work; 263 struct work_struct unplug_work;
264 264
265 struct cfq_queue *active_queue; 265 struct cfq_queue *active_queue;
266 struct cfq_io_context *active_cic; 266 struct cfq_io_context *active_cic;
267 267
268 /* 268 /*
269 * async queue for each priority case 269 * async queue for each priority case
270 */ 270 */
271 struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR]; 271 struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
272 struct cfq_queue *async_idle_cfqq; 272 struct cfq_queue *async_idle_cfqq;
273 273
274 sector_t last_position; 274 sector_t last_position;
275 275
276 /* 276 /*
277 * tunables, see top of file 277 * tunables, see top of file
278 */ 278 */
279 unsigned int cfq_quantum; 279 unsigned int cfq_quantum;
280 unsigned int cfq_fifo_expire[2]; 280 unsigned int cfq_fifo_expire[2];
281 unsigned int cfq_back_penalty; 281 unsigned int cfq_back_penalty;
282 unsigned int cfq_back_max; 282 unsigned int cfq_back_max;
283 unsigned int cfq_slice[2]; 283 unsigned int cfq_slice[2];
284 unsigned int cfq_slice_async_rq; 284 unsigned int cfq_slice_async_rq;
285 unsigned int cfq_slice_idle; 285 unsigned int cfq_slice_idle;
286 unsigned int cfq_group_idle; 286 unsigned int cfq_group_idle;
287 unsigned int cfq_latency; 287 unsigned int cfq_latency;
288 unsigned int cfq_group_isolation; 288 unsigned int cfq_group_isolation;
289 289
290 unsigned int cic_index; 290 unsigned int cic_index;
291 struct list_head cic_list; 291 struct list_head cic_list;
292 292
293 /* 293 /*
294 * Fallback dummy cfqq for extreme OOM conditions 294 * Fallback dummy cfqq for extreme OOM conditions
295 */ 295 */
296 struct cfq_queue oom_cfqq; 296 struct cfq_queue oom_cfqq;
297 297
298 unsigned long last_delayed_sync; 298 unsigned long last_delayed_sync;
299 299
300 /* List of cfq groups being managed on this device*/ 300 /* List of cfq groups being managed on this device*/
301 struct hlist_head cfqg_list; 301 struct hlist_head cfqg_list;
302 struct rcu_head rcu; 302 struct rcu_head rcu;
303 }; 303 };
304 304
305 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); 305 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
306 306
307 static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg, 307 static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg,
308 enum wl_prio_t prio, 308 enum wl_prio_t prio,
309 enum wl_type_t type) 309 enum wl_type_t type)
310 { 310 {
311 if (!cfqg) 311 if (!cfqg)
312 return NULL; 312 return NULL;
313 313
314 if (prio == IDLE_WORKLOAD) 314 if (prio == IDLE_WORKLOAD)
315 return &cfqg->service_tree_idle; 315 return &cfqg->service_tree_idle;
316 316
317 return &cfqg->service_trees[prio][type]; 317 return &cfqg->service_trees[prio][type];
318 } 318 }
319 319
320 enum cfqq_state_flags { 320 enum cfqq_state_flags {
321 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ 321 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */
322 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ 322 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */
323 CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */ 323 CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */
324 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */ 324 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */
325 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ 325 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
326 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */ 326 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */
327 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ 327 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */
328 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ 328 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */
329 CFQ_CFQQ_FLAG_sync, /* synchronous queue */ 329 CFQ_CFQQ_FLAG_sync, /* synchronous queue */
330 CFQ_CFQQ_FLAG_coop, /* cfqq is shared */ 330 CFQ_CFQQ_FLAG_coop, /* cfqq is shared */
331 CFQ_CFQQ_FLAG_split_coop, /* shared cfqq will be splitted */ 331 CFQ_CFQQ_FLAG_split_coop, /* shared cfqq will be splitted */
332 CFQ_CFQQ_FLAG_deep, /* sync cfqq experienced large depth */ 332 CFQ_CFQQ_FLAG_deep, /* sync cfqq experienced large depth */
333 CFQ_CFQQ_FLAG_wait_busy, /* Waiting for next request */ 333 CFQ_CFQQ_FLAG_wait_busy, /* Waiting for next request */
334 }; 334 };
335 335
336 #define CFQ_CFQQ_FNS(name) \ 336 #define CFQ_CFQQ_FNS(name) \
337 static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \ 337 static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \
338 { \ 338 { \
339 (cfqq)->flags |= (1 << CFQ_CFQQ_FLAG_##name); \ 339 (cfqq)->flags |= (1 << CFQ_CFQQ_FLAG_##name); \
340 } \ 340 } \
341 static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \ 341 static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \
342 { \ 342 { \
343 (cfqq)->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \ 343 (cfqq)->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \
344 } \ 344 } \
345 static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ 345 static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
346 { \ 346 { \
347 return ((cfqq)->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \ 347 return ((cfqq)->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \
348 } 348 }
349 349
350 CFQ_CFQQ_FNS(on_rr); 350 CFQ_CFQQ_FNS(on_rr);
351 CFQ_CFQQ_FNS(wait_request); 351 CFQ_CFQQ_FNS(wait_request);
352 CFQ_CFQQ_FNS(must_dispatch); 352 CFQ_CFQQ_FNS(must_dispatch);
353 CFQ_CFQQ_FNS(must_alloc_slice); 353 CFQ_CFQQ_FNS(must_alloc_slice);
354 CFQ_CFQQ_FNS(fifo_expire); 354 CFQ_CFQQ_FNS(fifo_expire);
355 CFQ_CFQQ_FNS(idle_window); 355 CFQ_CFQQ_FNS(idle_window);
356 CFQ_CFQQ_FNS(prio_changed); 356 CFQ_CFQQ_FNS(prio_changed);
357 CFQ_CFQQ_FNS(slice_new); 357 CFQ_CFQQ_FNS(slice_new);
358 CFQ_CFQQ_FNS(sync); 358 CFQ_CFQQ_FNS(sync);
359 CFQ_CFQQ_FNS(coop); 359 CFQ_CFQQ_FNS(coop);
360 CFQ_CFQQ_FNS(split_coop); 360 CFQ_CFQQ_FNS(split_coop);
361 CFQ_CFQQ_FNS(deep); 361 CFQ_CFQQ_FNS(deep);
362 CFQ_CFQQ_FNS(wait_busy); 362 CFQ_CFQQ_FNS(wait_busy);
363 #undef CFQ_CFQQ_FNS 363 #undef CFQ_CFQQ_FNS
364 364
365 #ifdef CONFIG_CFQ_GROUP_IOSCHED 365 #ifdef CONFIG_CFQ_GROUP_IOSCHED
366 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ 366 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
367 blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ 367 blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \
368 cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ 368 cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
369 blkg_path(&(cfqq)->cfqg->blkg), ##args); 369 blkg_path(&(cfqq)->cfqg->blkg), ##args);
370 370
371 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) \ 371 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) \
372 blk_add_trace_msg((cfqd)->queue, "%s " fmt, \ 372 blk_add_trace_msg((cfqd)->queue, "%s " fmt, \
373 blkg_path(&(cfqg)->blkg), ##args); \ 373 blkg_path(&(cfqg)->blkg), ##args); \
374 374
375 #else 375 #else
376 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ 376 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
377 blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args) 377 blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
378 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0); 378 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0);
379 #endif 379 #endif
380 #define cfq_log(cfqd, fmt, args...) \ 380 #define cfq_log(cfqd, fmt, args...) \
381 blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args) 381 blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
382 382
383 /* Traverses through cfq group service trees */ 383 /* Traverses through cfq group service trees */
384 #define for_each_cfqg_st(cfqg, i, j, st) \ 384 #define for_each_cfqg_st(cfqg, i, j, st) \
385 for (i = 0; i <= IDLE_WORKLOAD; i++) \ 385 for (i = 0; i <= IDLE_WORKLOAD; i++) \
386 for (j = 0, st = i < IDLE_WORKLOAD ? &cfqg->service_trees[i][j]\ 386 for (j = 0, st = i < IDLE_WORKLOAD ? &cfqg->service_trees[i][j]\
387 : &cfqg->service_tree_idle; \ 387 : &cfqg->service_tree_idle; \
388 (i < IDLE_WORKLOAD && j <= SYNC_WORKLOAD) || \ 388 (i < IDLE_WORKLOAD && j <= SYNC_WORKLOAD) || \
389 (i == IDLE_WORKLOAD && j == 0); \ 389 (i == IDLE_WORKLOAD && j == 0); \
390 j++, st = i < IDLE_WORKLOAD ? \ 390 j++, st = i < IDLE_WORKLOAD ? \
391 &cfqg->service_trees[i][j]: NULL) \ 391 &cfqg->service_trees[i][j]: NULL) \
392 392
393 393
394 static inline bool iops_mode(struct cfq_data *cfqd) 394 static inline bool iops_mode(struct cfq_data *cfqd)
395 { 395 {
396 /* 396 /*
397 * If we are not idling on queues and it is a NCQ drive, parallel 397 * If we are not idling on queues and it is a NCQ drive, parallel
398 * execution of requests is on and measuring time is not possible 398 * execution of requests is on and measuring time is not possible
399 * in most of the cases until and unless we drive shallower queue 399 * in most of the cases until and unless we drive shallower queue
400 * depths and that becomes a performance bottleneck. In such cases 400 * depths and that becomes a performance bottleneck. In such cases
401 * switch to start providing fairness in terms of number of IOs. 401 * switch to start providing fairness in terms of number of IOs.
402 */ 402 */
403 if (!cfqd->cfq_slice_idle && cfqd->hw_tag) 403 if (!cfqd->cfq_slice_idle && cfqd->hw_tag)
404 return true; 404 return true;
405 else 405 else
406 return false; 406 return false;
407 } 407 }
408 408
409 static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq) 409 static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq)
410 { 410 {
411 if (cfq_class_idle(cfqq)) 411 if (cfq_class_idle(cfqq))
412 return IDLE_WORKLOAD; 412 return IDLE_WORKLOAD;
413 if (cfq_class_rt(cfqq)) 413 if (cfq_class_rt(cfqq))
414 return RT_WORKLOAD; 414 return RT_WORKLOAD;
415 return BE_WORKLOAD; 415 return BE_WORKLOAD;
416 } 416 }
417 417
418 418
419 static enum wl_type_t cfqq_type(struct cfq_queue *cfqq) 419 static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
420 { 420 {
421 if (!cfq_cfqq_sync(cfqq)) 421 if (!cfq_cfqq_sync(cfqq))
422 return ASYNC_WORKLOAD; 422 return ASYNC_WORKLOAD;
423 if (!cfq_cfqq_idle_window(cfqq)) 423 if (!cfq_cfqq_idle_window(cfqq))
424 return SYNC_NOIDLE_WORKLOAD; 424 return SYNC_NOIDLE_WORKLOAD;
425 return SYNC_WORKLOAD; 425 return SYNC_WORKLOAD;
426 } 426 }
427 427
428 static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl, 428 static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl,
429 struct cfq_data *cfqd, 429 struct cfq_data *cfqd,
430 struct cfq_group *cfqg) 430 struct cfq_group *cfqg)
431 { 431 {
432 if (wl == IDLE_WORKLOAD) 432 if (wl == IDLE_WORKLOAD)
433 return cfqg->service_tree_idle.count; 433 return cfqg->service_tree_idle.count;
434 434
435 return cfqg->service_trees[wl][ASYNC_WORKLOAD].count 435 return cfqg->service_trees[wl][ASYNC_WORKLOAD].count
436 + cfqg->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count 436 + cfqg->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count
437 + cfqg->service_trees[wl][SYNC_WORKLOAD].count; 437 + cfqg->service_trees[wl][SYNC_WORKLOAD].count;
438 } 438 }
439 439
440 static inline int cfqg_busy_async_queues(struct cfq_data *cfqd, 440 static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
441 struct cfq_group *cfqg) 441 struct cfq_group *cfqg)
442 { 442 {
443 return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count 443 return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count
444 + cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count; 444 + cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count;
445 } 445 }
446 446
447 static void cfq_dispatch_insert(struct request_queue *, struct request *); 447 static void cfq_dispatch_insert(struct request_queue *, struct request *);
448 static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool, 448 static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
449 struct io_context *, gfp_t); 449 struct io_context *, gfp_t);
450 static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, 450 static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
451 struct io_context *); 451 struct io_context *);
452 452
453 static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, 453 static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
454 bool is_sync) 454 bool is_sync)
455 { 455 {
456 return cic->cfqq[is_sync]; 456 return cic->cfqq[is_sync];
457 } 457 }
458 458
459 static inline void cic_set_cfqq(struct cfq_io_context *cic, 459 static inline void cic_set_cfqq(struct cfq_io_context *cic,
460 struct cfq_queue *cfqq, bool is_sync) 460 struct cfq_queue *cfqq, bool is_sync)
461 { 461 {
462 cic->cfqq[is_sync] = cfqq; 462 cic->cfqq[is_sync] = cfqq;
463 } 463 }
464 464
465 #define CIC_DEAD_KEY 1ul 465 #define CIC_DEAD_KEY 1ul
466 #define CIC_DEAD_INDEX_SHIFT 1 466 #define CIC_DEAD_INDEX_SHIFT 1
467 467
468 static inline void *cfqd_dead_key(struct cfq_data *cfqd) 468 static inline void *cfqd_dead_key(struct cfq_data *cfqd)
469 { 469 {
470 return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY); 470 return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
471 } 471 }
472 472
473 static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic) 473 static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
474 { 474 {
475 struct cfq_data *cfqd = cic->key; 475 struct cfq_data *cfqd = cic->key;
476 476
477 if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY)) 477 if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY))
478 return NULL; 478 return NULL;
479 479
480 return cfqd; 480 return cfqd;
481 } 481 }
482 482
483 /* 483 /*
484 * We regard a request as SYNC, if it's either a read or has the SYNC bit 484 * We regard a request as SYNC, if it's either a read or has the SYNC bit
485 * set (in which case it could also be direct WRITE). 485 * set (in which case it could also be direct WRITE).
486 */ 486 */
487 static inline bool cfq_bio_sync(struct bio *bio) 487 static inline bool cfq_bio_sync(struct bio *bio)
488 { 488 {
489 return bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC); 489 return bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC);
490 } 490 }
491 491
492 /* 492 /*
493 * scheduler run of queue, if there are requests pending and no one in the 493 * scheduler run of queue, if there are requests pending and no one in the
494 * driver that will restart queueing 494 * driver that will restart queueing
495 */ 495 */
496 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) 496 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
497 { 497 {
498 if (cfqd->busy_queues) { 498 if (cfqd->busy_queues) {
499 cfq_log(cfqd, "schedule dispatch"); 499 cfq_log(cfqd, "schedule dispatch");
500 kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); 500 kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
501 } 501 }
502 } 502 }
503 503
504 static int cfq_queue_empty(struct request_queue *q) 504 static int cfq_queue_empty(struct request_queue *q)
505 { 505 {
506 struct cfq_data *cfqd = q->elevator->elevator_data; 506 struct cfq_data *cfqd = q->elevator->elevator_data;
507 507
508 return !cfqd->rq_queued; 508 return !cfqd->rq_queued;
509 } 509 }
510 510
511 /* 511 /*
512 * Scale schedule slice based on io priority. Use the sync time slice only 512 * Scale schedule slice based on io priority. Use the sync time slice only
513 * if a queue is marked sync and has sync io queued. A sync queue with async 513 * if a queue is marked sync and has sync io queued. A sync queue with async
514 * io only, should not get full sync slice length. 514 * io only, should not get full sync slice length.
515 */ 515 */
516 static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync, 516 static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync,
517 unsigned short prio) 517 unsigned short prio)
518 { 518 {
519 const int base_slice = cfqd->cfq_slice[sync]; 519 const int base_slice = cfqd->cfq_slice[sync];
520 520
521 WARN_ON(prio >= IOPRIO_BE_NR); 521 WARN_ON(prio >= IOPRIO_BE_NR);
522 522
523 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio)); 523 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio));
524 } 524 }
525 525
526 static inline int 526 static inline int
527 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 527 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
528 { 528 {
529 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); 529 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
530 } 530 }
531 531
532 static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg) 532 static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg)
533 { 533 {
534 u64 d = delta << CFQ_SERVICE_SHIFT; 534 u64 d = delta << CFQ_SERVICE_SHIFT;
535 535
536 d = d * BLKIO_WEIGHT_DEFAULT; 536 d = d * BLKIO_WEIGHT_DEFAULT;
537 do_div(d, cfqg->weight); 537 do_div(d, cfqg->weight);
538 return d; 538 return d;
539 } 539 }
540 540
541 static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) 541 static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
542 { 542 {
543 s64 delta = (s64)(vdisktime - min_vdisktime); 543 s64 delta = (s64)(vdisktime - min_vdisktime);
544 if (delta > 0) 544 if (delta > 0)
545 min_vdisktime = vdisktime; 545 min_vdisktime = vdisktime;
546 546
547 return min_vdisktime; 547 return min_vdisktime;
548 } 548 }
549 549
550 static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime) 550 static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
551 { 551 {
552 s64 delta = (s64)(vdisktime - min_vdisktime); 552 s64 delta = (s64)(vdisktime - min_vdisktime);
553 if (delta < 0) 553 if (delta < 0)
554 min_vdisktime = vdisktime; 554 min_vdisktime = vdisktime;
555 555
556 return min_vdisktime; 556 return min_vdisktime;
557 } 557 }
558 558
559 static void update_min_vdisktime(struct cfq_rb_root *st) 559 static void update_min_vdisktime(struct cfq_rb_root *st)
560 { 560 {
561 u64 vdisktime = st->min_vdisktime; 561 u64 vdisktime = st->min_vdisktime;
562 struct cfq_group *cfqg; 562 struct cfq_group *cfqg;
563 563
564 if (st->left) { 564 if (st->left) {
565 cfqg = rb_entry_cfqg(st->left); 565 cfqg = rb_entry_cfqg(st->left);
566 vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime); 566 vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
567 } 567 }
568 568
569 st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime); 569 st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime);
570 } 570 }
571 571
572 /* 572 /*
573 * get averaged number of queues of RT/BE priority. 573 * get averaged number of queues of RT/BE priority.
574 * average is updated, with a formula that gives more weight to higher numbers, 574 * average is updated, with a formula that gives more weight to higher numbers,
575 * to quickly follows sudden increases and decrease slowly 575 * to quickly follows sudden increases and decrease slowly
576 */ 576 */
577 577
578 static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd, 578 static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
579 struct cfq_group *cfqg, bool rt) 579 struct cfq_group *cfqg, bool rt)
580 { 580 {
581 unsigned min_q, max_q; 581 unsigned min_q, max_q;
582 unsigned mult = cfq_hist_divisor - 1; 582 unsigned mult = cfq_hist_divisor - 1;
583 unsigned round = cfq_hist_divisor / 2; 583 unsigned round = cfq_hist_divisor / 2;
584 unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg); 584 unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg);
585 585
586 min_q = min(cfqg->busy_queues_avg[rt], busy); 586 min_q = min(cfqg->busy_queues_avg[rt], busy);
587 max_q = max(cfqg->busy_queues_avg[rt], busy); 587 max_q = max(cfqg->busy_queues_avg[rt], busy);
588 cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) / 588 cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
589 cfq_hist_divisor; 589 cfq_hist_divisor;
590 return cfqg->busy_queues_avg[rt]; 590 return cfqg->busy_queues_avg[rt];
591 } 591 }
592 592
593 static inline unsigned 593 static inline unsigned
594 cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) 594 cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
595 { 595 {
596 struct cfq_rb_root *st = &cfqd->grp_service_tree; 596 struct cfq_rb_root *st = &cfqd->grp_service_tree;
597 597
598 return cfq_target_latency * cfqg->weight / st->total_weight; 598 return cfq_target_latency * cfqg->weight / st->total_weight;
599 } 599 }
600 600
601 static inline unsigned 601 static inline unsigned
602 cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 602 cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
603 { 603 {
604 unsigned slice = cfq_prio_to_slice(cfqd, cfqq); 604 unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
605 if (cfqd->cfq_latency) { 605 if (cfqd->cfq_latency) {
606 /* 606 /*
607 * interested queues (we consider only the ones with the same 607 * interested queues (we consider only the ones with the same
608 * priority class in the cfq group) 608 * priority class in the cfq group)
609 */ 609 */
610 unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg, 610 unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
611 cfq_class_rt(cfqq)); 611 cfq_class_rt(cfqq));
612 unsigned sync_slice = cfqd->cfq_slice[1]; 612 unsigned sync_slice = cfqd->cfq_slice[1];
613 unsigned expect_latency = sync_slice * iq; 613 unsigned expect_latency = sync_slice * iq;
614 unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg); 614 unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
615 615
616 if (expect_latency > group_slice) { 616 if (expect_latency > group_slice) {
617 unsigned base_low_slice = 2 * cfqd->cfq_slice_idle; 617 unsigned base_low_slice = 2 * cfqd->cfq_slice_idle;
618 /* scale low_slice according to IO priority 618 /* scale low_slice according to IO priority
619 * and sync vs async */ 619 * and sync vs async */
620 unsigned low_slice = 620 unsigned low_slice =
621 min(slice, base_low_slice * slice / sync_slice); 621 min(slice, base_low_slice * slice / sync_slice);
622 /* the adapted slice value is scaled to fit all iqs 622 /* the adapted slice value is scaled to fit all iqs
623 * into the target latency */ 623 * into the target latency */
624 slice = max(slice * group_slice / expect_latency, 624 slice = max(slice * group_slice / expect_latency,
625 low_slice); 625 low_slice);
626 } 626 }
627 } 627 }
628 return slice; 628 return slice;
629 } 629 }
630 630
631 static inline void 631 static inline void
632 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 632 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
633 { 633 {
634 unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq); 634 unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq);
635 635
636 cfqq->slice_start = jiffies; 636 cfqq->slice_start = jiffies;
637 cfqq->slice_end = jiffies + slice; 637 cfqq->slice_end = jiffies + slice;
638 cfqq->allocated_slice = slice; 638 cfqq->allocated_slice = slice;
639 cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); 639 cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
640 } 640 }
641 641
642 /* 642 /*
643 * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end 643 * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end
644 * isn't valid until the first request from the dispatch is activated 644 * isn't valid until the first request from the dispatch is activated
645 * and the slice time set. 645 * and the slice time set.
646 */ 646 */
647 static inline bool cfq_slice_used(struct cfq_queue *cfqq) 647 static inline bool cfq_slice_used(struct cfq_queue *cfqq)
648 { 648 {
649 if (cfq_cfqq_slice_new(cfqq)) 649 if (cfq_cfqq_slice_new(cfqq))
650 return false; 650 return false;
651 if (time_before(jiffies, cfqq->slice_end)) 651 if (time_before(jiffies, cfqq->slice_end))
652 return false; 652 return false;
653 653
654 return true; 654 return true;
655 } 655 }
656 656
657 /* 657 /*
658 * Lifted from AS - choose which of rq1 and rq2 that is best served now. 658 * Lifted from AS - choose which of rq1 and rq2 that is best served now.
659 * We choose the request that is closest to the head right now. Distance 659 * We choose the request that is closest to the head right now. Distance
660 * behind the head is penalized and only allowed to a certain extent. 660 * behind the head is penalized and only allowed to a certain extent.
661 */ 661 */
662 static struct request * 662 static struct request *
663 cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, sector_t last) 663 cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, sector_t last)
664 { 664 {
665 sector_t s1, s2, d1 = 0, d2 = 0; 665 sector_t s1, s2, d1 = 0, d2 = 0;
666 unsigned long back_max; 666 unsigned long back_max;
667 #define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */ 667 #define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */
668 #define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */ 668 #define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */
669 unsigned wrap = 0; /* bit mask: requests behind the disk head? */ 669 unsigned wrap = 0; /* bit mask: requests behind the disk head? */
670 670
671 if (rq1 == NULL || rq1 == rq2) 671 if (rq1 == NULL || rq1 == rq2)
672 return rq2; 672 return rq2;
673 if (rq2 == NULL) 673 if (rq2 == NULL)
674 return rq1; 674 return rq1;
675 675
676 if (rq_is_sync(rq1) && !rq_is_sync(rq2)) 676 if (rq_is_sync(rq1) && !rq_is_sync(rq2))
677 return rq1; 677 return rq1;
678 else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) 678 else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
679 return rq2; 679 return rq2;
680 if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) 680 if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
681 return rq1; 681 return rq1;
682 else if ((rq2->cmd_flags & REQ_META) && 682 else if ((rq2->cmd_flags & REQ_META) &&
683 !(rq1->cmd_flags & REQ_META)) 683 !(rq1->cmd_flags & REQ_META))
684 return rq2; 684 return rq2;
685 685
686 s1 = blk_rq_pos(rq1); 686 s1 = blk_rq_pos(rq1);
687 s2 = blk_rq_pos(rq2); 687 s2 = blk_rq_pos(rq2);
688 688
689 /* 689 /*
690 * by definition, 1KiB is 2 sectors 690 * by definition, 1KiB is 2 sectors
691 */ 691 */
692 back_max = cfqd->cfq_back_max * 2; 692 back_max = cfqd->cfq_back_max * 2;
693 693
694 /* 694 /*
695 * Strict one way elevator _except_ in the case where we allow 695 * Strict one way elevator _except_ in the case where we allow
696 * short backward seeks which are biased as twice the cost of a 696 * short backward seeks which are biased as twice the cost of a
697 * similar forward seek. 697 * similar forward seek.
698 */ 698 */
699 if (s1 >= last) 699 if (s1 >= last)
700 d1 = s1 - last; 700 d1 = s1 - last;
701 else if (s1 + back_max >= last) 701 else if (s1 + back_max >= last)
702 d1 = (last - s1) * cfqd->cfq_back_penalty; 702 d1 = (last - s1) * cfqd->cfq_back_penalty;
703 else 703 else
704 wrap |= CFQ_RQ1_WRAP; 704 wrap |= CFQ_RQ1_WRAP;
705 705
706 if (s2 >= last) 706 if (s2 >= last)
707 d2 = s2 - last; 707 d2 = s2 - last;
708 else if (s2 + back_max >= last) 708 else if (s2 + back_max >= last)
709 d2 = (last - s2) * cfqd->cfq_back_penalty; 709 d2 = (last - s2) * cfqd->cfq_back_penalty;
710 else 710 else
711 wrap |= CFQ_RQ2_WRAP; 711 wrap |= CFQ_RQ2_WRAP;
712 712
713 /* Found required data */ 713 /* Found required data */
714 714
715 /* 715 /*
716 * By doing switch() on the bit mask "wrap" we avoid having to 716 * By doing switch() on the bit mask "wrap" we avoid having to
717 * check two variables for all permutations: --> faster! 717 * check two variables for all permutations: --> faster!
718 */ 718 */
719 switch (wrap) { 719 switch (wrap) {
720 case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ 720 case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
721 if (d1 < d2) 721 if (d1 < d2)
722 return rq1; 722 return rq1;
723 else if (d2 < d1) 723 else if (d2 < d1)
724 return rq2; 724 return rq2;
725 else { 725 else {
726 if (s1 >= s2) 726 if (s1 >= s2)
727 return rq1; 727 return rq1;
728 else 728 else
729 return rq2; 729 return rq2;
730 } 730 }
731 731
732 case CFQ_RQ2_WRAP: 732 case CFQ_RQ2_WRAP:
733 return rq1; 733 return rq1;
734 case CFQ_RQ1_WRAP: 734 case CFQ_RQ1_WRAP:
735 return rq2; 735 return rq2;
736 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */ 736 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
737 default: 737 default:
738 /* 738 /*
739 * Since both rqs are wrapped, 739 * Since both rqs are wrapped,
740 * start with the one that's further behind head 740 * start with the one that's further behind head
741 * (--> only *one* back seek required), 741 * (--> only *one* back seek required),
742 * since back seek takes more time than forward. 742 * since back seek takes more time than forward.
743 */ 743 */
744 if (s1 <= s2) 744 if (s1 <= s2)
745 return rq1; 745 return rq1;
746 else 746 else
747 return rq2; 747 return rq2;
748 } 748 }
749 } 749 }
750 750
751 /* 751 /*
752 * The below is leftmost cache rbtree addon 752 * The below is leftmost cache rbtree addon
753 */ 753 */
754 static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root) 754 static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
755 { 755 {
756 /* Service tree is empty */ 756 /* Service tree is empty */
757 if (!root->count) 757 if (!root->count)
758 return NULL; 758 return NULL;
759 759
760 if (!root->left) 760 if (!root->left)
761 root->left = rb_first(&root->rb); 761 root->left = rb_first(&root->rb);
762 762
763 if (root->left) 763 if (root->left)
764 return rb_entry(root->left, struct cfq_queue, rb_node); 764 return rb_entry(root->left, struct cfq_queue, rb_node);
765 765
766 return NULL; 766 return NULL;
767 } 767 }
768 768
769 static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root) 769 static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root)
770 { 770 {
771 if (!root->left) 771 if (!root->left)
772 root->left = rb_first(&root->rb); 772 root->left = rb_first(&root->rb);
773 773
774 if (root->left) 774 if (root->left)
775 return rb_entry_cfqg(root->left); 775 return rb_entry_cfqg(root->left);
776 776
777 return NULL; 777 return NULL;
778 } 778 }
779 779
780 static void rb_erase_init(struct rb_node *n, struct rb_root *root) 780 static void rb_erase_init(struct rb_node *n, struct rb_root *root)
781 { 781 {
782 rb_erase(n, root); 782 rb_erase(n, root);
783 RB_CLEAR_NODE(n); 783 RB_CLEAR_NODE(n);
784 } 784 }
785 785
786 static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) 786 static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
787 { 787 {
788 if (root->left == n) 788 if (root->left == n)
789 root->left = NULL; 789 root->left = NULL;
790 rb_erase_init(n, &root->rb); 790 rb_erase_init(n, &root->rb);
791 --root->count; 791 --root->count;
792 } 792 }
793 793
794 /* 794 /*
795 * would be nice to take fifo expire time into account as well 795 * would be nice to take fifo expire time into account as well
796 */ 796 */
797 static struct request * 797 static struct request *
798 cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq, 798 cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
799 struct request *last) 799 struct request *last)
800 { 800 {
801 struct rb_node *rbnext = rb_next(&last->rb_node); 801 struct rb_node *rbnext = rb_next(&last->rb_node);
802 struct rb_node *rbprev = rb_prev(&last->rb_node); 802 struct rb_node *rbprev = rb_prev(&last->rb_node);
803 struct request *next = NULL, *prev = NULL; 803 struct request *next = NULL, *prev = NULL;
804 804
805 BUG_ON(RB_EMPTY_NODE(&last->rb_node)); 805 BUG_ON(RB_EMPTY_NODE(&last->rb_node));
806 806
807 if (rbprev) 807 if (rbprev)
808 prev = rb_entry_rq(rbprev); 808 prev = rb_entry_rq(rbprev);
809 809
810 if (rbnext) 810 if (rbnext)
811 next = rb_entry_rq(rbnext); 811 next = rb_entry_rq(rbnext);
812 else { 812 else {
813 rbnext = rb_first(&cfqq->sort_list); 813 rbnext = rb_first(&cfqq->sort_list);
814 if (rbnext && rbnext != &last->rb_node) 814 if (rbnext && rbnext != &last->rb_node)
815 next = rb_entry_rq(rbnext); 815 next = rb_entry_rq(rbnext);
816 } 816 }
817 817
818 return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last)); 818 return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last));
819 } 819 }
820 820
821 static unsigned long cfq_slice_offset(struct cfq_data *cfqd, 821 static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
822 struct cfq_queue *cfqq) 822 struct cfq_queue *cfqq)
823 { 823 {
824 /* 824 /*
825 * just an approximation, should be ok. 825 * just an approximation, should be ok.
826 */ 826 */
827 return (cfqq->cfqg->nr_cfqq - 1) * (cfq_prio_slice(cfqd, 1, 0) - 827 return (cfqq->cfqg->nr_cfqq - 1) * (cfq_prio_slice(cfqd, 1, 0) -
828 cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio)); 828 cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
829 } 829 }
830 830
831 static inline s64 831 static inline s64
832 cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg) 832 cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg)
833 { 833 {
834 return cfqg->vdisktime - st->min_vdisktime; 834 return cfqg->vdisktime - st->min_vdisktime;
835 } 835 }
836 836
837 static void 837 static void
838 __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) 838 __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
839 { 839 {
840 struct rb_node **node = &st->rb.rb_node; 840 struct rb_node **node = &st->rb.rb_node;
841 struct rb_node *parent = NULL; 841 struct rb_node *parent = NULL;
842 struct cfq_group *__cfqg; 842 struct cfq_group *__cfqg;
843 s64 key = cfqg_key(st, cfqg); 843 s64 key = cfqg_key(st, cfqg);
844 int left = 1; 844 int left = 1;
845 845
846 while (*node != NULL) { 846 while (*node != NULL) {
847 parent = *node; 847 parent = *node;
848 __cfqg = rb_entry_cfqg(parent); 848 __cfqg = rb_entry_cfqg(parent);
849 849
850 if (key < cfqg_key(st, __cfqg)) 850 if (key < cfqg_key(st, __cfqg))
851 node = &parent->rb_left; 851 node = &parent->rb_left;
852 else { 852 else {
853 node = &parent->rb_right; 853 node = &parent->rb_right;
854 left = 0; 854 left = 0;
855 } 855 }
856 } 856 }
857 857
858 if (left) 858 if (left)
859 st->left = &cfqg->rb_node; 859 st->left = &cfqg->rb_node;
860 860
861 rb_link_node(&cfqg->rb_node, parent, node); 861 rb_link_node(&cfqg->rb_node, parent, node);
862 rb_insert_color(&cfqg->rb_node, &st->rb); 862 rb_insert_color(&cfqg->rb_node, &st->rb);
863 } 863 }
864 864
865 static void 865 static void
866 cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg) 866 cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
867 { 867 {
868 struct cfq_rb_root *st = &cfqd->grp_service_tree; 868 struct cfq_rb_root *st = &cfqd->grp_service_tree;
869 struct cfq_group *__cfqg; 869 struct cfq_group *__cfqg;
870 struct rb_node *n; 870 struct rb_node *n;
871 871
872 cfqg->nr_cfqq++; 872 cfqg->nr_cfqq++;
873 if (!RB_EMPTY_NODE(&cfqg->rb_node)) 873 if (!RB_EMPTY_NODE(&cfqg->rb_node))
874 return; 874 return;
875 875
876 /* 876 /*
877 * Currently put the group at the end. Later implement something 877 * Currently put the group at the end. Later implement something
878 * so that groups get lesser vtime based on their weights, so that 878 * so that groups get lesser vtime based on their weights, so that
879 * if group does not loose all if it was not continously backlogged. 879 * if group does not loose all if it was not continously backlogged.
880 */ 880 */
881 n = rb_last(&st->rb); 881 n = rb_last(&st->rb);
882 if (n) { 882 if (n) {
883 __cfqg = rb_entry_cfqg(n); 883 __cfqg = rb_entry_cfqg(n);
884 cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY; 884 cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
885 } else 885 } else
886 cfqg->vdisktime = st->min_vdisktime; 886 cfqg->vdisktime = st->min_vdisktime;
887 887
888 __cfq_group_service_tree_add(st, cfqg); 888 __cfq_group_service_tree_add(st, cfqg);
889 st->total_weight += cfqg->weight; 889 st->total_weight += cfqg->weight;
890 } 890 }
891 891
892 static void 892 static void
893 cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) 893 cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
894 { 894 {
895 struct cfq_rb_root *st = &cfqd->grp_service_tree; 895 struct cfq_rb_root *st = &cfqd->grp_service_tree;
896 896
897 BUG_ON(cfqg->nr_cfqq < 1); 897 BUG_ON(cfqg->nr_cfqq < 1);
898 cfqg->nr_cfqq--; 898 cfqg->nr_cfqq--;
899 899
900 /* If there are other cfq queues under this group, don't delete it */ 900 /* If there are other cfq queues under this group, don't delete it */
901 if (cfqg->nr_cfqq) 901 if (cfqg->nr_cfqq)
902 return; 902 return;
903 903
904 cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); 904 cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
905 st->total_weight -= cfqg->weight; 905 st->total_weight -= cfqg->weight;
906 if (!RB_EMPTY_NODE(&cfqg->rb_node)) 906 if (!RB_EMPTY_NODE(&cfqg->rb_node))
907 cfq_rb_erase(&cfqg->rb_node, st); 907 cfq_rb_erase(&cfqg->rb_node, st);
908 cfqg->saved_workload_slice = 0; 908 cfqg->saved_workload_slice = 0;
909 cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1); 909 cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
910 } 910 }
911 911
912 static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) 912 static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
913 { 913 {
914 unsigned int slice_used; 914 unsigned int slice_used;
915 915
916 /* 916 /*
917 * Queue got expired before even a single request completed or 917 * Queue got expired before even a single request completed or
918 * got expired immediately after first request completion. 918 * got expired immediately after first request completion.
919 */ 919 */
920 if (!cfqq->slice_start || cfqq->slice_start == jiffies) { 920 if (!cfqq->slice_start || cfqq->slice_start == jiffies) {
921 /* 921 /*
922 * Also charge the seek time incurred to the group, otherwise 922 * Also charge the seek time incurred to the group, otherwise
923 * if there are mutiple queues in the group, each can dispatch 923 * if there are mutiple queues in the group, each can dispatch
924 * a single request on seeky media and cause lots of seek time 924 * a single request on seeky media and cause lots of seek time
925 * and group will never know it. 925 * and group will never know it.
926 */ 926 */
927 slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start), 927 slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start),
928 1); 928 1);
929 } else { 929 } else {
930 slice_used = jiffies - cfqq->slice_start; 930 slice_used = jiffies - cfqq->slice_start;
931 if (slice_used > cfqq->allocated_slice) 931 if (slice_used > cfqq->allocated_slice)
932 slice_used = cfqq->allocated_slice; 932 slice_used = cfqq->allocated_slice;
933 } 933 }
934 934
935 return slice_used; 935 return slice_used;
936 } 936 }
937 937
938 static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, 938 static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
939 struct cfq_queue *cfqq) 939 struct cfq_queue *cfqq)
940 { 940 {
941 struct cfq_rb_root *st = &cfqd->grp_service_tree; 941 struct cfq_rb_root *st = &cfqd->grp_service_tree;
942 unsigned int used_sl, charge; 942 unsigned int used_sl, charge;
943 int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) 943 int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
944 - cfqg->service_tree_idle.count; 944 - cfqg->service_tree_idle.count;
945 945
946 BUG_ON(nr_sync < 0); 946 BUG_ON(nr_sync < 0);
947 used_sl = charge = cfq_cfqq_slice_usage(cfqq); 947 used_sl = charge = cfq_cfqq_slice_usage(cfqq);
948 948
949 if (iops_mode(cfqd)) 949 if (iops_mode(cfqd))
950 charge = cfqq->slice_dispatch; 950 charge = cfqq->slice_dispatch;
951 else if (!cfq_cfqq_sync(cfqq) && !nr_sync) 951 else if (!cfq_cfqq_sync(cfqq) && !nr_sync)
952 charge = cfqq->allocated_slice; 952 charge = cfqq->allocated_slice;
953 953
954 /* Can't update vdisktime while group is on service tree */ 954 /* Can't update vdisktime while group is on service tree */
955 cfq_rb_erase(&cfqg->rb_node, st); 955 cfq_rb_erase(&cfqg->rb_node, st);
956 cfqg->vdisktime += cfq_scale_slice(charge, cfqg); 956 cfqg->vdisktime += cfq_scale_slice(charge, cfqg);
957 __cfq_group_service_tree_add(st, cfqg); 957 __cfq_group_service_tree_add(st, cfqg);
958 958
959 /* This group is being expired. Save the context */ 959 /* This group is being expired. Save the context */
960 if (time_after(cfqd->workload_expires, jiffies)) { 960 if (time_after(cfqd->workload_expires, jiffies)) {
961 cfqg->saved_workload_slice = cfqd->workload_expires 961 cfqg->saved_workload_slice = cfqd->workload_expires
962 - jiffies; 962 - jiffies;
963 cfqg->saved_workload = cfqd->serving_type; 963 cfqg->saved_workload = cfqd->serving_type;
964 cfqg->saved_serving_prio = cfqd->serving_prio; 964 cfqg->saved_serving_prio = cfqd->serving_prio;
965 } else 965 } else
966 cfqg->saved_workload_slice = 0; 966 cfqg->saved_workload_slice = 0;
967 967
968 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, 968 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
969 st->min_vdisktime); 969 st->min_vdisktime);
970 cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u" 970 cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u"
971 " sect=%u", used_sl, cfqq->slice_dispatch, charge, 971 " sect=%u", used_sl, cfqq->slice_dispatch, charge,
972 iops_mode(cfqd), cfqq->nr_sectors); 972 iops_mode(cfqd), cfqq->nr_sectors);
973 cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl); 973 cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
974 cfq_blkiocg_set_start_empty_time(&cfqg->blkg); 974 cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
975 } 975 }
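The charge picked above depends on the mode: in IOPS mode a group pays per dispatched request, an async queue expiring in a group with no busy sync queues pays its whole allocated slice, and otherwise the group pays the wall-clock time actually used. A minimal sketch of that selection (illustrative only; the helper and its parameters are not from the source):

static unsigned int pick_charge(int iops_mode, int queue_is_sync, int nr_sync,
				unsigned int used_sl, unsigned int dispatched,
				unsigned int allocated_slice)
{
	if (iops_mode)
		return dispatched;		/* charge by request count */
	if (!queue_is_sync && !nr_sync)
		return allocated_slice;		/* async-only group: full slice */
	return used_sl;				/* normal case: time used */
}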
976 976
977 #ifdef CONFIG_CFQ_GROUP_IOSCHED 977 #ifdef CONFIG_CFQ_GROUP_IOSCHED
978 static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg) 978 static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
979 { 979 {
980 if (blkg) 980 if (blkg)
981 return container_of(blkg, struct cfq_group, blkg); 981 return container_of(blkg, struct cfq_group, blkg);
982 return NULL; 982 return NULL;
983 } 983 }
984 984
985 void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, 985 void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
986 unsigned int weight) 986 unsigned int weight)
987 { 987 {
988 cfqg_of_blkg(blkg)->weight = weight; 988 cfqg_of_blkg(blkg)->weight = weight;
989 } 989 }
990 990
991 static struct cfq_group * 991 static struct cfq_group *
992 cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) 992 cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
993 { 993 {
994 struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); 994 struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
995 struct cfq_group *cfqg = NULL; 995 struct cfq_group *cfqg = NULL;
996 void *key = cfqd; 996 void *key = cfqd;
997 int i, j; 997 int i, j;
998 struct cfq_rb_root *st; 998 struct cfq_rb_root *st;
999 struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info; 999 struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
1000 unsigned int major, minor; 1000 unsigned int major, minor;
1001 1001
1002 cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key)); 1002 cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
1003 if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) { 1003 if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
1004 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); 1004 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
1005 cfqg->blkg.dev = MKDEV(major, minor); 1005 cfqg->blkg.dev = MKDEV(major, minor);
1006 goto done; 1006 goto done;
1007 } 1007 }
1008 if (cfqg || !create) 1008 if (cfqg || !create)
1009 goto done; 1009 goto done;
1010 1010
1011 cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node); 1011 cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
1012 if (!cfqg) 1012 if (!cfqg)
1013 goto done; 1013 goto done;
1014 1014
1015 for_each_cfqg_st(cfqg, i, j, st) 1015 for_each_cfqg_st(cfqg, i, j, st)
1016 *st = CFQ_RB_ROOT; 1016 *st = CFQ_RB_ROOT;
1017 RB_CLEAR_NODE(&cfqg->rb_node); 1017 RB_CLEAR_NODE(&cfqg->rb_node);
1018 1018
1019 /* 1019 /*
1020 * Take the initial reference that will be released on destroy 1020 * Take the initial reference that will be released on destroy
1021 * This can be thought of a joint reference by cgroup and 1021 * This can be thought of a joint reference by cgroup and
1022 * elevator which will be dropped by either elevator exit 1022 * elevator which will be dropped by either elevator exit
1023 * or cgroup deletion path depending on who is exiting first. 1023 * or cgroup deletion path depending on who is exiting first.
1024 */ 1024 */
1025 cfqg->ref = 1; 1025 cfqg->ref = 1;
1026 1026
1027 /* 1027 /*
1028 * Add group onto cgroup list. It might happen that bdi->dev is 1028 * Add group onto cgroup list. It might happen that bdi->dev is
1029 * not initialized yet. Initialize this new group without major 1029 * not initialized yet. Initialize this new group without major
1030 * and minor info and this info will be filled in once a new thread 1030 * and minor info and this info will be filled in once a new thread
1031 * comes for IO. See code above. 1031 * comes for IO. See code above.
1032 */ 1032 */
1033 if (bdi->dev) { 1033 if (bdi->dev) {
1034 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); 1034 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
1035 cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd, 1035 cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
1036 MKDEV(major, minor)); 1036 MKDEV(major, minor));
1037 } else 1037 } else
1038 cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd, 1038 cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
1039 0); 1039 0);
1040 1040
1041 cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); 1041 cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
1042 1042
1043 /* Add group on cfqd list */ 1043 /* Add group on cfqd list */
1044 hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); 1044 hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
1045 1045
1046 done: 1046 done:
1047 return cfqg; 1047 return cfqg;
1048 } 1048 }
1049 1049
1050 /* 1050 /*
1051 * Search for the cfq group current task belongs to. If create = 1, then also 1051 * Search for the cfq group current task belongs to. If create = 1, then also
1052 * create the cfq group if it does not exist. request_queue lock must be held. 1052 * create the cfq group if it does not exist. request_queue lock must be held.
1053 */ 1053 */
1054 static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) 1054 static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
1055 { 1055 {
1056 struct cgroup *cgroup; 1056 struct cgroup *cgroup;
1057 struct cfq_group *cfqg = NULL; 1057 struct cfq_group *cfqg = NULL;
1058 1058
1059 rcu_read_lock(); 1059 rcu_read_lock();
1060 cgroup = task_cgroup(current, blkio_subsys_id); 1060 cgroup = task_cgroup(current, blkio_subsys_id);
1061 cfqg = cfq_find_alloc_cfqg(cfqd, cgroup, create); 1061 cfqg = cfq_find_alloc_cfqg(cfqd, cgroup, create);
1062 if (!cfqg && create) 1062 if (!cfqg && create)
1063 cfqg = &cfqd->root_group; 1063 cfqg = &cfqd->root_group;
1064 rcu_read_unlock(); 1064 rcu_read_unlock();
1065 return cfqg; 1065 return cfqg;
1066 } 1066 }
1067 1067
1068 static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) 1068 static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
1069 { 1069 {
1070 cfqg->ref++; 1070 cfqg->ref++;
1071 return cfqg; 1071 return cfqg;
1072 } 1072 }
1073 1073
1074 static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) 1074 static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
1075 { 1075 {
1076 /* Currently, all async queues are mapped to root group */ 1076 /* Currently, all async queues are mapped to root group */
1077 if (!cfq_cfqq_sync(cfqq)) 1077 if (!cfq_cfqq_sync(cfqq))
1078 cfqg = &cfqq->cfqd->root_group; 1078 cfqg = &cfqq->cfqd->root_group;
1079 1079
1080 cfqq->cfqg = cfqg; 1080 cfqq->cfqg = cfqg;
1081 /* cfqq reference on cfqg */ 1081 /* cfqq reference on cfqg */
1082 cfqq->cfqg->ref++; 1082 cfqq->cfqg->ref++;
1083 } 1083 }
1084 1084
1085 static void cfq_put_cfqg(struct cfq_group *cfqg) 1085 static void cfq_put_cfqg(struct cfq_group *cfqg)
1086 { 1086 {
1087 struct cfq_rb_root *st; 1087 struct cfq_rb_root *st;
1088 int i, j; 1088 int i, j;
1089 1089
1090 BUG_ON(cfqg->ref <= 0); 1090 BUG_ON(cfqg->ref <= 0);
1091 cfqg->ref--; 1091 cfqg->ref--;
1092 if (cfqg->ref) 1092 if (cfqg->ref)
1093 return; 1093 return;
1094 for_each_cfqg_st(cfqg, i, j, st) 1094 for_each_cfqg_st(cfqg, i, j, st)
1095 BUG_ON(!RB_EMPTY_ROOT(&st->rb)); 1095 BUG_ON(!RB_EMPTY_ROOT(&st->rb));
1096 kfree(cfqg); 1096 kfree(cfqg);
1097 } 1097 }
1098 1098
1099 static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) 1099 static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
1100 { 1100 {
1101 /* Something wrong if we are trying to remove same group twice */ 1101 /* Something wrong if we are trying to remove same group twice */
1102 BUG_ON(hlist_unhashed(&cfqg->cfqd_node)); 1102 BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
1103 1103
1104 hlist_del_init(&cfqg->cfqd_node); 1104 hlist_del_init(&cfqg->cfqd_node);
1105 1105
1106 /* 1106 /*
1107 * Put the reference taken at the time of creation so that when all 1107 * Put the reference taken at the time of creation so that when all
1108 * queues are gone, group can be destroyed. 1108 * queues are gone, group can be destroyed.
1109 */ 1109 */
1110 cfq_put_cfqg(cfqg); 1110 cfq_put_cfqg(cfqg);
1111 } 1111 }
1112 1112
1113 static void cfq_release_cfq_groups(struct cfq_data *cfqd) 1113 static void cfq_release_cfq_groups(struct cfq_data *cfqd)
1114 { 1114 {
1115 struct hlist_node *pos, *n; 1115 struct hlist_node *pos, *n;
1116 struct cfq_group *cfqg; 1116 struct cfq_group *cfqg;
1117 1117
1118 hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) { 1118 hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) {
1119 /* 1119 /*
1120 * If cgroup removal path got to blk_group first and removed 1120 * If cgroup removal path got to blk_group first and removed
1121 * it from cgroup list, then it will take care of destroying 1121 * it from cgroup list, then it will take care of destroying
1122 * cfqg also. 1122 * cfqg also.
1123 */ 1123 */
1124 if (!cfq_blkiocg_del_blkio_group(&cfqg->blkg)) 1124 if (!cfq_blkiocg_del_blkio_group(&cfqg->blkg))
1125 cfq_destroy_cfqg(cfqd, cfqg); 1125 cfq_destroy_cfqg(cfqd, cfqg);
1126 } 1126 }
1127 } 1127 }
1128 1128
1129 /* 1129 /*
1130 * Blk cgroup controller notification saying that blkio_group object is being 1130 * Blk cgroup controller notification saying that blkio_group object is being
1131 * delinked as the associated cgroup object is going away. That also means that 1131 * delinked as the associated cgroup object is going away. That also means that
1132 * no new IO will come in this group. So get rid of this group as soon as 1132 * no new IO will come in this group. So get rid of this group as soon as
1133 * any pending IO in the group is finished. 1133 * any pending IO in the group is finished.
1134 * 1134 *
1135 * This function is called under rcu_read_lock(). key is the rcu protected 1135 * This function is called under rcu_read_lock(). key is the rcu protected
1136 * pointer. That means "key" is a valid cfq_data pointer as long as we hold 1136 * pointer. That means "key" is a valid cfq_data pointer as long as we hold
1137 * the rcu read lock. 1137 * the rcu read lock.
1138 * 1138 *
1139 * "key" was fetched from blkio_group under blkio_cgroup->lock. That means 1139 * "key" was fetched from blkio_group under blkio_cgroup->lock. That means
1140 * it should not be NULL as even if elevator was exiting, cgroup deletion 1140 * it should not be NULL as even if elevator was exiting, cgroup deletion
1141 * path got to it first. 1141 * path got to it first.
1142 */ 1142 */
1143 void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg) 1143 void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
1144 { 1144 {
1145 unsigned long flags; 1145 unsigned long flags;
1146 struct cfq_data *cfqd = key; 1146 struct cfq_data *cfqd = key;
1147 1147
1148 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 1148 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
1149 cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg)); 1149 cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg));
1150 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 1150 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
1151 } 1151 }
1152 1152
1153 #else /* GROUP_IOSCHED */ 1153 #else /* GROUP_IOSCHED */
1154 static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) 1154 static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
1155 { 1155 {
1156 return &cfqd->root_group; 1156 return &cfqd->root_group;
1157 } 1157 }
1158 1158
1159 static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) 1159 static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
1160 { 1160 {
1161 return cfqg; 1161 return cfqg;
1162 } 1162 }
1163 1163
1164 static inline void 1164 static inline void
1165 cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { 1165 cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
1166 cfqq->cfqg = cfqg; 1166 cfqq->cfqg = cfqg;
1167 } 1167 }
1168 1168
1169 static void cfq_release_cfq_groups(struct cfq_data *cfqd) {} 1169 static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
1170 static inline void cfq_put_cfqg(struct cfq_group *cfqg) {} 1170 static inline void cfq_put_cfqg(struct cfq_group *cfqg) {}
1171 1171
1172 #endif /* GROUP_IOSCHED */ 1172 #endif /* GROUP_IOSCHED */
1173 1173
1174 /* 1174 /*
1175 * The cfqd->service_trees holds all pending cfq_queue's that have 1175 * The cfqd->service_trees holds all pending cfq_queue's that have
1176 * requests waiting to be processed. It is sorted in the order that 1176 * requests waiting to be processed. It is sorted in the order that
1177 * we will service the queues. 1177 * we will service the queues.
1178 */ 1178 */
1179 static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1179 static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1180 bool add_front) 1180 bool add_front)
1181 { 1181 {
1182 struct rb_node **p, *parent; 1182 struct rb_node **p, *parent;
1183 struct cfq_queue *__cfqq; 1183 struct cfq_queue *__cfqq;
1184 unsigned long rb_key; 1184 unsigned long rb_key;
1185 struct cfq_rb_root *service_tree; 1185 struct cfq_rb_root *service_tree;
1186 int left; 1186 int left;
1187 int new_cfqq = 1; 1187 int new_cfqq = 1;
1188 int group_changed = 0; 1188 int group_changed = 0;
1189 1189
1190 #ifdef CONFIG_CFQ_GROUP_IOSCHED 1190 #ifdef CONFIG_CFQ_GROUP_IOSCHED
1191 if (!cfqd->cfq_group_isolation 1191 if (!cfqd->cfq_group_isolation
1192 && cfqq_type(cfqq) == SYNC_NOIDLE_WORKLOAD 1192 && cfqq_type(cfqq) == SYNC_NOIDLE_WORKLOAD
1193 && cfqq->cfqg && cfqq->cfqg != &cfqd->root_group) { 1193 && cfqq->cfqg && cfqq->cfqg != &cfqd->root_group) {
1194 /* Move this cfq to root group */ 1194 /* Move this cfq to root group */
1195 cfq_log_cfqq(cfqd, cfqq, "moving to root group"); 1195 cfq_log_cfqq(cfqd, cfqq, "moving to root group");
1196 if (!RB_EMPTY_NODE(&cfqq->rb_node)) 1196 if (!RB_EMPTY_NODE(&cfqq->rb_node))
1197 cfq_group_service_tree_del(cfqd, cfqq->cfqg); 1197 cfq_group_service_tree_del(cfqd, cfqq->cfqg);
1198 cfqq->orig_cfqg = cfqq->cfqg; 1198 cfqq->orig_cfqg = cfqq->cfqg;
1199 cfqq->cfqg = &cfqd->root_group; 1199 cfqq->cfqg = &cfqd->root_group;
1200 cfqd->root_group.ref++; 1200 cfqd->root_group.ref++;
1201 group_changed = 1; 1201 group_changed = 1;
1202 } else if (!cfqd->cfq_group_isolation 1202 } else if (!cfqd->cfq_group_isolation
1203 && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) { 1203 && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
1204 /* cfqq is sequential now and needs to go to its original group */ 1204 /* cfqq is sequential now and needs to go to its original group */
1205 BUG_ON(cfqq->cfqg != &cfqd->root_group); 1205 BUG_ON(cfqq->cfqg != &cfqd->root_group);
1206 if (!RB_EMPTY_NODE(&cfqq->rb_node)) 1206 if (!RB_EMPTY_NODE(&cfqq->rb_node))
1207 cfq_group_service_tree_del(cfqd, cfqq->cfqg); 1207 cfq_group_service_tree_del(cfqd, cfqq->cfqg);
1208 cfq_put_cfqg(cfqq->cfqg); 1208 cfq_put_cfqg(cfqq->cfqg);
1209 cfqq->cfqg = cfqq->orig_cfqg; 1209 cfqq->cfqg = cfqq->orig_cfqg;
1210 cfqq->orig_cfqg = NULL; 1210 cfqq->orig_cfqg = NULL;
1211 group_changed = 1; 1211 group_changed = 1;
1212 cfq_log_cfqq(cfqd, cfqq, "moved to origin group"); 1212 cfq_log_cfqq(cfqd, cfqq, "moved to origin group");
1213 } 1213 }
1214 #endif 1214 #endif
1215 1215
1216 service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq), 1216 service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
1217 cfqq_type(cfqq)); 1217 cfqq_type(cfqq));
1218 if (cfq_class_idle(cfqq)) { 1218 if (cfq_class_idle(cfqq)) {
1219 rb_key = CFQ_IDLE_DELAY; 1219 rb_key = CFQ_IDLE_DELAY;
1220 parent = rb_last(&service_tree->rb); 1220 parent = rb_last(&service_tree->rb);
1221 if (parent && parent != &cfqq->rb_node) { 1221 if (parent && parent != &cfqq->rb_node) {
1222 __cfqq = rb_entry(parent, struct cfq_queue, rb_node); 1222 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
1223 rb_key += __cfqq->rb_key; 1223 rb_key += __cfqq->rb_key;
1224 } else 1224 } else
1225 rb_key += jiffies; 1225 rb_key += jiffies;
1226 } else if (!add_front) { 1226 } else if (!add_front) {
1227 /* 1227 /*
1228 * Get our rb key offset. Subtract any residual slice 1228 * Get our rb key offset. Subtract any residual slice
1229 * value carried from last service. A negative resid 1229 * value carried from last service. A negative resid
1230 * count indicates slice overrun, and this should position 1230 * count indicates slice overrun, and this should position
1231 * the next service time further away in the tree. 1231 * the next service time further away in the tree.
1232 */ 1232 */
1233 rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; 1233 rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
1234 rb_key -= cfqq->slice_resid; 1234 rb_key -= cfqq->slice_resid;
1235 cfqq->slice_resid = 0; 1235 cfqq->slice_resid = 0;
1236 } else { 1236 } else {
1237 rb_key = -HZ; 1237 rb_key = -HZ;
1238 __cfqq = cfq_rb_first(service_tree); 1238 __cfqq = cfq_rb_first(service_tree);
1239 rb_key += __cfqq ? __cfqq->rb_key : jiffies; 1239 rb_key += __cfqq ? __cfqq->rb_key : jiffies;
1240 } 1240 }
1241 1241
1242 if (!RB_EMPTY_NODE(&cfqq->rb_node)) { 1242 if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
1243 new_cfqq = 0; 1243 new_cfqq = 0;
1244 /* 1244 /*
1245 * same position, nothing more to do 1245 * same position, nothing more to do
1246 */ 1246 */
1247 if (rb_key == cfqq->rb_key && 1247 if (rb_key == cfqq->rb_key &&
1248 cfqq->service_tree == service_tree) 1248 cfqq->service_tree == service_tree)
1249 return; 1249 return;
1250 1250
1251 cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); 1251 cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
1252 cfqq->service_tree = NULL; 1252 cfqq->service_tree = NULL;
1253 } 1253 }
1254 1254
1255 left = 1; 1255 left = 1;
1256 parent = NULL; 1256 parent = NULL;
1257 cfqq->service_tree = service_tree; 1257 cfqq->service_tree = service_tree;
1258 p = &service_tree->rb.rb_node; 1258 p = &service_tree->rb.rb_node;
1259 while (*p) { 1259 while (*p) {
1260 struct rb_node **n; 1260 struct rb_node **n;
1261 1261
1262 parent = *p; 1262 parent = *p;
1263 __cfqq = rb_entry(parent, struct cfq_queue, rb_node); 1263 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
1264 1264
1265 /* 1265 /*
1266 * sort by key, which represents service time. 1266 * sort by key, which represents service time.
1267 */ 1267 */
1268 if (time_before(rb_key, __cfqq->rb_key)) 1268 if (time_before(rb_key, __cfqq->rb_key))
1269 n = &(*p)->rb_left; 1269 n = &(*p)->rb_left;
1270 else { 1270 else {
1271 n = &(*p)->rb_right; 1271 n = &(*p)->rb_right;
1272 left = 0; 1272 left = 0;
1273 } 1273 }
1274 1274
1275 p = n; 1275 p = n;
1276 } 1276 }
1277 1277
1278 if (left) 1278 if (left)
1279 service_tree->left = &cfqq->rb_node; 1279 service_tree->left = &cfqq->rb_node;
1280 1280
1281 cfqq->rb_key = rb_key; 1281 cfqq->rb_key = rb_key;
1282 rb_link_node(&cfqq->rb_node, parent, p); 1282 rb_link_node(&cfqq->rb_node, parent, p);
1283 rb_insert_color(&cfqq->rb_node, &service_tree->rb); 1283 rb_insert_color(&cfqq->rb_node, &service_tree->rb);
1284 service_tree->count++; 1284 service_tree->count++;
1285 if ((add_front || !new_cfqq) && !group_changed) 1285 if ((add_front || !new_cfqq) && !group_changed)
1286 return; 1286 return;
1287 cfq_group_service_tree_add(cfqd, cfqq->cfqg); 1287 cfq_group_service_tree_add(cfqd, cfqq->cfqg);
1288 } 1288 }
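The three branches above pick the rb_key that decides service order: idle-class queues key behind the current last entry, add_front queues key just ahead of the current first entry, and everything else keys at "now + priority offset - residual slice". A standalone sketch of that choice, with invented names, placeholder constants standing in for CFQ_IDLE_DELAY and HZ, and last_key/first_key assumed to fall back to now when the tree is empty (not part of the source):

#define IDLE_DELAY	5	/* stands in for CFQ_IDLE_DELAY */
#define TICKS_PER_SEC	100	/* stands in for HZ */

static unsigned long pick_rb_key(unsigned long now, int is_idle_class,
				 int add_front, unsigned long last_key,
				 unsigned long first_key,
				 unsigned long prio_offset, long slice_resid)
{
	if (is_idle_class)			/* idle class always sorts last */
		return last_key + IDLE_DELAY;
	if (add_front)				/* preempting queue sorts first */
		return first_key - TICKS_PER_SEC;
	/* a negative resid (slice overrun) pushes the key further out */
	return now + prio_offset - slice_resid;
}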
1289 1289
1290 static struct cfq_queue * 1290 static struct cfq_queue *
1291 cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root, 1291 cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root,
1292 sector_t sector, struct rb_node **ret_parent, 1292 sector_t sector, struct rb_node **ret_parent,
1293 struct rb_node ***rb_link) 1293 struct rb_node ***rb_link)
1294 { 1294 {
1295 struct rb_node **p, *parent; 1295 struct rb_node **p, *parent;
1296 struct cfq_queue *cfqq = NULL; 1296 struct cfq_queue *cfqq = NULL;
1297 1297
1298 parent = NULL; 1298 parent = NULL;
1299 p = &root->rb_node; 1299 p = &root->rb_node;
1300 while (*p) { 1300 while (*p) {
1301 struct rb_node **n; 1301 struct rb_node **n;
1302 1302
1303 parent = *p; 1303 parent = *p;
1304 cfqq = rb_entry(parent, struct cfq_queue, p_node); 1304 cfqq = rb_entry(parent, struct cfq_queue, p_node);
1305 1305
1306 /* 1306 /*
1307 * Sort strictly based on sector. Smallest to the left, 1307 * Sort strictly based on sector. Smallest to the left,
1308 * largest to the right. 1308 * largest to the right.
1309 */ 1309 */
1310 if (sector > blk_rq_pos(cfqq->next_rq)) 1310 if (sector > blk_rq_pos(cfqq->next_rq))
1311 n = &(*p)->rb_right; 1311 n = &(*p)->rb_right;
1312 else if (sector < blk_rq_pos(cfqq->next_rq)) 1312 else if (sector < blk_rq_pos(cfqq->next_rq))
1313 n = &(*p)->rb_left; 1313 n = &(*p)->rb_left;
1314 else 1314 else
1315 break; 1315 break;
1316 p = n; 1316 p = n;
1317 cfqq = NULL; 1317 cfqq = NULL;
1318 } 1318 }
1319 1319
1320 *ret_parent = parent; 1320 *ret_parent = parent;
1321 if (rb_link) 1321 if (rb_link)
1322 *rb_link = p; 1322 *rb_link = p;
1323 return cfqq; 1323 return cfqq;
1324 } 1324 }
1325 1325
1326 static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1326 static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1327 { 1327 {
1328 struct rb_node **p, *parent; 1328 struct rb_node **p, *parent;
1329 struct cfq_queue *__cfqq; 1329 struct cfq_queue *__cfqq;
1330 1330
1331 if (cfqq->p_root) { 1331 if (cfqq->p_root) {
1332 rb_erase(&cfqq->p_node, cfqq->p_root); 1332 rb_erase(&cfqq->p_node, cfqq->p_root);
1333 cfqq->p_root = NULL; 1333 cfqq->p_root = NULL;
1334 } 1334 }
1335 1335
1336 if (cfq_class_idle(cfqq)) 1336 if (cfq_class_idle(cfqq))
1337 return; 1337 return;
1338 if (!cfqq->next_rq) 1338 if (!cfqq->next_rq)
1339 return; 1339 return;
1340 1340
1341 cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio]; 1341 cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio];
1342 __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, 1342 __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root,
1343 blk_rq_pos(cfqq->next_rq), &parent, &p); 1343 blk_rq_pos(cfqq->next_rq), &parent, &p);
1344 if (!__cfqq) { 1344 if (!__cfqq) {
1345 rb_link_node(&cfqq->p_node, parent, p); 1345 rb_link_node(&cfqq->p_node, parent, p);
1346 rb_insert_color(&cfqq->p_node, cfqq->p_root); 1346 rb_insert_color(&cfqq->p_node, cfqq->p_root);
1347 } else 1347 } else
1348 cfqq->p_root = NULL; 1348 cfqq->p_root = NULL;
1349 } 1349 }
1350 1350
1351 /* 1351 /*
1352 * Update cfqq's position in the service tree. 1352 * Update cfqq's position in the service tree.
1353 */ 1353 */
1354 static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1354 static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1355 { 1355 {
1356 /* 1356 /*
1357 * Resorting requires the cfqq to be on the RR list already. 1357 * Resorting requires the cfqq to be on the RR list already.
1358 */ 1358 */
1359 if (cfq_cfqq_on_rr(cfqq)) { 1359 if (cfq_cfqq_on_rr(cfqq)) {
1360 cfq_service_tree_add(cfqd, cfqq, 0); 1360 cfq_service_tree_add(cfqd, cfqq, 0);
1361 cfq_prio_tree_add(cfqd, cfqq); 1361 cfq_prio_tree_add(cfqd, cfqq);
1362 } 1362 }
1363 } 1363 }
1364 1364
1365 /* 1365 /*
1366 * add to busy list of queues for service, trying to be fair in ordering 1366 * add to busy list of queues for service, trying to be fair in ordering
1367 * the pending list according to last request service 1367 * the pending list according to last request service
1368 */ 1368 */
1369 static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1369 static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1370 { 1370 {
1371 cfq_log_cfqq(cfqd, cfqq, "add_to_rr"); 1371 cfq_log_cfqq(cfqd, cfqq, "add_to_rr");
1372 BUG_ON(cfq_cfqq_on_rr(cfqq)); 1372 BUG_ON(cfq_cfqq_on_rr(cfqq));
1373 cfq_mark_cfqq_on_rr(cfqq); 1373 cfq_mark_cfqq_on_rr(cfqq);
1374 cfqd->busy_queues++; 1374 cfqd->busy_queues++;
1375 1375
1376 cfq_resort_rr_list(cfqd, cfqq); 1376 cfq_resort_rr_list(cfqd, cfqq);
1377 } 1377 }
1378 1378
1379 /* 1379 /*
1380 * Called when the cfqq no longer has requests pending, remove it from 1380 * Called when the cfqq no longer has requests pending, remove it from
1381 * the service tree. 1381 * the service tree.
1382 */ 1382 */
1383 static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1383 static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1384 { 1384 {
1385 cfq_log_cfqq(cfqd, cfqq, "del_from_rr"); 1385 cfq_log_cfqq(cfqd, cfqq, "del_from_rr");
1386 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 1386 BUG_ON(!cfq_cfqq_on_rr(cfqq));
1387 cfq_clear_cfqq_on_rr(cfqq); 1387 cfq_clear_cfqq_on_rr(cfqq);
1388 1388
1389 if (!RB_EMPTY_NODE(&cfqq->rb_node)) { 1389 if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
1390 cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); 1390 cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
1391 cfqq->service_tree = NULL; 1391 cfqq->service_tree = NULL;
1392 } 1392 }
1393 if (cfqq->p_root) { 1393 if (cfqq->p_root) {
1394 rb_erase(&cfqq->p_node, cfqq->p_root); 1394 rb_erase(&cfqq->p_node, cfqq->p_root);
1395 cfqq->p_root = NULL; 1395 cfqq->p_root = NULL;
1396 } 1396 }
1397 1397
1398 cfq_group_service_tree_del(cfqd, cfqq->cfqg); 1398 cfq_group_service_tree_del(cfqd, cfqq->cfqg);
1399 BUG_ON(!cfqd->busy_queues); 1399 BUG_ON(!cfqd->busy_queues);
1400 cfqd->busy_queues--; 1400 cfqd->busy_queues--;
1401 } 1401 }
1402 1402
1403 /* 1403 /*
1404 * rb tree support functions 1404 * rb tree support functions
1405 */ 1405 */
1406 static void cfq_del_rq_rb(struct request *rq) 1406 static void cfq_del_rq_rb(struct request *rq)
1407 { 1407 {
1408 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1408 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1409 const int sync = rq_is_sync(rq); 1409 const int sync = rq_is_sync(rq);
1410 1410
1411 BUG_ON(!cfqq->queued[sync]); 1411 BUG_ON(!cfqq->queued[sync]);
1412 cfqq->queued[sync]--; 1412 cfqq->queued[sync]--;
1413 1413
1414 elv_rb_del(&cfqq->sort_list, rq); 1414 elv_rb_del(&cfqq->sort_list, rq);
1415 1415
1416 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) { 1416 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) {
1417 /* 1417 /*
1418 * Queue will be deleted from service tree when we actually 1418 * Queue will be deleted from service tree when we actually
1419 * expire it later. Right now just remove it from prio tree 1419 * expire it later. Right now just remove it from prio tree
1420 * as it is empty. 1420 * as it is empty.
1421 */ 1421 */
1422 if (cfqq->p_root) { 1422 if (cfqq->p_root) {
1423 rb_erase(&cfqq->p_node, cfqq->p_root); 1423 rb_erase(&cfqq->p_node, cfqq->p_root);
1424 cfqq->p_root = NULL; 1424 cfqq->p_root = NULL;
1425 } 1425 }
1426 } 1426 }
1427 } 1427 }
1428 1428
1429 static void cfq_add_rq_rb(struct request *rq) 1429 static void cfq_add_rq_rb(struct request *rq)
1430 { 1430 {
1431 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1431 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1432 struct cfq_data *cfqd = cfqq->cfqd; 1432 struct cfq_data *cfqd = cfqq->cfqd;
1433 struct request *__alias, *prev; 1433 struct request *__alias, *prev;
1434 1434
1435 cfqq->queued[rq_is_sync(rq)]++; 1435 cfqq->queued[rq_is_sync(rq)]++;
1436 1436
1437 /* 1437 /*
1438 * looks a little odd, but the first insert might return an alias. 1438 * looks a little odd, but the first insert might return an alias.
1439 * if that happens, put the alias on the dispatch list 1439 * if that happens, put the alias on the dispatch list
1440 */ 1440 */
1441 while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL) 1441 while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
1442 cfq_dispatch_insert(cfqd->queue, __alias); 1442 cfq_dispatch_insert(cfqd->queue, __alias);
1443 1443
1444 if (!cfq_cfqq_on_rr(cfqq)) 1444 if (!cfq_cfqq_on_rr(cfqq))
1445 cfq_add_cfqq_rr(cfqd, cfqq); 1445 cfq_add_cfqq_rr(cfqd, cfqq);
1446 1446
1447 /* 1447 /*
1448 * check if this request is a better next-serve candidate 1448 * check if this request is a better next-serve candidate
1449 */ 1449 */
1450 prev = cfqq->next_rq; 1450 prev = cfqq->next_rq;
1451 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq, cfqd->last_position); 1451 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq, cfqd->last_position);
1452 1452
1453 /* 1453 /*
1454 * adjust priority tree position, if ->next_rq changes 1454 * adjust priority tree position, if ->next_rq changes
1455 */ 1455 */
1456 if (prev != cfqq->next_rq) 1456 if (prev != cfqq->next_rq)
1457 cfq_prio_tree_add(cfqd, cfqq); 1457 cfq_prio_tree_add(cfqd, cfqq);
1458 1458
1459 BUG_ON(!cfqq->next_rq); 1459 BUG_ON(!cfqq->next_rq);
1460 } 1460 }
1461 1461
1462 static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) 1462 static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
1463 { 1463 {
1464 elv_rb_del(&cfqq->sort_list, rq); 1464 elv_rb_del(&cfqq->sort_list, rq);
1465 cfqq->queued[rq_is_sync(rq)]--; 1465 cfqq->queued[rq_is_sync(rq)]--;
1466 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, 1466 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
1467 rq_data_dir(rq), rq_is_sync(rq)); 1467 rq_data_dir(rq), rq_is_sync(rq));
1468 cfq_add_rq_rb(rq); 1468 cfq_add_rq_rb(rq);
1469 cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg, 1469 cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
1470 &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq), 1470 &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq),
1471 rq_is_sync(rq)); 1471 rq_is_sync(rq));
1472 } 1472 }
1473 1473
1474 static struct request * 1474 static struct request *
1475 cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) 1475 cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
1476 { 1476 {
1477 struct task_struct *tsk = current; 1477 struct task_struct *tsk = current;
1478 struct cfq_io_context *cic; 1478 struct cfq_io_context *cic;
1479 struct cfq_queue *cfqq; 1479 struct cfq_queue *cfqq;
1480 1480
1481 cic = cfq_cic_lookup(cfqd, tsk->io_context); 1481 cic = cfq_cic_lookup(cfqd, tsk->io_context);
1482 if (!cic) 1482 if (!cic)
1483 return NULL; 1483 return NULL;
1484 1484
1485 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); 1485 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
1486 if (cfqq) { 1486 if (cfqq) {
1487 sector_t sector = bio->bi_sector + bio_sectors(bio); 1487 sector_t sector = bio->bi_sector + bio_sectors(bio);
1488 1488
1489 return elv_rb_find(&cfqq->sort_list, sector); 1489 return elv_rb_find(&cfqq->sort_list, sector);
1490 } 1490 }
1491 1491
1492 return NULL; 1492 return NULL;
1493 } 1493 }
1494 1494
1495 static void cfq_activate_request(struct request_queue *q, struct request *rq) 1495 static void cfq_activate_request(struct request_queue *q, struct request *rq)
1496 { 1496 {
1497 struct cfq_data *cfqd = q->elevator->elevator_data; 1497 struct cfq_data *cfqd = q->elevator->elevator_data;
1498 1498
1499 cfqd->rq_in_driver++; 1499 cfqd->rq_in_driver++;
1500 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", 1500 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
1501 cfqd->rq_in_driver); 1501 cfqd->rq_in_driver);
1502 1502
1503 cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); 1503 cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
1504 } 1504 }
1505 1505
1506 static void cfq_deactivate_request(struct request_queue *q, struct request *rq) 1506 static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
1507 { 1507 {
1508 struct cfq_data *cfqd = q->elevator->elevator_data; 1508 struct cfq_data *cfqd = q->elevator->elevator_data;
1509 1509
1510 WARN_ON(!cfqd->rq_in_driver); 1510 WARN_ON(!cfqd->rq_in_driver);
1511 cfqd->rq_in_driver--; 1511 cfqd->rq_in_driver--;
1512 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d", 1512 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
1513 cfqd->rq_in_driver); 1513 cfqd->rq_in_driver);
1514 } 1514 }
1515 1515
1516 static void cfq_remove_request(struct request *rq) 1516 static void cfq_remove_request(struct request *rq)
1517 { 1517 {
1518 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1518 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1519 1519
1520 if (cfqq->next_rq == rq) 1520 if (cfqq->next_rq == rq)
1521 cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq); 1521 cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);
1522 1522
1523 list_del_init(&rq->queuelist); 1523 list_del_init(&rq->queuelist);
1524 cfq_del_rq_rb(rq); 1524 cfq_del_rq_rb(rq);
1525 1525
1526 cfqq->cfqd->rq_queued--; 1526 cfqq->cfqd->rq_queued--;
1527 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, 1527 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
1528 rq_data_dir(rq), rq_is_sync(rq)); 1528 rq_data_dir(rq), rq_is_sync(rq));
1529 if (rq->cmd_flags & REQ_META) { 1529 if (rq->cmd_flags & REQ_META) {
1530 WARN_ON(!cfqq->meta_pending); 1530 WARN_ON(!cfqq->meta_pending);
1531 cfqq->meta_pending--; 1531 cfqq->meta_pending--;
1532 } 1532 }
1533 } 1533 }
1534 1534
1535 static int cfq_merge(struct request_queue *q, struct request **req, 1535 static int cfq_merge(struct request_queue *q, struct request **req,
1536 struct bio *bio) 1536 struct bio *bio)
1537 { 1537 {
1538 struct cfq_data *cfqd = q->elevator->elevator_data; 1538 struct cfq_data *cfqd = q->elevator->elevator_data;
1539 struct request *__rq; 1539 struct request *__rq;
1540 1540
1541 __rq = cfq_find_rq_fmerge(cfqd, bio); 1541 __rq = cfq_find_rq_fmerge(cfqd, bio);
1542 if (__rq && elv_rq_merge_ok(__rq, bio)) { 1542 if (__rq && elv_rq_merge_ok(__rq, bio)) {
1543 *req = __rq; 1543 *req = __rq;
1544 return ELEVATOR_FRONT_MERGE; 1544 return ELEVATOR_FRONT_MERGE;
1545 } 1545 }
1546 1546
1547 return ELEVATOR_NO_MERGE; 1547 return ELEVATOR_NO_MERGE;
1548 } 1548 }
1549 1549
1550 static void cfq_merged_request(struct request_queue *q, struct request *req, 1550 static void cfq_merged_request(struct request_queue *q, struct request *req,
1551 int type) 1551 int type)
1552 { 1552 {
1553 if (type == ELEVATOR_FRONT_MERGE) { 1553 if (type == ELEVATOR_FRONT_MERGE) {
1554 struct cfq_queue *cfqq = RQ_CFQQ(req); 1554 struct cfq_queue *cfqq = RQ_CFQQ(req);
1555 1555
1556 cfq_reposition_rq_rb(cfqq, req); 1556 cfq_reposition_rq_rb(cfqq, req);
1557 } 1557 }
1558 } 1558 }
1559 1559
1560 static void cfq_bio_merged(struct request_queue *q, struct request *req, 1560 static void cfq_bio_merged(struct request_queue *q, struct request *req,
1561 struct bio *bio) 1561 struct bio *bio)
1562 { 1562 {
1563 cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg, 1563 cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg,
1564 bio_data_dir(bio), cfq_bio_sync(bio)); 1564 bio_data_dir(bio), cfq_bio_sync(bio));
1565 } 1565 }
1566 1566
1567 static void 1567 static void
1568 cfq_merged_requests(struct request_queue *q, struct request *rq, 1568 cfq_merged_requests(struct request_queue *q, struct request *rq,
1569 struct request *next) 1569 struct request *next)
1570 { 1570 {
1571 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1571 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1572 /* 1572 /*
1573 * reposition in fifo if next is older than rq 1573 * reposition in fifo if next is older than rq
1574 */ 1574 */
1575 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && 1575 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
1576 time_before(rq_fifo_time(next), rq_fifo_time(rq))) { 1576 time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
1577 list_move(&rq->queuelist, &next->queuelist); 1577 list_move(&rq->queuelist, &next->queuelist);
1578 rq_set_fifo_time(rq, rq_fifo_time(next)); 1578 rq_set_fifo_time(rq, rq_fifo_time(next));
1579 } 1579 }
1580 1580
1581 if (cfqq->next_rq == next) 1581 if (cfqq->next_rq == next)
1582 cfqq->next_rq = rq; 1582 cfqq->next_rq = rq;
1583 cfq_remove_request(next); 1583 cfq_remove_request(next);
1584 cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg, 1584 cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg,
1585 rq_data_dir(next), rq_is_sync(next)); 1585 rq_data_dir(next), rq_is_sync(next));
1586 } 1586 }
1587 1587
1588 static int cfq_allow_merge(struct request_queue *q, struct request *rq, 1588 static int cfq_allow_merge(struct request_queue *q, struct request *rq,
1589 struct bio *bio) 1589 struct bio *bio)
1590 { 1590 {
1591 struct cfq_data *cfqd = q->elevator->elevator_data; 1591 struct cfq_data *cfqd = q->elevator->elevator_data;
1592 struct cfq_io_context *cic; 1592 struct cfq_io_context *cic;
1593 struct cfq_queue *cfqq; 1593 struct cfq_queue *cfqq;
1594 1594
1595 /* 1595 /*
1596 * Disallow merge of a sync bio into an async request. 1596 * Disallow merge of a sync bio into an async request.
1597 */ 1597 */
1598 if (cfq_bio_sync(bio) && !rq_is_sync(rq)) 1598 if (cfq_bio_sync(bio) && !rq_is_sync(rq))
1599 return false; 1599 return false;
1600 1600
1601 /* 1601 /*
1602 * Lookup the cfqq that this bio will be queued with. Allow 1602 * Lookup the cfqq that this bio will be queued with. Allow
1603 * merge only if rq is queued there. 1603 * merge only if rq is queued there.
1604 */ 1604 */
1605 cic = cfq_cic_lookup(cfqd, current->io_context); 1605 cic = cfq_cic_lookup(cfqd, current->io_context);
1606 if (!cic) 1606 if (!cic)
1607 return false; 1607 return false;
1608 1608
1609 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); 1609 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
1610 return cfqq == RQ_CFQQ(rq); 1610 return cfqq == RQ_CFQQ(rq);
1611 } 1611 }
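cfq_allow_merge() boils down to two rules: a sync bio must never be folded into an async request, and the bio must map to the same cfqq the request already sits on. As a pure predicate (illustrative sketch, not from the source):

static int allow_merge(int bio_is_sync, int rq_is_sync,
		       const void *bio_cfqq, const void *rq_cfqq)
{
	if (bio_is_sync && !rq_is_sync)
		return 0;			/* never merge sync into async */
	return bio_cfqq == rq_cfqq;		/* must target the same queue */
}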
1612 1612
1613 static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1613 static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1614 { 1614 {
1615 del_timer(&cfqd->idle_slice_timer); 1615 del_timer(&cfqd->idle_slice_timer);
1616 cfq_blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg); 1616 cfq_blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg);
1617 } 1617 }
1618 1618
1619 static void __cfq_set_active_queue(struct cfq_data *cfqd, 1619 static void __cfq_set_active_queue(struct cfq_data *cfqd,
1620 struct cfq_queue *cfqq) 1620 struct cfq_queue *cfqq)
1621 { 1621 {
1622 if (cfqq) { 1622 if (cfqq) {
1623 cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", 1623 cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d",
1624 cfqd->serving_prio, cfqd->serving_type); 1624 cfqd->serving_prio, cfqd->serving_type);
1625 cfq_blkiocg_update_avg_queue_size_stats(&cfqq->cfqg->blkg); 1625 cfq_blkiocg_update_avg_queue_size_stats(&cfqq->cfqg->blkg);
1626 cfqq->slice_start = 0; 1626 cfqq->slice_start = 0;
1627 cfqq->dispatch_start = jiffies; 1627 cfqq->dispatch_start = jiffies;
1628 cfqq->allocated_slice = 0; 1628 cfqq->allocated_slice = 0;
1629 cfqq->slice_end = 0; 1629 cfqq->slice_end = 0;
1630 cfqq->slice_dispatch = 0; 1630 cfqq->slice_dispatch = 0;
1631 cfqq->nr_sectors = 0; 1631 cfqq->nr_sectors = 0;
1632 1632
1633 cfq_clear_cfqq_wait_request(cfqq); 1633 cfq_clear_cfqq_wait_request(cfqq);
1634 cfq_clear_cfqq_must_dispatch(cfqq); 1634 cfq_clear_cfqq_must_dispatch(cfqq);
1635 cfq_clear_cfqq_must_alloc_slice(cfqq); 1635 cfq_clear_cfqq_must_alloc_slice(cfqq);
1636 cfq_clear_cfqq_fifo_expire(cfqq); 1636 cfq_clear_cfqq_fifo_expire(cfqq);
1637 cfq_mark_cfqq_slice_new(cfqq); 1637 cfq_mark_cfqq_slice_new(cfqq);
1638 1638
1639 cfq_del_timer(cfqd, cfqq); 1639 cfq_del_timer(cfqd, cfqq);
1640 } 1640 }
1641 1641
1642 cfqd->active_queue = cfqq; 1642 cfqd->active_queue = cfqq;
1643 } 1643 }
1644 1644
1645 /* 1645 /*
1646 * current cfqq expired its slice (or was too idle), select new one 1646 * current cfqq expired its slice (or was too idle), select new one
1647 */ 1647 */
1648 static void 1648 static void
1649 __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1649 __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1650 bool timed_out) 1650 bool timed_out)
1651 { 1651 {
1652 cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); 1652 cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
1653 1653
1654 if (cfq_cfqq_wait_request(cfqq)) 1654 if (cfq_cfqq_wait_request(cfqq))
1655 cfq_del_timer(cfqd, cfqq); 1655 cfq_del_timer(cfqd, cfqq);
1656 1656
1657 cfq_clear_cfqq_wait_request(cfqq); 1657 cfq_clear_cfqq_wait_request(cfqq);
1658 cfq_clear_cfqq_wait_busy(cfqq); 1658 cfq_clear_cfqq_wait_busy(cfqq);
1659 1659
1660 /* 1660 /*
1661 * If this cfqq is shared between multiple processes, check to 1661 * If this cfqq is shared between multiple processes, check to
1662 * make sure that those processes are still issuing I/Os within 1662 * make sure that those processes are still issuing I/Os within
1663 * the mean seek distance. If not, it may be time to break the 1663 * the mean seek distance. If not, it may be time to break the
1664 * queues apart again. 1664 * queues apart again.
1665 */ 1665 */
1666 if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq)) 1666 if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq))
1667 cfq_mark_cfqq_split_coop(cfqq); 1667 cfq_mark_cfqq_split_coop(cfqq);
1668 1668
1669 /* 1669 /*
1670 * store what was left of this slice, if the queue idled/timed out 1670 * store what was left of this slice, if the queue idled/timed out
1671 */ 1671 */
1672 if (timed_out) { 1672 if (timed_out) {
1673 if (cfq_cfqq_slice_new(cfqq)) 1673 if (cfq_cfqq_slice_new(cfqq))
1674 cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq); 1674 cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq);
1675 else 1675 else
1676 cfqq->slice_resid = cfqq->slice_end - jiffies; 1676 cfqq->slice_resid = cfqq->slice_end - jiffies;
1677 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); 1677 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
1678 } 1678 }
1679 1679
1680 cfq_group_served(cfqd, cfqq->cfqg, cfqq); 1680 cfq_group_served(cfqd, cfqq->cfqg, cfqq);
1681 1681
1682 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) 1682 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
1683 cfq_del_cfqq_rr(cfqd, cfqq); 1683 cfq_del_cfqq_rr(cfqd, cfqq);
1684 1684
1685 cfq_resort_rr_list(cfqd, cfqq); 1685 cfq_resort_rr_list(cfqd, cfqq);
1686 1686
1687 if (cfqq == cfqd->active_queue) 1687 if (cfqq == cfqd->active_queue)
1688 cfqd->active_queue = NULL; 1688 cfqd->active_queue = NULL;
1689 1689
1690 if (cfqd->active_cic) { 1690 if (cfqd->active_cic) {
1691 put_io_context(cfqd->active_cic->ioc); 1691 put_io_context(cfqd->active_cic->ioc);
1692 cfqd->active_cic = NULL; 1692 cfqd->active_cic = NULL;
1693 } 1693 }
1694 } 1694 }
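The residual-slice bookkeeping above is what later shifts the queue's position in the service tree: a queue that timed out carries its leftover slice (possibly negative on overrun) forward, while a queue expired while still slice_new is credited a full scaled slice. A standalone sketch of the value recorded (names invented; when the expiry was not a timeout nothing is recorded, modelled here as 0):

static long leftover_slice(int timed_out, int slice_new,
			   unsigned long slice_end, unsigned long now,
			   long full_scaled_slice)
{
	if (!timed_out)
		return 0;			/* resid left untouched */
	if (slice_new)
		return full_scaled_slice;	/* never ran: credit a full slice */
	return (long)(slice_end - now);		/* negative if the slice overran */
}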
1695 1695
1696 static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) 1696 static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
1697 { 1697 {
1698 struct cfq_queue *cfqq = cfqd->active_queue; 1698 struct cfq_queue *cfqq = cfqd->active_queue;
1699 1699
1700 if (cfqq) 1700 if (cfqq)
1701 __cfq_slice_expired(cfqd, cfqq, timed_out); 1701 __cfq_slice_expired(cfqd, cfqq, timed_out);
1702 } 1702 }
1703 1703
1704 /* 1704 /*
1705 * Get next queue for service. Unless we have a queue preemption, 1705 * Get next queue for service. Unless we have a queue preemption,
1706 * we'll simply select the first cfqq in the service tree. 1706 * we'll simply select the first cfqq in the service tree.
1707 */ 1707 */
1708 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) 1708 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
1709 { 1709 {
1710 struct cfq_rb_root *service_tree = 1710 struct cfq_rb_root *service_tree =
1711 service_tree_for(cfqd->serving_group, cfqd->serving_prio, 1711 service_tree_for(cfqd->serving_group, cfqd->serving_prio,
1712 cfqd->serving_type); 1712 cfqd->serving_type);
1713 1713
1714 if (!cfqd->rq_queued) 1714 if (!cfqd->rq_queued)
1715 return NULL; 1715 return NULL;
1716 1716
1717 /* There is nothing to dispatch */ 1717 /* There is nothing to dispatch */
1718 if (!service_tree) 1718 if (!service_tree)
1719 return NULL; 1719 return NULL;
1720 if (RB_EMPTY_ROOT(&service_tree->rb)) 1720 if (RB_EMPTY_ROOT(&service_tree->rb))
1721 return NULL; 1721 return NULL;
1722 return cfq_rb_first(service_tree); 1722 return cfq_rb_first(service_tree);
1723 } 1723 }
1724 1724
1725 static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd) 1725 static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd)
1726 { 1726 {
1727 struct cfq_group *cfqg; 1727 struct cfq_group *cfqg;
1728 struct cfq_queue *cfqq; 1728 struct cfq_queue *cfqq;
1729 int i, j; 1729 int i, j;
1730 struct cfq_rb_root *st; 1730 struct cfq_rb_root *st;
1731 1731
1732 if (!cfqd->rq_queued) 1732 if (!cfqd->rq_queued)
1733 return NULL; 1733 return NULL;
1734 1734
1735 cfqg = cfq_get_next_cfqg(cfqd); 1735 cfqg = cfq_get_next_cfqg(cfqd);
1736 if (!cfqg) 1736 if (!cfqg)
1737 return NULL; 1737 return NULL;
1738 1738
1739 for_each_cfqg_st(cfqg, i, j, st) 1739 for_each_cfqg_st(cfqg, i, j, st)
1740 if ((cfqq = cfq_rb_first(st)) != NULL) 1740 if ((cfqq = cfq_rb_first(st)) != NULL)
1741 return cfqq; 1741 return cfqq;
1742 return NULL; 1742 return NULL;
1743 } 1743 }
1744 1744
1745 /* 1745 /*
1746 * Get and set a new active queue for service. 1746 * Get and set a new active queue for service.
1747 */ 1747 */
1748 static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd, 1748 static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
1749 struct cfq_queue *cfqq) 1749 struct cfq_queue *cfqq)
1750 { 1750 {
1751 if (!cfqq) 1751 if (!cfqq)
1752 cfqq = cfq_get_next_queue(cfqd); 1752 cfqq = cfq_get_next_queue(cfqd);
1753 1753
1754 __cfq_set_active_queue(cfqd, cfqq); 1754 __cfq_set_active_queue(cfqd, cfqq);
1755 return cfqq; 1755 return cfqq;
1756 } 1756 }
1757 1757
1758 static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, 1758 static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
1759 struct request *rq) 1759 struct request *rq)
1760 { 1760 {
1761 if (blk_rq_pos(rq) >= cfqd->last_position) 1761 if (blk_rq_pos(rq) >= cfqd->last_position)
1762 return blk_rq_pos(rq) - cfqd->last_position; 1762 return blk_rq_pos(rq) - cfqd->last_position;
1763 else 1763 else
1764 return cfqd->last_position - blk_rq_pos(rq); 1764 return cfqd->last_position - blk_rq_pos(rq);
1765 } 1765 }
1766 1766
1767 static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1767 static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1768 struct request *rq) 1768 struct request *rq)
1769 { 1769 {
1770 return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR; 1770 return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR;
1771 } 1771 }
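cfq_rq_close() simply checks whether the request starts within CFQQ_CLOSE_THR sectors of the last completed position, in either direction. An equivalent standalone check (illustrative only):

static int rq_close(unsigned long long last_pos, unsigned long long rq_pos,
		    unsigned long long close_thr)
{
	unsigned long long dist = rq_pos >= last_pos ? rq_pos - last_pos
						     : last_pos - rq_pos;
	return dist <= close_thr;
}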
1772 1772
1773 static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, 1773 static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
1774 struct cfq_queue *cur_cfqq) 1774 struct cfq_queue *cur_cfqq)
1775 { 1775 {
1776 struct rb_root *root = &cfqd->prio_trees[cur_cfqq->org_ioprio]; 1776 struct rb_root *root = &cfqd->prio_trees[cur_cfqq->org_ioprio];
1777 struct rb_node *parent, *node; 1777 struct rb_node *parent, *node;
1778 struct cfq_queue *__cfqq; 1778 struct cfq_queue *__cfqq;
1779 sector_t sector = cfqd->last_position; 1779 sector_t sector = cfqd->last_position;
1780 1780
1781 if (RB_EMPTY_ROOT(root)) 1781 if (RB_EMPTY_ROOT(root))
1782 return NULL; 1782 return NULL;
1783 1783
1784 /* 1784 /*
1785 * First, if we find a request starting at the end of the last 1785 * First, if we find a request starting at the end of the last
1786 * request, choose it. 1786 * request, choose it.
1787 */ 1787 */
1788 __cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL); 1788 __cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL);
1789 if (__cfqq) 1789 if (__cfqq)
1790 return __cfqq; 1790 return __cfqq;
1791 1791
1792 /* 1792 /*
1793 * If the exact sector wasn't found, the parent of the NULL leaf 1793 * If the exact sector wasn't found, the parent of the NULL leaf
1794 * will contain the closest sector. 1794 * will contain the closest sector.
1795 */ 1795 */
1796 __cfqq = rb_entry(parent, struct cfq_queue, p_node); 1796 __cfqq = rb_entry(parent, struct cfq_queue, p_node);
1797 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) 1797 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
1798 return __cfqq; 1798 return __cfqq;
1799 1799
1800 if (blk_rq_pos(__cfqq->next_rq) < sector) 1800 if (blk_rq_pos(__cfqq->next_rq) < sector)
1801 node = rb_next(&__cfqq->p_node); 1801 node = rb_next(&__cfqq->p_node);
1802 else 1802 else
1803 node = rb_prev(&__cfqq->p_node); 1803 node = rb_prev(&__cfqq->p_node);
1804 if (!node) 1804 if (!node)
1805 return NULL; 1805 return NULL;
1806 1806
1807 __cfqq = rb_entry(node, struct cfq_queue, p_node); 1807 __cfqq = rb_entry(node, struct cfq_queue, p_node);
1808 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) 1808 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
1809 return __cfqq; 1809 return __cfqq;
1810 1810
1811 return NULL; 1811 return NULL;
1812 } 1812 }
1813 1813
1814 /* 1814 /*
1815 * cfqd - obvious 1815 * cfqd - obvious
1816 * cur_cfqq - passed in so that we don't decide that the current queue is 1816 * cur_cfqq - passed in so that we don't decide that the current queue is
1817 * closely cooperating with itself. 1817 * closely cooperating with itself.
1818 * 1818 *
1819 * So, basically we're assuming that cur_cfqq has dispatched at least 1819 * So, basically we're assuming that cur_cfqq has dispatched at least
1820 * one request, and that cfqd->last_position reflects a position on the disk 1820 * one request, and that cfqd->last_position reflects a position on the disk
1821 * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid 1821 * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid
1822 * assumption. 1822 * assumption.
1823 */ 1823 */
1824 static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, 1824 static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
1825 struct cfq_queue *cur_cfqq) 1825 struct cfq_queue *cur_cfqq)
1826 { 1826 {
1827 struct cfq_queue *cfqq; 1827 struct cfq_queue *cfqq;
1828 1828
1829 if (cfq_class_idle(cur_cfqq)) 1829 if (cfq_class_idle(cur_cfqq))
1830 return NULL; 1830 return NULL;
1831 if (!cfq_cfqq_sync(cur_cfqq)) 1831 if (!cfq_cfqq_sync(cur_cfqq))
1832 return NULL; 1832 return NULL;
1833 if (CFQQ_SEEKY(cur_cfqq)) 1833 if (CFQQ_SEEKY(cur_cfqq))
1834 return NULL; 1834 return NULL;
1835 1835
1836 /* 1836 /*
1837 * Don't search priority tree if it's the only queue in the group. 1837 * Don't search priority tree if it's the only queue in the group.
1838 */ 1838 */
1839 if (cur_cfqq->cfqg->nr_cfqq == 1) 1839 if (cur_cfqq->cfqg->nr_cfqq == 1)
1840 return NULL; 1840 return NULL;
1841 1841
1842 /* 1842 /*
1843 * We should notice if some of the queues are cooperating, eg 1843 * We should notice if some of the queues are cooperating, eg
1844 * working closely on the same area of the disk. In that case, 1844 * working closely on the same area of the disk. In that case,
1845 * we can group them together and not waste time idling. 1845 * we can group them together and not waste time idling.
1846 */ 1846 */
1847 cfqq = cfqq_close(cfqd, cur_cfqq); 1847 cfqq = cfqq_close(cfqd, cur_cfqq);
1848 if (!cfqq) 1848 if (!cfqq)
1849 return NULL; 1849 return NULL;
1850 1850
1851 /* If new queue belongs to different cfq_group, don't choose it */ 1851 /* If new queue belongs to different cfq_group, don't choose it */
1852 if (cur_cfqq->cfqg != cfqq->cfqg) 1852 if (cur_cfqq->cfqg != cfqq->cfqg)
1853 return NULL; 1853 return NULL;
1854 1854
1855 /* 1855 /*
1856 * It only makes sense to merge sync queues. 1856 * It only makes sense to merge sync queues.
1857 */ 1857 */
1858 if (!cfq_cfqq_sync(cfqq)) 1858 if (!cfq_cfqq_sync(cfqq))
1859 return NULL; 1859 return NULL;
1860 if (CFQQ_SEEKY(cfqq)) 1860 if (CFQQ_SEEKY(cfqq))
1861 return NULL; 1861 return NULL;
1862 1862
1863 /* 1863 /*
1864 * Do not merge queues of different priority classes 1864 * Do not merge queues of different priority classes
1865 */ 1865 */
1866 if (cfq_class_rt(cfqq) != cfq_class_rt(cur_cfqq)) 1866 if (cfq_class_rt(cfqq) != cfq_class_rt(cur_cfqq))
1867 return NULL; 1867 return NULL;
1868 1868
1869 return cfqq; 1869 return cfqq;
1870 } 1870 }
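Beyond the positional lookup, the checks above amount to an eligibility predicate: both queues must be sync and non-seeky, live in the same cfq_group, and share the same scheduling class (and the current queue must not be the group's only one). A sketch of those rules with the lookup factored out (types and names invented for illustration):

struct q_info {
	int sync;
	int seeky;
	int idle_class;
	int rt_class;
	const void *group;
};

static int may_cooperate(const struct q_info *cur, const struct q_info *cand)
{
	if (cur->idle_class || !cur->sync || cur->seeky)
		return 0;
	if (cand->group != cur->group)		/* never cross groups */
		return 0;
	if (!cand->sync || cand->seeky)		/* only merge sync, non-seeky */
		return 0;
	return cand->rt_class == cur->rt_class;	/* same priority class */
}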
1871 1871
1872 /* 1872 /*
1873 * Determine whether we should enforce idle window for this queue. 1873 * Determine whether we should enforce idle window for this queue.
1874 */ 1874 */
1875 1875
1876 static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1876 static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1877 { 1877 {
1878 enum wl_prio_t prio = cfqq_prio(cfqq); 1878 enum wl_prio_t prio = cfqq_prio(cfqq);
1879 struct cfq_rb_root *service_tree = cfqq->service_tree; 1879 struct cfq_rb_root *service_tree = cfqq->service_tree;
1880 1880
1881 BUG_ON(!service_tree); 1881 BUG_ON(!service_tree);
1882 BUG_ON(!service_tree->count); 1882 BUG_ON(!service_tree->count);
1883 1883
1884 if (!cfqd->cfq_slice_idle) 1884 if (!cfqd->cfq_slice_idle)
1885 return false; 1885 return false;
1886 1886
1887 /* We never do for idle class queues. */ 1887 /* We never do for idle class queues. */
1888 if (prio == IDLE_WORKLOAD) 1888 if (prio == IDLE_WORKLOAD)
1889 return false; 1889 return false;
1890 1890
1891 /* We do for queues that were marked with idle window flag. */ 1891 /* We do for queues that were marked with idle window flag. */
1892 if (cfq_cfqq_idle_window(cfqq) && 1892 if (cfq_cfqq_idle_window(cfqq) &&
1893 !(blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)) 1893 !(blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag))
1894 return true; 1894 return true;
1895 1895
1896 /* 1896 /*
1897 * Otherwise, we do only if they are the last ones 1897 * Otherwise, we do only if they are the last ones
1898 * in their service tree. 1898 * in their service tree.
1899 */ 1899 */
1900 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq)) 1900 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
1901 return true; 1901 return true;
1902 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", 1902 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
1903 service_tree->count); 1903 service_tree->count);
1904 return false; 1904 return false;
1905 } 1905 }
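Put together, the idling decision above reads: never idle when slice_idle is disabled or for the idle workload; idle for queues with a good idle-window history unless the device is a queuing SSD; otherwise idle only for the last sync queue on its service tree. As a standalone predicate (illustrative names, not from the source):

static int should_idle(unsigned int slice_idle, int idle_class_workload,
		       int idle_window, int nonrot_with_queuing,
		       int tree_count, int is_sync)
{
	if (!slice_idle || idle_class_workload)
		return 0;
	/* history says idling pays off, unless the device has NCQ and
	 * no seek penalty, where idling just wastes bandwidth */
	if (idle_window && !nonrot_with_queuing)
		return 1;
	/* last sync queue on its tree: idle to keep its share */
	return tree_count == 1 && is_sync;
}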
1906 1906
1907 static void cfq_arm_slice_timer(struct cfq_data *cfqd) 1907 static void cfq_arm_slice_timer(struct cfq_data *cfqd)
1908 { 1908 {
1909 struct cfq_queue *cfqq = cfqd->active_queue; 1909 struct cfq_queue *cfqq = cfqd->active_queue;
1910 struct cfq_io_context *cic; 1910 struct cfq_io_context *cic;
1911 unsigned long sl, group_idle = 0; 1911 unsigned long sl, group_idle = 0;
1912 1912
1913 /* 1913 /*
1914 * SSD device without seek penalty, disable idling. But only do so 1914 * SSD device without seek penalty, disable idling. But only do so
1915 * for devices that support queuing, otherwise we still have a problem 1915 * for devices that support queuing, otherwise we still have a problem
1916 * with sync vs async workloads. 1916 * with sync vs async workloads.
1917 */ 1917 */
1918 if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag) 1918 if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)
1919 return; 1919 return;
1920 1920
1921 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); 1921 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
1922 WARN_ON(cfq_cfqq_slice_new(cfqq)); 1922 WARN_ON(cfq_cfqq_slice_new(cfqq));
1923 1923
1924 /* 1924 /*
1925 * idle is disabled, either manually or by past process history 1925 * idle is disabled, either manually or by past process history
1926 */ 1926 */
1927 if (!cfq_should_idle(cfqd, cfqq)) { 1927 if (!cfq_should_idle(cfqd, cfqq)) {
1928 /* no queue idling. Check for group idling */ 1928 /* no queue idling. Check for group idling */
1929 if (cfqd->cfq_group_idle) 1929 if (cfqd->cfq_group_idle)
1930 group_idle = cfqd->cfq_group_idle; 1930 group_idle = cfqd->cfq_group_idle;
1931 else 1931 else
1932 return; 1932 return;
1933 } 1933 }
1934 1934
1935 /* 1935 /*
1936 * still active requests from this queue, don't idle 1936 * still active requests from this queue, don't idle
1937 */ 1937 */
1938 if (cfqq->dispatched) 1938 if (cfqq->dispatched)
1939 return; 1939 return;
1940 1940
1941 /* 1941 /*
1942 * task has exited, don't wait 1942 * task has exited, don't wait
1943 */ 1943 */
1944 cic = cfqd->active_cic; 1944 cic = cfqd->active_cic;
1945 if (!cic || !atomic_read(&cic->ioc->nr_tasks)) 1945 if (!cic || !atomic_read(&cic->ioc->nr_tasks))
1946 return; 1946 return;
1947 1947
1948 /* 1948 /*
1949 * If our average think time is larger than the remaining time 1949 * If our average think time is larger than the remaining time
1950 * slice, then don't idle. This avoids overrunning the allotted 1950 * slice, then don't idle. This avoids overrunning the allotted
1951 * time slice. 1951 * time slice.
1952 */ 1952 */
1953 if (sample_valid(cic->ttime_samples) && 1953 if (sample_valid(cic->ttime_samples) &&
1954 (cfqq->slice_end - jiffies < cic->ttime_mean)) { 1954 (cfqq->slice_end - jiffies < cic->ttime_mean)) {
1955 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d", 1955 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d",
1956 cic->ttime_mean); 1956 cic->ttime_mean);
1957 return; 1957 return;
1958 } 1958 }
1959 1959
1960 /* There are other queues in the group, don't do group idle */ 1960 /* There are other queues in the group, don't do group idle */
1961 if (group_idle && cfqq->cfqg->nr_cfqq > 1) 1961 if (group_idle && cfqq->cfqg->nr_cfqq > 1)
1962 return; 1962 return;
1963 1963
1964 cfq_mark_cfqq_wait_request(cfqq); 1964 cfq_mark_cfqq_wait_request(cfqq);
1965 1965
1966 if (group_idle) 1966 if (group_idle)
1967 sl = cfqd->cfq_group_idle; 1967 sl = cfqd->cfq_group_idle;
1968 else 1968 else
1969 sl = cfqd->cfq_slice_idle; 1969 sl = cfqd->cfq_slice_idle;
1970 1970
1971 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 1971 mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
1972 cfq_blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg); 1972 cfq_blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg);
1973 cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl, 1973 cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl,
1974 group_idle ? 1 : 0); 1974 group_idle ? 1 : 0);
1975 } 1975 }
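
Arming the idle timer above is plain jiffies arithmetic: pick either the per-queue or the per-group idle window and set the timer that many ticks from now. A standalone model, assuming HZ = 1000 so that a window of HZ/125 ticks is 8 ms; the names are illustrative.

    #define MODEL_HZ 1000UL

    /* The group idle window is used only when per-queue idling was ruled out. */
    static unsigned long pick_idle_window(unsigned long slice_idle,
                                          unsigned long group_idle,
                                          int queue_idling)
    {
        return queue_idling ? slice_idle : group_idle;
    }

    /* Mirrors mod_timer(&cfqd->idle_slice_timer, jiffies + sl). */
    static unsigned long idle_deadline(unsigned long now, unsigned long window)
    {
        return now + window;   /* e.g. now + 8 ticks == 8 ms at MODEL_HZ */
    }
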
1976 1976
1977 /* 1977 /*
1978 * Move request from internal lists to the request queue dispatch list. 1978 * Move request from internal lists to the request queue dispatch list.
1979 */ 1979 */
1980 static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) 1980 static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
1981 { 1981 {
1982 struct cfq_data *cfqd = q->elevator->elevator_data; 1982 struct cfq_data *cfqd = q->elevator->elevator_data;
1983 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1983 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1984 1984
1985 cfq_log_cfqq(cfqd, cfqq, "dispatch_insert"); 1985 cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
1986 1986
1987 cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq); 1987 cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq);
1988 cfq_remove_request(rq); 1988 cfq_remove_request(rq);
1989 cfqq->dispatched++; 1989 cfqq->dispatched++;
1990 (RQ_CFQG(rq))->dispatched++; 1990 (RQ_CFQG(rq))->dispatched++;
1991 elv_dispatch_sort(q, rq); 1991 elv_dispatch_sort(q, rq);
1992 1992
1993 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; 1993 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
1994 cfqq->nr_sectors += blk_rq_sectors(rq); 1994 cfqq->nr_sectors += blk_rq_sectors(rq);
1995 cfq_blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq), 1995 cfq_blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq),
1996 rq_data_dir(rq), rq_is_sync(rq)); 1996 rq_data_dir(rq), rq_is_sync(rq));
1997 } 1997 }
1998 1998
1999 /* 1999 /*
2000 * return expired entry, or NULL to just start from scratch in rbtree 2000 * return expired entry, or NULL to just start from scratch in rbtree
2001 */ 2001 */
2002 static struct request *cfq_check_fifo(struct cfq_queue *cfqq) 2002 static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
2003 { 2003 {
2004 struct request *rq = NULL; 2004 struct request *rq = NULL;
2005 2005
2006 if (cfq_cfqq_fifo_expire(cfqq)) 2006 if (cfq_cfqq_fifo_expire(cfqq))
2007 return NULL; 2007 return NULL;
2008 2008
2009 cfq_mark_cfqq_fifo_expire(cfqq); 2009 cfq_mark_cfqq_fifo_expire(cfqq);
2010 2010
2011 if (list_empty(&cfqq->fifo)) 2011 if (list_empty(&cfqq->fifo))
2012 return NULL; 2012 return NULL;
2013 2013
2014 rq = rq_entry_fifo(cfqq->fifo.next); 2014 rq = rq_entry_fifo(cfqq->fifo.next);
2015 if (time_before(jiffies, rq_fifo_time(rq))) 2015 if (time_before(jiffies, rq_fifo_time(rq)))
2016 rq = NULL; 2016 rq = NULL;
2017 2017
2018 cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); 2018 cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
2019 return rq; 2019 return rq;
2020 } 2020 }
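
The FIFO test above is a wrap-safe deadline comparison: a request is only handed out once its fifo time has passed, and at most one such check is made per slice. A userspace model of that comparison, with time_before() written out the way the kernel defines it:

    /* time_before(a, b): wrap-safe "a is earlier than b". */
    static int model_time_before(unsigned long a, unsigned long b)
    {
        return (long)(a - b) < 0;
    }

    /* A request leaves the FIFO only once its deadline has passed. */
    static int fifo_request_expired(unsigned long now, unsigned long fifo_deadline)
    {
        return !model_time_before(now, fifo_deadline);
    }
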
2021 2021
2022 static inline int 2022 static inline int
2023 cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) 2023 cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2024 { 2024 {
2025 const int base_rq = cfqd->cfq_slice_async_rq; 2025 const int base_rq = cfqd->cfq_slice_async_rq;
2026 2026
2027 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); 2027 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
2028 2028
2029 return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio)); 2029 return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
2030 } 2030 }
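
The per-priority request cap above is linear in the distance from the lowest best-effort priority. A worked, compilable example, assuming CFQ_PRIO_LISTS is 8 and the default cfq_slice_async_rq of 2, so priorities 0..7 map to caps 32, 28, 24, 20, 16, 12, 8 and 4:

    #include <stdio.h>

    #define MODEL_PRIO_LISTS 8    /* assumed value of CFQ_PRIO_LISTS */

    static int prio_to_maxrq(int base_rq, int ioprio)
    {
        return 2 * (base_rq + base_rq * (MODEL_PRIO_LISTS - 1 - ioprio));
    }

    int main(void)
    {
        /* base_rq = 2 (default cfq_slice_async_rq): 32, 28, 24, 20, 16, 12, 8, 4 */
        for (int p = 0; p < MODEL_PRIO_LISTS; p++)
            printf("ioprio %d -> at most %d requests per slice\n",
                   p, prio_to_maxrq(2, p));
        return 0;
    }
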
2031 2031
2032 /* 2032 /*
2033 * Must be called with the queue_lock held. 2033 * Must be called with the queue_lock held.
2034 */ 2034 */
2035 static int cfqq_process_refs(struct cfq_queue *cfqq) 2035 static int cfqq_process_refs(struct cfq_queue *cfqq)
2036 { 2036 {
2037 int process_refs, io_refs; 2037 int process_refs, io_refs;
2038 2038
2039 io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE]; 2039 io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
2040 process_refs = cfqq->ref - io_refs; 2040 process_refs = cfqq->ref - io_refs;
2041 BUG_ON(process_refs < 0); 2041 BUG_ON(process_refs < 0);
2042 return process_refs; 2042 return process_refs;
2043 } 2043 }
2044 2044
2045 static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq) 2045 static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
2046 { 2046 {
2047 int process_refs, new_process_refs; 2047 int process_refs, new_process_refs;
2048 struct cfq_queue *__cfqq; 2048 struct cfq_queue *__cfqq;
2049 2049
2050 /* 2050 /*
2051 * If there are no process references on the new_cfqq, then it is 2051 * If there are no process references on the new_cfqq, then it is
2052 * unsafe to follow the ->new_cfqq chain as other cfqq's in the 2052 * unsafe to follow the ->new_cfqq chain as other cfqq's in the
2053 * chain may have dropped their last reference (not just their 2053 * chain may have dropped their last reference (not just their
2054 * last process reference). 2054 * last process reference).
2055 */ 2055 */
2056 if (!cfqq_process_refs(new_cfqq)) 2056 if (!cfqq_process_refs(new_cfqq))
2057 return; 2057 return;
2058 2058
2059 /* Avoid a circular list and skip interim queue merges */ 2059 /* Avoid a circular list and skip interim queue merges */
2060 while ((__cfqq = new_cfqq->new_cfqq)) { 2060 while ((__cfqq = new_cfqq->new_cfqq)) {
2061 if (__cfqq == cfqq) 2061 if (__cfqq == cfqq)
2062 return; 2062 return;
2063 new_cfqq = __cfqq; 2063 new_cfqq = __cfqq;
2064 } 2064 }
2065 2065
2066 process_refs = cfqq_process_refs(cfqq); 2066 process_refs = cfqq_process_refs(cfqq);
2067 new_process_refs = cfqq_process_refs(new_cfqq); 2067 new_process_refs = cfqq_process_refs(new_cfqq);
2068 /* 2068 /*
2069 * If the process for the cfqq has gone away, there is no 2069 * If the process for the cfqq has gone away, there is no
2070 * sense in merging the queues. 2070 * sense in merging the queues.
2071 */ 2071 */
2072 if (process_refs == 0 || new_process_refs == 0) 2072 if (process_refs == 0 || new_process_refs == 0)
2073 return; 2073 return;
2074 2074
2075 /* 2075 /*
2076 * Merge in the direction of the lesser amount of work. 2076 * Merge in the direction of the lesser amount of work.
2077 */ 2077 */
2078 if (new_process_refs >= process_refs) { 2078 if (new_process_refs >= process_refs) {
2079 cfqq->new_cfqq = new_cfqq; 2079 cfqq->new_cfqq = new_cfqq;
2080 new_cfqq->ref += process_refs; 2080 new_cfqq->ref += process_refs;
2081 } else { 2081 } else {
2082 new_cfqq->new_cfqq = cfqq; 2082 new_cfqq->new_cfqq = cfqq;
2083 cfqq->ref += new_process_refs; 2083 cfqq->ref += new_process_refs;
2084 } 2084 }
2085 } 2085 }
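
After the safety checks, the merge rule above amounts to: drain the queue with fewer process references into the busier one, and transfer that many references so the target queue stays pinned. A minimal sketch under illustrative names, modelling only the new_cfqq pointer and the reference count:

    struct q_model {
        int refs;
        struct q_model *new_q;     /* models cfqq->new_cfqq */
    };

    static void setup_merge_model(struct q_model *q, struct q_model *other,
                                  int q_process_refs, int other_process_refs)
    {
        if (!q_process_refs || !other_process_refs)
            return;                        /* a dead process on either side: no merge */

        if (other_process_refs >= q_process_refs) {
            q->new_q = other;              /* q drains into other */
            other->refs += q_process_refs;
        } else {
            other->new_q = q;              /* other drains into q */
            q->refs += other_process_refs;
        }
    }
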
2086 2086
2087 static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, 2087 static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
2088 struct cfq_group *cfqg, enum wl_prio_t prio) 2088 struct cfq_group *cfqg, enum wl_prio_t prio)
2089 { 2089 {
2090 struct cfq_queue *queue; 2090 struct cfq_queue *queue;
2091 int i; 2091 int i;
2092 bool key_valid = false; 2092 bool key_valid = false;
2093 unsigned long lowest_key = 0; 2093 unsigned long lowest_key = 0;
2094 enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD; 2094 enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
2095 2095
2096 for (i = 0; i <= SYNC_WORKLOAD; ++i) { 2096 for (i = 0; i <= SYNC_WORKLOAD; ++i) {
2097 /* select the one with lowest rb_key */ 2097 /* select the one with lowest rb_key */
2098 queue = cfq_rb_first(service_tree_for(cfqg, prio, i)); 2098 queue = cfq_rb_first(service_tree_for(cfqg, prio, i));
2099 if (queue && 2099 if (queue &&
2100 (!key_valid || time_before(queue->rb_key, lowest_key))) { 2100 (!key_valid || time_before(queue->rb_key, lowest_key))) {
2101 lowest_key = queue->rb_key; 2101 lowest_key = queue->rb_key;
2102 cur_best = i; 2102 cur_best = i;
2103 key_valid = true; 2103 key_valid = true;
2104 } 2104 }
2105 } 2105 }
2106 2106
2107 return cur_best; 2107 return cur_best;
2108 } 2108 }
2109 2109
2110 static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) 2110 static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
2111 { 2111 {
2112 unsigned slice; 2112 unsigned slice;
2113 unsigned count; 2113 unsigned count;
2114 struct cfq_rb_root *st; 2114 struct cfq_rb_root *st;
2115 unsigned group_slice; 2115 unsigned group_slice;
2116 enum wl_prio_t original_prio = cfqd->serving_prio; 2116 enum wl_prio_t original_prio = cfqd->serving_prio;
2117 2117
2118 /* Choose next priority. RT > BE > IDLE */ 2118 /* Choose next priority. RT > BE > IDLE */
2119 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) 2119 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
2120 cfqd->serving_prio = RT_WORKLOAD; 2120 cfqd->serving_prio = RT_WORKLOAD;
2121 else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg)) 2121 else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg))
2122 cfqd->serving_prio = BE_WORKLOAD; 2122 cfqd->serving_prio = BE_WORKLOAD;
2123 else { 2123 else {
2124 cfqd->serving_prio = IDLE_WORKLOAD; 2124 cfqd->serving_prio = IDLE_WORKLOAD;
2125 cfqd->workload_expires = jiffies + 1; 2125 cfqd->workload_expires = jiffies + 1;
2126 return; 2126 return;
2127 } 2127 }
2128 2128
2129 if (original_prio != cfqd->serving_prio) 2129 if (original_prio != cfqd->serving_prio)
2130 goto new_workload; 2130 goto new_workload;
2131 2131
2132 /* 2132 /*
2133 * For RT and BE, we have to choose also the type 2133 * For RT and BE, we have to choose also the type
2134 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload 2134 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
2135 * expiration time 2135 * expiration time
2136 */ 2136 */
2137 st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); 2137 st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
2138 count = st->count; 2138 count = st->count;
2139 2139
2140 /* 2140 /*
2141 * check workload expiration, and that we still have other queues ready 2141 * check workload expiration, and that we still have other queues ready
2142 */ 2142 */
2143 if (count && !time_after(jiffies, cfqd->workload_expires)) 2143 if (count && !time_after(jiffies, cfqd->workload_expires))
2144 return; 2144 return;
2145 2145
2146 new_workload: 2146 new_workload:
2147 /* otherwise select new workload type */ 2147 /* otherwise select new workload type */
2148 cfqd->serving_type = 2148 cfqd->serving_type =
2149 cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); 2149 cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
2150 st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); 2150 st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
2151 count = st->count; 2151 count = st->count;
2152 2152
2153 /* 2153 /*
2154 * the workload slice is computed as a fraction of target latency 2154 * the workload slice is computed as a fraction of target latency
2155 * proportional to the number of queues in that workload, over 2155 * proportional to the number of queues in that workload, over
2156 * all the queues in the same priority class 2156 * all the queues in the same priority class
2157 */ 2157 */
2158 group_slice = cfq_group_slice(cfqd, cfqg); 2158 group_slice = cfq_group_slice(cfqd, cfqg);
2159 2159
2160 slice = group_slice * count / 2160 slice = group_slice * count /
2161 max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio], 2161 max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio],
2162 cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg)); 2162 cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg));
2163 2163
2164 if (cfqd->serving_type == ASYNC_WORKLOAD) { 2164 if (cfqd->serving_type == ASYNC_WORKLOAD) {
2165 unsigned int tmp; 2165 unsigned int tmp;
2166 2166
2167 /* 2167 /*
2168 * Async queues are currently system wide. Just taking 2168 * Async queues are currently system wide. Just taking
2169 * proportion of queues within same group will lead to higher 2169 * proportion of queues within same group will lead to higher
2170 * async ratio system wide as generally root group is going 2170 * async ratio system wide as generally root group is going
2171 * to have higher weight. A more accurate thing would be to 2171 * to have higher weight. A more accurate thing would be to
2172 * calculate system wide async/sync ratio. 2172 * calculate system wide async/sync ratio.
2173 */ 2173 */
2174 tmp = cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg); 2174 tmp = cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg);
2175 tmp = tmp/cfqd->busy_queues; 2175 tmp = tmp/cfqd->busy_queues;
2176 slice = min_t(unsigned, slice, tmp); 2176 slice = min_t(unsigned, slice, tmp);
2177 2177
2178 /* async workload slice is scaled down according to 2178 /* async workload slice is scaled down according to
2179 * the sync/async slice ratio. */ 2179 * the sync/async slice ratio. */
2180 slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1]; 2180 slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1];
2181 } else 2181 } else
2182 /* sync workload slice is at least 2 * cfq_slice_idle */ 2182 /* sync workload slice is at least 2 * cfq_slice_idle */
2183 slice = max(slice, 2 * cfqd->cfq_slice_idle); 2183 slice = max(slice, 2 * cfqd->cfq_slice_idle);
2184 2184
2185 slice = max_t(unsigned, slice, CFQ_MIN_TT); 2185 slice = max_t(unsigned, slice, CFQ_MIN_TT);
2186 cfq_log(cfqd, "workload slice:%d", slice); 2186 cfq_log(cfqd, "workload slice:%d", slice);
2187 cfqd->workload_expires = jiffies + slice; 2187 cfqd->workload_expires = jiffies + slice;
2188 } 2188 }
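
The slice sizing above reduces to a proportional share of the group slice, clamped differently for sync and async workloads. Below is a standalone version of the arithmetic, assuming HZ = 1000 and the default tunables (300 ms target latency, 100 ms sync slice, 40 ms async slice, 8 ms slice_idle, a 2-tick minimum); all names and constants here are a model, not the kernel's.

    #define MODEL_TARGET_LAT  300U   /* ms */
    #define MODEL_SLICE_SYNC  100U
    #define MODEL_SLICE_ASYNC  40U
    #define MODEL_SLICE_IDLE    8U
    #define MODEL_MIN_TT        2U

    static unsigned model_workload_slice(unsigned group_slice,
                                         unsigned count,     /* queues of the chosen type */
                                         unsigned busy_avg,  /* smoothed busy count, this prio class */
                                         unsigned busy_now,  /* current busy count, this prio class */
                                         int is_async,
                                         unsigned busy_async,
                                         unsigned busy_total)
    {
        unsigned denom = busy_avg > busy_now ? busy_avg : busy_now;
        unsigned slice = group_slice * count / denom;   /* denom >= 1: at least one busy queue */

        if (is_async) {
            /* async queues are system wide: cap by the async share of all
             * busy queues, then scale by the async:sync slice ratio */
            unsigned tmp = MODEL_TARGET_LAT * busy_async / busy_total;
            if (tmp < slice)
                slice = tmp;
            slice = slice * MODEL_SLICE_ASYNC / MODEL_SLICE_SYNC;
        } else if (slice < 2 * MODEL_SLICE_IDLE) {
            /* sync workloads always get at least two idle windows */
            slice = 2 * MODEL_SLICE_IDLE;
        }

        return slice > MODEL_MIN_TT ? slice : MODEL_MIN_TT;
    }

For example, two SYNC_NOIDLE queues out of three busy best-effort queues in a group whose slice is 300 ms would get 300 * 2 / 3 = 200 ms under these defaults.
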
2189 2189
2190 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) 2190 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
2191 { 2191 {
2192 struct cfq_rb_root *st = &cfqd->grp_service_tree; 2192 struct cfq_rb_root *st = &cfqd->grp_service_tree;
2193 struct cfq_group *cfqg; 2193 struct cfq_group *cfqg;
2194 2194
2195 if (RB_EMPTY_ROOT(&st->rb)) 2195 if (RB_EMPTY_ROOT(&st->rb))
2196 return NULL; 2196 return NULL;
2197 cfqg = cfq_rb_first_group(st); 2197 cfqg = cfq_rb_first_group(st);
2198 update_min_vdisktime(st); 2198 update_min_vdisktime(st);
2199 return cfqg; 2199 return cfqg;
2200 } 2200 }
2201 2201
2202 static void cfq_choose_cfqg(struct cfq_data *cfqd) 2202 static void cfq_choose_cfqg(struct cfq_data *cfqd)
2203 { 2203 {
2204 struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd); 2204 struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
2205 2205
2206 cfqd->serving_group = cfqg; 2206 cfqd->serving_group = cfqg;
2207 2207
2208 /* Restore the workload type data */ 2208 /* Restore the workload type data */
2209 if (cfqg->saved_workload_slice) { 2209 if (cfqg->saved_workload_slice) {
2210 cfqd->workload_expires = jiffies + cfqg->saved_workload_slice; 2210 cfqd->workload_expires = jiffies + cfqg->saved_workload_slice;
2211 cfqd->serving_type = cfqg->saved_workload; 2211 cfqd->serving_type = cfqg->saved_workload;
2212 cfqd->serving_prio = cfqg->saved_serving_prio; 2212 cfqd->serving_prio = cfqg->saved_serving_prio;
2213 } else 2213 } else
2214 cfqd->workload_expires = jiffies - 1; 2214 cfqd->workload_expires = jiffies - 1;
2215 2215
2216 choose_service_tree(cfqd, cfqg); 2216 choose_service_tree(cfqd, cfqg);
2217 } 2217 }
2218 2218
2219 /* 2219 /*
2220 * Select a queue for service. If we have a current active queue, 2220 * Select a queue for service. If we have a current active queue,
2221 * check whether to continue servicing it, or retrieve and set a new one. 2221 * check whether to continue servicing it, or retrieve and set a new one.
2222 */ 2222 */
2223 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) 2223 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
2224 { 2224 {
2225 struct cfq_queue *cfqq, *new_cfqq = NULL; 2225 struct cfq_queue *cfqq, *new_cfqq = NULL;
2226 2226
2227 cfqq = cfqd->active_queue; 2227 cfqq = cfqd->active_queue;
2228 if (!cfqq) 2228 if (!cfqq)
2229 goto new_queue; 2229 goto new_queue;
2230 2230
2231 if (!cfqd->rq_queued) 2231 if (!cfqd->rq_queued)
2232 return NULL; 2232 return NULL;
2233 2233
2234 /* 2234 /*
2235 * We were waiting for group to get backlogged. Expire the queue 2235 * We were waiting for group to get backlogged. Expire the queue
2236 */ 2236 */
2237 if (cfq_cfqq_wait_busy(cfqq) && !RB_EMPTY_ROOT(&cfqq->sort_list)) 2237 if (cfq_cfqq_wait_busy(cfqq) && !RB_EMPTY_ROOT(&cfqq->sort_list))
2238 goto expire; 2238 goto expire;
2239 2239
2240 /* 2240 /*
2241 * The active queue has run out of time, expire it and select new. 2241 * The active queue has run out of time, expire it and select new.
2242 */ 2242 */
2243 if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq)) { 2243 if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq)) {
2244 /* 2244 /*
2245 * If slice had not expired at the completion of last request 2245 * If slice had not expired at the completion of last request
2246 * we might not have turned on wait_busy flag. Don't expire 2246 * we might not have turned on wait_busy flag. Don't expire
2247 * the queue yet. Allow the group to get backlogged. 2247 * the queue yet. Allow the group to get backlogged.
2248 * 2248 *
2249 * The very fact that we have used the slice means we 2249 * The very fact that we have used the slice means we
2250 * have been idling all along on this queue and it should be 2250 * have been idling all along on this queue and it should be
2251 * ok to wait for this request to complete. 2251 * ok to wait for this request to complete.
2252 */ 2252 */
2253 if (cfqq->cfqg->nr_cfqq == 1 && RB_EMPTY_ROOT(&cfqq->sort_list) 2253 if (cfqq->cfqg->nr_cfqq == 1 && RB_EMPTY_ROOT(&cfqq->sort_list)
2254 && cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { 2254 && cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
2255 cfqq = NULL; 2255 cfqq = NULL;
2256 goto keep_queue; 2256 goto keep_queue;
2257 } else 2257 } else
2258 goto check_group_idle; 2258 goto check_group_idle;
2259 } 2259 }
2260 2260
2261 /* 2261 /*
2262 * The active queue has requests and isn't expired, allow it to 2262 * The active queue has requests and isn't expired, allow it to
2263 * dispatch. 2263 * dispatch.
2264 */ 2264 */
2265 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) 2265 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
2266 goto keep_queue; 2266 goto keep_queue;
2267 2267
2268 /* 2268 /*
2269 * If another queue has a request waiting within our mean seek 2269 * If another queue has a request waiting within our mean seek
2270 * distance, let it run. The expire code will check for close 2270 * distance, let it run. The expire code will check for close
2271 * cooperators and put the close queue at the front of the service 2271 * cooperators and put the close queue at the front of the service
2272 * tree. If possible, merge the expiring queue with the new cfqq. 2272 * tree. If possible, merge the expiring queue with the new cfqq.
2273 */ 2273 */
2274 new_cfqq = cfq_close_cooperator(cfqd, cfqq); 2274 new_cfqq = cfq_close_cooperator(cfqd, cfqq);
2275 if (new_cfqq) { 2275 if (new_cfqq) {
2276 if (!cfqq->new_cfqq) 2276 if (!cfqq->new_cfqq)
2277 cfq_setup_merge(cfqq, new_cfqq); 2277 cfq_setup_merge(cfqq, new_cfqq);
2278 goto expire; 2278 goto expire;
2279 } 2279 }
2280 2280
2281 /* 2281 /*
2282 * No requests pending. If the active queue still has requests in 2282 * No requests pending. If the active queue still has requests in
2283 * flight or is idling for a new request, allow either of these 2283 * flight or is idling for a new request, allow either of these
2284 * conditions to happen (or time out) before selecting a new queue. 2284 * conditions to happen (or time out) before selecting a new queue.
2285 */ 2285 */
2286 if (timer_pending(&cfqd->idle_slice_timer)) { 2286 if (timer_pending(&cfqd->idle_slice_timer)) {
2287 cfqq = NULL; 2287 cfqq = NULL;
2288 goto keep_queue; 2288 goto keep_queue;
2289 } 2289 }
2290 2290
2291 /* 2291 /*
2292 * This is a deep seek queue, but the device is much faster than 2292 * This is a deep seek queue, but the device is much faster than
2293 * the queue can deliver, don't idle 2293 * the queue can deliver, don't idle
2294 **/ 2294 **/
2295 if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) && 2295 if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
2296 (cfq_cfqq_slice_new(cfqq) || 2296 (cfq_cfqq_slice_new(cfqq) ||
2297 (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) { 2297 (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
2298 cfq_clear_cfqq_deep(cfqq); 2298 cfq_clear_cfqq_deep(cfqq);
2299 cfq_clear_cfqq_idle_window(cfqq); 2299 cfq_clear_cfqq_idle_window(cfqq);
2300 } 2300 }
2301 2301
2302 if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { 2302 if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
2303 cfqq = NULL; 2303 cfqq = NULL;
2304 goto keep_queue; 2304 goto keep_queue;
2305 } 2305 }
2306 2306
2307 /* 2307 /*
2308 * If group idle is enabled and there are requests dispatched from 2308 * If group idle is enabled and there are requests dispatched from
2309 * this group, wait for requests to complete. 2309 * this group, wait for requests to complete.
2310 */ 2310 */
2311 check_group_idle: 2311 check_group_idle:
2312 if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 2312 if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1
2313 && cfqq->cfqg->dispatched) { 2313 && cfqq->cfqg->dispatched) {
2314 cfqq = NULL; 2314 cfqq = NULL;
2315 goto keep_queue; 2315 goto keep_queue;
2316 } 2316 }
2317 2317
2318 expire: 2318 expire:
2319 cfq_slice_expired(cfqd, 0); 2319 cfq_slice_expired(cfqd, 0);
2320 new_queue: 2320 new_queue:
2321 /* 2321 /*
2322 * Current queue expired. Check if we have to switch to a new 2322 * Current queue expired. Check if we have to switch to a new
2323 * service tree 2323 * service tree
2324 */ 2324 */
2325 if (!new_cfqq) 2325 if (!new_cfqq)
2326 cfq_choose_cfqg(cfqd); 2326 cfq_choose_cfqg(cfqd);
2327 2327
2328 cfqq = cfq_set_active_queue(cfqd, new_cfqq); 2328 cfqq = cfq_set_active_queue(cfqd, new_cfqq);
2329 keep_queue: 2329 keep_queue:
2330 return cfqq; 2330 return cfqq;
2331 } 2331 }
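
The first few branches above decide the fate of the active queue before any new queue is considered, and this is where the wait_busy flag matters: a queue kept alive only so its group stays backlogged is expired the moment a request actually arrives. A condensed, illustrative model of those early checks; the cooperator, idle-timer and group-idle branches are deliberately collapsed into the simplified WAIT/EXPIRE outcome.

    enum select_action { SELECT_EXPIRE, SELECT_KEEP, SELECT_WAIT, SELECT_NEW };

    static enum select_action select_model(int wait_busy, int has_requests,
                                           int slice_used, int must_dispatch,
                                           int idling_makes_sense)
    {
        /* kept alive only so the group gets backlogged: a request arriving
         * while wait_busy is set ends the slice immediately */
        if (wait_busy && has_requests)
            return SELECT_EXPIRE;
        if (slice_used && !must_dispatch)
            return idling_makes_sense ? SELECT_WAIT : SELECT_EXPIRE;
        if (has_requests)
            return SELECT_KEEP;
        return SELECT_NEW;
    }
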
2332 2332
2333 static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) 2333 static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
2334 { 2334 {
2335 int dispatched = 0; 2335 int dispatched = 0;
2336 2336
2337 while (cfqq->next_rq) { 2337 while (cfqq->next_rq) {
2338 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq); 2338 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
2339 dispatched++; 2339 dispatched++;
2340 } 2340 }
2341 2341
2342 BUG_ON(!list_empty(&cfqq->fifo)); 2342 BUG_ON(!list_empty(&cfqq->fifo));
2343 2343
2344 /* By default cfqq is not expired if it is empty. Do it explicitly */ 2344 /* By default cfqq is not expired if it is empty. Do it explicitly */
2345 __cfq_slice_expired(cfqq->cfqd, cfqq, 0); 2345 __cfq_slice_expired(cfqq->cfqd, cfqq, 0);
2346 return dispatched; 2346 return dispatched;
2347 } 2347 }
2348 2348
2349 /* 2349 /*
2350 * Drain our current requests. Used for barriers and when switching 2350 * Drain our current requests. Used for barriers and when switching
2351 * io schedulers on-the-fly. 2351 * io schedulers on-the-fly.
2352 */ 2352 */
2353 static int cfq_forced_dispatch(struct cfq_data *cfqd) 2353 static int cfq_forced_dispatch(struct cfq_data *cfqd)
2354 { 2354 {
2355 struct cfq_queue *cfqq; 2355 struct cfq_queue *cfqq;
2356 int dispatched = 0; 2356 int dispatched = 0;
2357 2357
2358 /* Expire the timeslice of the current active queue first */ 2358 /* Expire the timeslice of the current active queue first */
2359 cfq_slice_expired(cfqd, 0); 2359 cfq_slice_expired(cfqd, 0);
2360 while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) { 2360 while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) {
2361 __cfq_set_active_queue(cfqd, cfqq); 2361 __cfq_set_active_queue(cfqd, cfqq);
2362 dispatched += __cfq_forced_dispatch_cfqq(cfqq); 2362 dispatched += __cfq_forced_dispatch_cfqq(cfqq);
2363 } 2363 }
2364 2364
2365 BUG_ON(cfqd->busy_queues); 2365 BUG_ON(cfqd->busy_queues);
2366 2366
2367 cfq_log(cfqd, "forced_dispatch=%d", dispatched); 2367 cfq_log(cfqd, "forced_dispatch=%d", dispatched);
2368 return dispatched; 2368 return dispatched;
2369 } 2369 }
2370 2370
2371 static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, 2371 static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
2372 struct cfq_queue *cfqq) 2372 struct cfq_queue *cfqq)
2373 { 2373 {
2374 /* the queue hasn't finished any request, can't estimate */ 2374 /* the queue hasn't finished any request, can't estimate */
2375 if (cfq_cfqq_slice_new(cfqq)) 2375 if (cfq_cfqq_slice_new(cfqq))
2376 return true; 2376 return true;
2377 if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, 2377 if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
2378 cfqq->slice_end)) 2378 cfqq->slice_end))
2379 return true; 2379 return true;
2380 2380
2381 return false; 2381 return false;
2382 } 2382 }
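
The estimate above charges each in-flight request roughly one idle window and asks whether that much time still fits in the slice. A standalone, wrap-safe version with time_after() written out the way the kernel defines it; for example, with an 8-tick idle window and 4 requests in flight, a slice with fewer than 32 ticks left counts as nearly used.

    /* time_after(a, b): wrap-safe "a is later than b". */
    static int model_time_after(unsigned long a, unsigned long b)
    {
        return (long)(b - a) < 0;
    }

    static int slice_used_soon(unsigned long now, unsigned long slice_end,
                               unsigned long slice_idle, unsigned int dispatched)
    {
        /* charge each in-flight request one idle window of remaining time */
        return model_time_after(now + slice_idle * dispatched, slice_end);
    }
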
2383 2383
2384 static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) 2384 static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2385 { 2385 {
2386 unsigned int max_dispatch; 2386 unsigned int max_dispatch;
2387 2387
2388 /* 2388 /*
2389 * Drain async requests before we start sync IO 2389 * Drain async requests before we start sync IO
2390 */ 2390 */
2391 if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_flight[BLK_RW_ASYNC]) 2391 if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_flight[BLK_RW_ASYNC])
2392 return false; 2392 return false;
2393 2393
2394 /* 2394 /*
2395 * If this is an async queue and we have sync IO in flight, let it wait 2395 * If this is an async queue and we have sync IO in flight, let it wait
2396 */ 2396 */
2397 if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq)) 2397 if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq))
2398 return false; 2398 return false;
2399 2399
2400 max_dispatch = max_t(unsigned int, cfqd->cfq_quantum / 2, 1); 2400 max_dispatch = max_t(unsigned int, cfqd->cfq_quantum / 2, 1);
2401 if (cfq_class_idle(cfqq)) 2401 if (cfq_class_idle(cfqq))
2402 max_dispatch = 1; 2402 max_dispatch = 1;
2403 2403
2404 /* 2404 /*
2405 * Does this cfqq already have too much IO in flight? 2405 * Does this cfqq already have too much IO in flight?
2406 */ 2406 */
2407 if (cfqq->dispatched >= max_dispatch) { 2407 if (cfqq->dispatched >= max_dispatch) {
2408 /* 2408 /*
2409 * idle queue must always only have a single IO in flight 2409 * idle queue must always only have a single IO in flight
2410 */ 2410 */
2411 if (cfq_class_idle(cfqq)) 2411 if (cfq_class_idle(cfqq))
2412 return false; 2412 return false;
2413 2413
2414 /* 2414 /*
2415 * We have other queues, don't allow more IO from this one 2415 * We have other queues, don't allow more IO from this one
2416 */ 2416 */
2417 if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq)) 2417 if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq))
2418 return false; 2418 return false;
2419 2419
2420 /* 2420 /*
2421 * Sole queue user, no limit 2421 * Sole queue user, no limit
2422 */ 2422 */
2423 if (cfqd->busy_queues == 1) 2423 if (cfqd->busy_queues == 1)
2424 max_dispatch = -1; 2424 max_dispatch = -1;
2425 else 2425 else
2426 /* 2426 /*
2427 * Normally we start throttling cfqq when cfq_quantum/2 2427 * Normally we start throttling cfqq when cfq_quantum/2
2428 * requests have been dispatched. But we can drive 2428 * requests have been dispatched. But we can drive
2429 * deeper queue depths at the beginning of slice 2429 * deeper queue depths at the beginning of slice
2430 * subjected to upper limit of cfq_quantum. 2430 * subjected to upper limit of cfq_quantum.
2431 * */ 2431 * */
2432 max_dispatch = cfqd->cfq_quantum; 2432 max_dispatch = cfqd->cfq_quantum;
2433 } 2433 }
2434 2434
2435 /* 2435 /*
2436 * Async queues must wait a bit before being allowed dispatch. 2436 * Async queues must wait a bit before being allowed dispatch.
2437 * We also ramp up the dispatch depth gradually for async IO, 2437 * We also ramp up the dispatch depth gradually for async IO,
2438 * based on the last sync IO we serviced 2438 * based on the last sync IO we serviced
2439 */ 2439 */
2440 if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { 2440 if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
2441 unsigned long last_sync = jiffies - cfqd->last_delayed_sync; 2441 unsigned long last_sync = jiffies - cfqd->last_delayed_sync;
2442 unsigned int depth; 2442 unsigned int depth;
2443 2443
2444 depth = last_sync / cfqd->cfq_slice[1]; 2444 depth = last_sync / cfqd->cfq_slice[1];
2445 if (!depth && !cfqq->dispatched) 2445 if (!depth && !cfqq->dispatched)
2446 depth = 1; 2446 depth = 1;
2447 if (depth < max_dispatch) 2447 if (depth < max_dispatch)
2448 max_dispatch = depth; 2448 max_dispatch = depth;
2449 } 2449 }
2450 2450
2451 /* 2451 /*
2452 * If we're below the current max, allow a dispatch 2452 * If we're below the current max, allow a dispatch
2453 */ 2453 */
2454 return cfqq->dispatched < max_dispatch; 2454 return cfqq->dispatched < max_dispatch;
2455 } 2455 }
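
The async ramp-up at the end of the function above grows the allowed dispatch depth by one for every full sync slice that has passed since sync IO was last delayed. A standalone version of that calculation; with the default 100-tick sync slice, 250 ticks of quiet sync time allows a depth of 2.

    static unsigned int async_depth(unsigned long now,
                                    unsigned long last_delayed_sync,
                                    unsigned long slice_sync,
                                    unsigned int dispatched,
                                    unsigned int max_dispatch)
    {
        unsigned int depth = (now - last_delayed_sync) / slice_sync;

        /* allow one request even right after sync IO was delayed */
        if (!depth && !dispatched)
            depth = 1;
        return depth < max_dispatch ? depth : max_dispatch;
    }
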
2456 2456
2457 /* 2457 /*
2458 * Dispatch a request from cfqq, moving them to the request queue 2458 * Dispatch a request from cfqq, moving them to the request queue
2459 * dispatch list. 2459 * dispatch list.
2460 */ 2460 */
2461 static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) 2461 static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2462 { 2462 {
2463 struct request *rq; 2463 struct request *rq;
2464 2464
2465 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); 2465 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
2466 2466
2467 if (!cfq_may_dispatch(cfqd, cfqq)) 2467 if (!cfq_may_dispatch(cfqd, cfqq))
2468 return false; 2468 return false;
2469 2469
2470 /* 2470 /*
2471 * follow expired path, else get first next available 2471 * follow expired path, else get first next available
2472 */ 2472 */
2473 rq = cfq_check_fifo(cfqq); 2473 rq = cfq_check_fifo(cfqq);
2474 if (!rq) 2474 if (!rq)
2475 rq = cfqq->next_rq; 2475 rq = cfqq->next_rq;
2476 2476
2477 /* 2477 /*
2478 * insert request into driver dispatch list 2478 * insert request into driver dispatch list
2479 */ 2479 */
2480 cfq_dispatch_insert(cfqd->queue, rq); 2480 cfq_dispatch_insert(cfqd->queue, rq);
2481 2481
2482 if (!cfqd->active_cic) { 2482 if (!cfqd->active_cic) {
2483 struct cfq_io_context *cic = RQ_CIC(rq); 2483 struct cfq_io_context *cic = RQ_CIC(rq);
2484 2484
2485 atomic_long_inc(&cic->ioc->refcount); 2485 atomic_long_inc(&cic->ioc->refcount);
2486 cfqd->active_cic = cic; 2486 cfqd->active_cic = cic;
2487 } 2487 }
2488 2488
2489 return true; 2489 return true;
2490 } 2490 }
2491 2491
2492 /* 2492 /*
2493 * Find the cfqq that we need to service and move a request from that to the 2493 * Find the cfqq that we need to service and move a request from that to the
2494 * dispatch list 2494 * dispatch list
2495 */ 2495 */
2496 static int cfq_dispatch_requests(struct request_queue *q, int force) 2496 static int cfq_dispatch_requests(struct request_queue *q, int force)
2497 { 2497 {
2498 struct cfq_data *cfqd = q->elevator->elevator_data; 2498 struct cfq_data *cfqd = q->elevator->elevator_data;
2499 struct cfq_queue *cfqq; 2499 struct cfq_queue *cfqq;
2500 2500
2501 if (!cfqd->busy_queues) 2501 if (!cfqd->busy_queues)
2502 return 0; 2502 return 0;
2503 2503
2504 if (unlikely(force)) 2504 if (unlikely(force))
2505 return cfq_forced_dispatch(cfqd); 2505 return cfq_forced_dispatch(cfqd);
2506 2506
2507 cfqq = cfq_select_queue(cfqd); 2507 cfqq = cfq_select_queue(cfqd);
2508 if (!cfqq) 2508 if (!cfqq)
2509 return 0; 2509 return 0;
2510 2510
2511 /* 2511 /*
2512 * Dispatch a request from this cfqq, if it is allowed 2512 * Dispatch a request from this cfqq, if it is allowed
2513 */ 2513 */
2514 if (!cfq_dispatch_request(cfqd, cfqq)) 2514 if (!cfq_dispatch_request(cfqd, cfqq))
2515 return 0; 2515 return 0;
2516 2516
2517 cfqq->slice_dispatch++; 2517 cfqq->slice_dispatch++;
2518 cfq_clear_cfqq_must_dispatch(cfqq); 2518 cfq_clear_cfqq_must_dispatch(cfqq);
2519 2519
2520 /* 2520 /*
2521 * expire an async queue immediately if it has used up its slice. idle 2521 * expire an async queue immediately if it has used up its slice. idle
2522 * queues always expire after 1 dispatch round. 2522 * queues always expire after 1 dispatch round.
2523 */ 2523 */
2524 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && 2524 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
2525 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || 2525 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
2526 cfq_class_idle(cfqq))) { 2526 cfq_class_idle(cfqq))) {
2527 cfqq->slice_end = jiffies + 1; 2527 cfqq->slice_end = jiffies + 1;
2528 cfq_slice_expired(cfqd, 0); 2528 cfq_slice_expired(cfqd, 0);
2529 } 2529 }
2530 2530
2531 cfq_log_cfqq(cfqd, cfqq, "dispatched a request"); 2531 cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
2532 return 1; 2532 return 1;
2533 } 2533 }
2534 2534
2535 /* 2535 /*
2536 * task holds one reference to the queue, dropped when task exits. each rq 2536 * task holds one reference to the queue, dropped when task exits. each rq
2537 * in-flight on this queue also holds a reference, dropped when rq is freed. 2537 * in-flight on this queue also holds a reference, dropped when rq is freed.
2538 * 2538 *
2539 * Each cfq queue took a reference on the parent group. Drop it now. 2539 * Each cfq queue took a reference on the parent group. Drop it now.
2540 * queue lock must be held here. 2540 * queue lock must be held here.
2541 */ 2541 */
2542 static void cfq_put_queue(struct cfq_queue *cfqq) 2542 static void cfq_put_queue(struct cfq_queue *cfqq)
2543 { 2543 {
2544 struct cfq_data *cfqd = cfqq->cfqd; 2544 struct cfq_data *cfqd = cfqq->cfqd;
2545 struct cfq_group *cfqg, *orig_cfqg; 2545 struct cfq_group *cfqg, *orig_cfqg;
2546 2546
2547 BUG_ON(cfqq->ref <= 0); 2547 BUG_ON(cfqq->ref <= 0);
2548 2548
2549 cfqq->ref--; 2549 cfqq->ref--;
2550 if (cfqq->ref) 2550 if (cfqq->ref)
2551 return; 2551 return;
2552 2552
2553 cfq_log_cfqq(cfqd, cfqq, "put_queue"); 2553 cfq_log_cfqq(cfqd, cfqq, "put_queue");
2554 BUG_ON(rb_first(&cfqq->sort_list)); 2554 BUG_ON(rb_first(&cfqq->sort_list));
2555 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); 2555 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
2556 cfqg = cfqq->cfqg; 2556 cfqg = cfqq->cfqg;
2557 orig_cfqg = cfqq->orig_cfqg; 2557 orig_cfqg = cfqq->orig_cfqg;
2558 2558
2559 if (unlikely(cfqd->active_queue == cfqq)) { 2559 if (unlikely(cfqd->active_queue == cfqq)) {
2560 __cfq_slice_expired(cfqd, cfqq, 0); 2560 __cfq_slice_expired(cfqd, cfqq, 0);
2561 cfq_schedule_dispatch(cfqd); 2561 cfq_schedule_dispatch(cfqd);
2562 } 2562 }
2563 2563
2564 BUG_ON(cfq_cfqq_on_rr(cfqq)); 2564 BUG_ON(cfq_cfqq_on_rr(cfqq));
2565 kmem_cache_free(cfq_pool, cfqq); 2565 kmem_cache_free(cfq_pool, cfqq);
2566 cfq_put_cfqg(cfqg); 2566 cfq_put_cfqg(cfqg);
2567 if (orig_cfqg) 2567 if (orig_cfqg)
2568 cfq_put_cfqg(orig_cfqg); 2568 cfq_put_cfqg(orig_cfqg);
2569 } 2569 }
2570 2570
2571 /* 2571 /*
2572 * Must always be called with the rcu_read_lock() held 2572 * Must always be called with the rcu_read_lock() held
2573 */ 2573 */
2574 static void 2574 static void
2575 __call_for_each_cic(struct io_context *ioc, 2575 __call_for_each_cic(struct io_context *ioc,
2576 void (*func)(struct io_context *, struct cfq_io_context *)) 2576 void (*func)(struct io_context *, struct cfq_io_context *))
2577 { 2577 {
2578 struct cfq_io_context *cic; 2578 struct cfq_io_context *cic;
2579 struct hlist_node *n; 2579 struct hlist_node *n;
2580 2580
2581 hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) 2581 hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
2582 func(ioc, cic); 2582 func(ioc, cic);
2583 } 2583 }
2584 2584
2585 /* 2585 /*
2586 * Call func for each cic attached to this ioc. 2586 * Call func for each cic attached to this ioc.
2587 */ 2587 */
2588 static void 2588 static void
2589 call_for_each_cic(struct io_context *ioc, 2589 call_for_each_cic(struct io_context *ioc,
2590 void (*func)(struct io_context *, struct cfq_io_context *)) 2590 void (*func)(struct io_context *, struct cfq_io_context *))
2591 { 2591 {
2592 rcu_read_lock(); 2592 rcu_read_lock();
2593 __call_for_each_cic(ioc, func); 2593 __call_for_each_cic(ioc, func);
2594 rcu_read_unlock(); 2594 rcu_read_unlock();
2595 } 2595 }
2596 2596
2597 static void cfq_cic_free_rcu(struct rcu_head *head) 2597 static void cfq_cic_free_rcu(struct rcu_head *head)
2598 { 2598 {
2599 struct cfq_io_context *cic; 2599 struct cfq_io_context *cic;
2600 2600
2601 cic = container_of(head, struct cfq_io_context, rcu_head); 2601 cic = container_of(head, struct cfq_io_context, rcu_head);
2602 2602
2603 kmem_cache_free(cfq_ioc_pool, cic); 2603 kmem_cache_free(cfq_ioc_pool, cic);
2604 elv_ioc_count_dec(cfq_ioc_count); 2604 elv_ioc_count_dec(cfq_ioc_count);
2605 2605
2606 if (ioc_gone) { 2606 if (ioc_gone) {
2607 /* 2607 /*
2608 * CFQ scheduler is exiting, grab exit lock and check 2608 * CFQ scheduler is exiting, grab exit lock and check
2609 * the pending io context count. If it hits zero, 2609 * the pending io context count. If it hits zero,
2610 * complete ioc_gone and set it back to NULL 2610 * complete ioc_gone and set it back to NULL
2611 */ 2611 */
2612 spin_lock(&ioc_gone_lock); 2612 spin_lock(&ioc_gone_lock);
2613 if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) { 2613 if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) {
2614 complete(ioc_gone); 2614 complete(ioc_gone);
2615 ioc_gone = NULL; 2615 ioc_gone = NULL;
2616 } 2616 }
2617 spin_unlock(&ioc_gone_lock); 2617 spin_unlock(&ioc_gone_lock);
2618 } 2618 }
2619 } 2619 }
2620 2620
2621 static void cfq_cic_free(struct cfq_io_context *cic) 2621 static void cfq_cic_free(struct cfq_io_context *cic)
2622 { 2622 {
2623 call_rcu(&cic->rcu_head, cfq_cic_free_rcu); 2623 call_rcu(&cic->rcu_head, cfq_cic_free_rcu);
2624 } 2624 }
2625 2625
2626 static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) 2626 static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
2627 { 2627 {
2628 unsigned long flags; 2628 unsigned long flags;
2629 unsigned long dead_key = (unsigned long) cic->key; 2629 unsigned long dead_key = (unsigned long) cic->key;
2630 2630
2631 BUG_ON(!(dead_key & CIC_DEAD_KEY)); 2631 BUG_ON(!(dead_key & CIC_DEAD_KEY));
2632 2632
2633 spin_lock_irqsave(&ioc->lock, flags); 2633 spin_lock_irqsave(&ioc->lock, flags);
2634 radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT); 2634 radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT);
2635 hlist_del_rcu(&cic->cic_list); 2635 hlist_del_rcu(&cic->cic_list);
2636 spin_unlock_irqrestore(&ioc->lock, flags); 2636 spin_unlock_irqrestore(&ioc->lock, flags);
2637 2637
2638 cfq_cic_free(cic); 2638 cfq_cic_free(cic);
2639 } 2639 }
2640 2640
2641 /* 2641 /*
2642 * Must be called with rcu_read_lock() held or preemption otherwise disabled. 2642 * Must be called with rcu_read_lock() held or preemption otherwise disabled.
2643 * Only two callers of this - ->dtor() which is called with the rcu_read_lock(), 2643 * Only two callers of this - ->dtor() which is called with the rcu_read_lock(),
2644 * and ->trim() which is called with the task lock held 2644 * and ->trim() which is called with the task lock held
2645 */ 2645 */
2646 static void cfq_free_io_context(struct io_context *ioc) 2646 static void cfq_free_io_context(struct io_context *ioc)
2647 { 2647 {
2648 /* 2648 /*
2649 * ioc->refcount is zero here, or we are called from elv_unregister(), 2649 * ioc->refcount is zero here, or we are called from elv_unregister(),
2650 * so no more cic's are allowed to be linked into this ioc. So it 2650 * so no more cic's are allowed to be linked into this ioc. So it
2651 * should be ok to iterate over the known list, we will see all cic's 2651 * should be ok to iterate over the known list, we will see all cic's
2652 * since no new ones are added. 2652 * since no new ones are added.
2653 */ 2653 */
2654 __call_for_each_cic(ioc, cic_free_func); 2654 __call_for_each_cic(ioc, cic_free_func);
2655 } 2655 }
2656 2656
2657 static void cfq_put_cooperator(struct cfq_queue *cfqq) 2657 static void cfq_put_cooperator(struct cfq_queue *cfqq)
2658 { 2658 {
2659 struct cfq_queue *__cfqq, *next; 2659 struct cfq_queue *__cfqq, *next;
2660 2660
2661 /* 2661 /*
2662 * If this queue was scheduled to merge with another queue, be 2662 * If this queue was scheduled to merge with another queue, be
2663 * sure to drop the reference taken on that queue (and others in 2663 * sure to drop the reference taken on that queue (and others in
2664 * the merge chain). See cfq_setup_merge and cfq_merge_cfqqs. 2664 * the merge chain). See cfq_setup_merge and cfq_merge_cfqqs.
2665 */ 2665 */
2666 __cfqq = cfqq->new_cfqq; 2666 __cfqq = cfqq->new_cfqq;
2667 while (__cfqq) { 2667 while (__cfqq) {
2668 if (__cfqq == cfqq) { 2668 if (__cfqq == cfqq) {
2669 WARN(1, "cfqq->new_cfqq loop detected\n"); 2669 WARN(1, "cfqq->new_cfqq loop detected\n");
2670 break; 2670 break;
2671 } 2671 }
2672 next = __cfqq->new_cfqq; 2672 next = __cfqq->new_cfqq;
2673 cfq_put_queue(__cfqq); 2673 cfq_put_queue(__cfqq);
2674 __cfqq = next; 2674 __cfqq = next;
2675 } 2675 }
2676 } 2676 }
2677 2677
2678 static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) 2678 static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2679 { 2679 {
2680 if (unlikely(cfqq == cfqd->active_queue)) { 2680 if (unlikely(cfqq == cfqd->active_queue)) {
2681 __cfq_slice_expired(cfqd, cfqq, 0); 2681 __cfq_slice_expired(cfqd, cfqq, 0);
2682 cfq_schedule_dispatch(cfqd); 2682 cfq_schedule_dispatch(cfqd);
2683 } 2683 }
2684 2684
2685 cfq_put_cooperator(cfqq); 2685 cfq_put_cooperator(cfqq);
2686 2686
2687 cfq_put_queue(cfqq); 2687 cfq_put_queue(cfqq);
2688 } 2688 }
2689 2689
2690 static void __cfq_exit_single_io_context(struct cfq_data *cfqd, 2690 static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
2691 struct cfq_io_context *cic) 2691 struct cfq_io_context *cic)
2692 { 2692 {
2693 struct io_context *ioc = cic->ioc; 2693 struct io_context *ioc = cic->ioc;
2694 2694
2695 list_del_init(&cic->queue_list); 2695 list_del_init(&cic->queue_list);
2696 2696
2697 /* 2697 /*
2698 * Make sure dead mark is seen for dead queues 2698 * Make sure dead mark is seen for dead queues
2699 */ 2699 */
2700 smp_wmb(); 2700 smp_wmb();
2701 cic->key = cfqd_dead_key(cfqd); 2701 cic->key = cfqd_dead_key(cfqd);
2702 2702
2703 if (ioc->ioc_data == cic) 2703 if (ioc->ioc_data == cic)
2704 rcu_assign_pointer(ioc->ioc_data, NULL); 2704 rcu_assign_pointer(ioc->ioc_data, NULL);
2705 2705
2706 if (cic->cfqq[BLK_RW_ASYNC]) { 2706 if (cic->cfqq[BLK_RW_ASYNC]) {
2707 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]); 2707 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
2708 cic->cfqq[BLK_RW_ASYNC] = NULL; 2708 cic->cfqq[BLK_RW_ASYNC] = NULL;
2709 } 2709 }
2710 2710
2711 if (cic->cfqq[BLK_RW_SYNC]) { 2711 if (cic->cfqq[BLK_RW_SYNC]) {
2712 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]); 2712 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]);
2713 cic->cfqq[BLK_RW_SYNC] = NULL; 2713 cic->cfqq[BLK_RW_SYNC] = NULL;
2714 } 2714 }
2715 } 2715 }
2716 2716
2717 static void cfq_exit_single_io_context(struct io_context *ioc, 2717 static void cfq_exit_single_io_context(struct io_context *ioc,
2718 struct cfq_io_context *cic) 2718 struct cfq_io_context *cic)
2719 { 2719 {
2720 struct cfq_data *cfqd = cic_to_cfqd(cic); 2720 struct cfq_data *cfqd = cic_to_cfqd(cic);
2721 2721
2722 if (cfqd) { 2722 if (cfqd) {
2723 struct request_queue *q = cfqd->queue; 2723 struct request_queue *q = cfqd->queue;
2724 unsigned long flags; 2724 unsigned long flags;
2725 2725
2726 spin_lock_irqsave(q->queue_lock, flags); 2726 spin_lock_irqsave(q->queue_lock, flags);
2727 2727
2728 /* 2728 /*
2729 * Ensure we get a fresh copy of the ->key to prevent 2729 * Ensure we get a fresh copy of the ->key to prevent
2730 * race between exiting task and queue 2730 * race between exiting task and queue
2731 */ 2731 */
2732 smp_read_barrier_depends(); 2732 smp_read_barrier_depends();
2733 if (cic->key == cfqd) 2733 if (cic->key == cfqd)
2734 __cfq_exit_single_io_context(cfqd, cic); 2734 __cfq_exit_single_io_context(cfqd, cic);
2735 2735
2736 spin_unlock_irqrestore(q->queue_lock, flags); 2736 spin_unlock_irqrestore(q->queue_lock, flags);
2737 } 2737 }
2738 } 2738 }
2739 2739
2740 /* 2740 /*
2741 * The process that ioc belongs to has exited, we need to clean up 2741 * The process that ioc belongs to has exited, we need to clean up
2742 * and put the internal structures we have that belong to that process. 2742 * and put the internal structures we have that belong to that process.
2743 */ 2743 */
2744 static void cfq_exit_io_context(struct io_context *ioc) 2744 static void cfq_exit_io_context(struct io_context *ioc)
2745 { 2745 {
2746 call_for_each_cic(ioc, cfq_exit_single_io_context); 2746 call_for_each_cic(ioc, cfq_exit_single_io_context);
2747 } 2747 }
2748 2748
2749 static struct cfq_io_context * 2749 static struct cfq_io_context *
2750 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 2750 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
2751 { 2751 {
2752 struct cfq_io_context *cic; 2752 struct cfq_io_context *cic;
2753 2753
2754 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO, 2754 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
2755 cfqd->queue->node); 2755 cfqd->queue->node);
2756 if (cic) { 2756 if (cic) {
2757 cic->last_end_request = jiffies; 2757 cic->last_end_request = jiffies;
2758 INIT_LIST_HEAD(&cic->queue_list); 2758 INIT_LIST_HEAD(&cic->queue_list);
2759 INIT_HLIST_NODE(&cic->cic_list); 2759 INIT_HLIST_NODE(&cic->cic_list);
2760 cic->dtor = cfq_free_io_context; 2760 cic->dtor = cfq_free_io_context;
2761 cic->exit = cfq_exit_io_context; 2761 cic->exit = cfq_exit_io_context;
2762 elv_ioc_count_inc(cfq_ioc_count); 2762 elv_ioc_count_inc(cfq_ioc_count);
2763 } 2763 }
2764 2764
2765 return cic; 2765 return cic;
2766 } 2766 }
2767 2767
2768 static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) 2768 static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
2769 { 2769 {
2770 struct task_struct *tsk = current; 2770 struct task_struct *tsk = current;
2771 int ioprio_class; 2771 int ioprio_class;
2772 2772
2773 if (!cfq_cfqq_prio_changed(cfqq)) 2773 if (!cfq_cfqq_prio_changed(cfqq))
2774 return; 2774 return;
2775 2775
2776 ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio); 2776 ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio);
2777 switch (ioprio_class) { 2777 switch (ioprio_class) {
2778 default: 2778 default:
2779 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); 2779 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
2780 case IOPRIO_CLASS_NONE: 2780 case IOPRIO_CLASS_NONE:
2781 /* 2781 /*
2782 * no prio set, inherit CPU scheduling settings 2782 * no prio set, inherit CPU scheduling settings
2783 */ 2783 */
2784 cfqq->ioprio = task_nice_ioprio(tsk); 2784 cfqq->ioprio = task_nice_ioprio(tsk);
2785 cfqq->ioprio_class = task_nice_ioclass(tsk); 2785 cfqq->ioprio_class = task_nice_ioclass(tsk);
2786 break; 2786 break;
2787 case IOPRIO_CLASS_RT: 2787 case IOPRIO_CLASS_RT:
2788 cfqq->ioprio = task_ioprio(ioc); 2788 cfqq->ioprio = task_ioprio(ioc);
2789 cfqq->ioprio_class = IOPRIO_CLASS_RT; 2789 cfqq->ioprio_class = IOPRIO_CLASS_RT;
2790 break; 2790 break;
2791 case IOPRIO_CLASS_BE: 2791 case IOPRIO_CLASS_BE:
2792 cfqq->ioprio = task_ioprio(ioc); 2792 cfqq->ioprio = task_ioprio(ioc);
2793 cfqq->ioprio_class = IOPRIO_CLASS_BE; 2793 cfqq->ioprio_class = IOPRIO_CLASS_BE;
2794 break; 2794 break;
2795 case IOPRIO_CLASS_IDLE: 2795 case IOPRIO_CLASS_IDLE:
2796 cfqq->ioprio_class = IOPRIO_CLASS_IDLE; 2796 cfqq->ioprio_class = IOPRIO_CLASS_IDLE;
2797 cfqq->ioprio = 7; 2797 cfqq->ioprio = 7;
2798 cfq_clear_cfqq_idle_window(cfqq); 2798 cfq_clear_cfqq_idle_window(cfqq);
2799 break; 2799 break;
2800 } 2800 }
2801 2801
2802 /* 2802 /*
2803 * keep track of original prio settings in case we have to temporarily 2803 * keep track of original prio settings in case we have to temporarily
2804 * elevate the priority of this queue 2804 * elevate the priority of this queue
2805 */ 2805 */
2806 cfqq->org_ioprio = cfqq->ioprio; 2806 cfqq->org_ioprio = cfqq->ioprio;
2807 cfqq->org_ioprio_class = cfqq->ioprio_class; 2807 cfqq->org_ioprio_class = cfqq->ioprio_class;
2808 cfq_clear_cfqq_prio_changed(cfqq); 2808 cfq_clear_cfqq_prio_changed(cfqq);
2809 } 2809 }
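
The ioprio value switched on above packs the class into the top bits and the priority level into the low bits. A small model of that decomposition; the 13-bit split mirrors the ioprio.h of this era, but treat the constants and names here as illustrative rather than authoritative.

    #define MODEL_IOPRIO_CLASS_SHIFT 13   /* assumed, mirrors include/linux/ioprio.h */

    enum { MODEL_CLASS_NONE, MODEL_CLASS_RT, MODEL_CLASS_BE, MODEL_CLASS_IDLE };

    static int ioprio_class_of(unsigned short ioprio)
    {
        return ioprio >> MODEL_IOPRIO_CLASS_SHIFT;
    }

    static int ioprio_level_of(unsigned short ioprio)
    {
        return ioprio & ((1 << MODEL_IOPRIO_CLASS_SHIFT) - 1);
    }
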
2810 2810
2811 static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) 2811 static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
2812 { 2812 {
2813 struct cfq_data *cfqd = cic_to_cfqd(cic); 2813 struct cfq_data *cfqd = cic_to_cfqd(cic);
2814 struct cfq_queue *cfqq; 2814 struct cfq_queue *cfqq;
2815 unsigned long flags; 2815 unsigned long flags;
2816 2816
2817 if (unlikely(!cfqd)) 2817 if (unlikely(!cfqd))
2818 return; 2818 return;
2819 2819
2820 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 2820 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
2821 2821
2822 cfqq = cic->cfqq[BLK_RW_ASYNC]; 2822 cfqq = cic->cfqq[BLK_RW_ASYNC];
2823 if (cfqq) { 2823 if (cfqq) {
2824 struct cfq_queue *new_cfqq; 2824 struct cfq_queue *new_cfqq;
2825 new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc, 2825 new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
2826 GFP_ATOMIC); 2826 GFP_ATOMIC);
2827 if (new_cfqq) { 2827 if (new_cfqq) {
2828 cic->cfqq[BLK_RW_ASYNC] = new_cfqq; 2828 cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
2829 cfq_put_queue(cfqq); 2829 cfq_put_queue(cfqq);
2830 } 2830 }
2831 } 2831 }
2832 2832
2833 cfqq = cic->cfqq[BLK_RW_SYNC]; 2833 cfqq = cic->cfqq[BLK_RW_SYNC];
2834 if (cfqq) 2834 if (cfqq)
2835 cfq_mark_cfqq_prio_changed(cfqq); 2835 cfq_mark_cfqq_prio_changed(cfqq);
2836 2836
2837 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 2837 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
2838 } 2838 }
2839 2839
2840 static void cfq_ioc_set_ioprio(struct io_context *ioc) 2840 static void cfq_ioc_set_ioprio(struct io_context *ioc)
2841 { 2841 {
2842 call_for_each_cic(ioc, changed_ioprio); 2842 call_for_each_cic(ioc, changed_ioprio);
2843 ioc->ioprio_changed = 0; 2843 ioc->ioprio_changed = 0;
2844 } 2844 }
2845 2845
2846 static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, 2846 static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2847 pid_t pid, bool is_sync) 2847 pid_t pid, bool is_sync)
2848 { 2848 {
2849 RB_CLEAR_NODE(&cfqq->rb_node); 2849 RB_CLEAR_NODE(&cfqq->rb_node);
2850 RB_CLEAR_NODE(&cfqq->p_node); 2850 RB_CLEAR_NODE(&cfqq->p_node);
2851 INIT_LIST_HEAD(&cfqq->fifo); 2851 INIT_LIST_HEAD(&cfqq->fifo);
2852 2852
2853 cfqq->ref = 0; 2853 cfqq->ref = 0;
2854 cfqq->cfqd = cfqd; 2854 cfqq->cfqd = cfqd;
2855 2855
2856 cfq_mark_cfqq_prio_changed(cfqq); 2856 cfq_mark_cfqq_prio_changed(cfqq);
2857 2857
2858 if (is_sync) { 2858 if (is_sync) {
2859 if (!cfq_class_idle(cfqq)) 2859 if (!cfq_class_idle(cfqq))
2860 cfq_mark_cfqq_idle_window(cfqq); 2860 cfq_mark_cfqq_idle_window(cfqq);
2861 cfq_mark_cfqq_sync(cfqq); 2861 cfq_mark_cfqq_sync(cfqq);
2862 } 2862 }
2863 cfqq->pid = pid; 2863 cfqq->pid = pid;
2864 } 2864 }
2865 2865
2866 #ifdef CONFIG_CFQ_GROUP_IOSCHED 2866 #ifdef CONFIG_CFQ_GROUP_IOSCHED
2867 static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic) 2867 static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
2868 { 2868 {
2869 struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1); 2869 struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
2870 struct cfq_data *cfqd = cic_to_cfqd(cic); 2870 struct cfq_data *cfqd = cic_to_cfqd(cic);
2871 unsigned long flags; 2871 unsigned long flags;
2872 struct request_queue *q; 2872 struct request_queue *q;
2873 2873
2874 if (unlikely(!cfqd)) 2874 if (unlikely(!cfqd))
2875 return; 2875 return;
2876 2876
2877 q = cfqd->queue; 2877 q = cfqd->queue;
2878 2878
2879 spin_lock_irqsave(q->queue_lock, flags); 2879 spin_lock_irqsave(q->queue_lock, flags);
2880 2880
2881 if (sync_cfqq) { 2881 if (sync_cfqq) {
2882 /* 2882 /*
2883 * Drop reference to sync queue. A new sync queue will be 2883 * Drop reference to sync queue. A new sync queue will be
2884 * assigned in new group upon arrival of a fresh request. 2884 * assigned in new group upon arrival of a fresh request.
2885 */ 2885 */
2886 cfq_log_cfqq(cfqd, sync_cfqq, "changed cgroup"); 2886 cfq_log_cfqq(cfqd, sync_cfqq, "changed cgroup");
2887 cic_set_cfqq(cic, NULL, 1); 2887 cic_set_cfqq(cic, NULL, 1);
2888 cfq_put_queue(sync_cfqq); 2888 cfq_put_queue(sync_cfqq);
2889 } 2889 }
2890 2890
2891 spin_unlock_irqrestore(q->queue_lock, flags); 2891 spin_unlock_irqrestore(q->queue_lock, flags);
2892 } 2892 }
2893 2893
2894 static void cfq_ioc_set_cgroup(struct io_context *ioc) 2894 static void cfq_ioc_set_cgroup(struct io_context *ioc)
2895 { 2895 {
2896 call_for_each_cic(ioc, changed_cgroup); 2896 call_for_each_cic(ioc, changed_cgroup);
2897 ioc->cgroup_changed = 0; 2897 ioc->cgroup_changed = 0;
2898 } 2898 }
2899 #endif /* CONFIG_CFQ_GROUP_IOSCHED */ 2899 #endif /* CONFIG_CFQ_GROUP_IOSCHED */
2900 2900
2901 static struct cfq_queue * 2901 static struct cfq_queue *
2902 cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, 2902 cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
2903 struct io_context *ioc, gfp_t gfp_mask) 2903 struct io_context *ioc, gfp_t gfp_mask)
2904 { 2904 {
2905 struct cfq_queue *cfqq, *new_cfqq = NULL; 2905 struct cfq_queue *cfqq, *new_cfqq = NULL;
2906 struct cfq_io_context *cic; 2906 struct cfq_io_context *cic;
2907 struct cfq_group *cfqg; 2907 struct cfq_group *cfqg;
2908 2908
2909 retry: 2909 retry:
2910 cfqg = cfq_get_cfqg(cfqd, 1); 2910 cfqg = cfq_get_cfqg(cfqd, 1);
2911 cic = cfq_cic_lookup(cfqd, ioc); 2911 cic = cfq_cic_lookup(cfqd, ioc);
2912 /* cic always exists here */ 2912 /* cic always exists here */
2913 cfqq = cic_to_cfqq(cic, is_sync); 2913 cfqq = cic_to_cfqq(cic, is_sync);
2914 2914
2915 /* 2915 /*
2916 * Always try a new alloc if we fell back to the OOM cfqq 2916 * Always try a new alloc if we fell back to the OOM cfqq
2917 * originally, since it should just be a temporary situation. 2917 * originally, since it should just be a temporary situation.
2918 */ 2918 */
2919 if (!cfqq || cfqq == &cfqd->oom_cfqq) { 2919 if (!cfqq || cfqq == &cfqd->oom_cfqq) {
2920 cfqq = NULL; 2920 cfqq = NULL;
2921 if (new_cfqq) { 2921 if (new_cfqq) {
2922 cfqq = new_cfqq; 2922 cfqq = new_cfqq;
2923 new_cfqq = NULL; 2923 new_cfqq = NULL;
2924 } else if (gfp_mask & __GFP_WAIT) { 2924 } else if (gfp_mask & __GFP_WAIT) {
2925 spin_unlock_irq(cfqd->queue->queue_lock); 2925 spin_unlock_irq(cfqd->queue->queue_lock);
2926 new_cfqq = kmem_cache_alloc_node(cfq_pool, 2926 new_cfqq = kmem_cache_alloc_node(cfq_pool,
2927 gfp_mask | __GFP_ZERO, 2927 gfp_mask | __GFP_ZERO,
2928 cfqd->queue->node); 2928 cfqd->queue->node);
2929 spin_lock_irq(cfqd->queue->queue_lock); 2929 spin_lock_irq(cfqd->queue->queue_lock);
2930 if (new_cfqq) 2930 if (new_cfqq)
2931 goto retry; 2931 goto retry;
2932 } else { 2932 } else {
2933 cfqq = kmem_cache_alloc_node(cfq_pool, 2933 cfqq = kmem_cache_alloc_node(cfq_pool,
2934 gfp_mask | __GFP_ZERO, 2934 gfp_mask | __GFP_ZERO,
2935 cfqd->queue->node); 2935 cfqd->queue->node);
2936 } 2936 }
2937 2937
2938 if (cfqq) { 2938 if (cfqq) {
2939 cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); 2939 cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
2940 cfq_init_prio_data(cfqq, ioc); 2940 cfq_init_prio_data(cfqq, ioc);
2941 cfq_link_cfqq_cfqg(cfqq, cfqg); 2941 cfq_link_cfqq_cfqg(cfqq, cfqg);
2942 cfq_log_cfqq(cfqd, cfqq, "alloced"); 2942 cfq_log_cfqq(cfqd, cfqq, "alloced");
2943 } else 2943 } else
2944 cfqq = &cfqd->oom_cfqq; 2944 cfqq = &cfqd->oom_cfqq;
2945 } 2945 }
2946 2946
2947 if (new_cfqq) 2947 if (new_cfqq)
2948 kmem_cache_free(cfq_pool, new_cfqq); 2948 kmem_cache_free(cfq_pool, new_cfqq);
2949 2949
2950 return cfqq; 2950 return cfqq;
2951 } 2951 }
2952 2952
2953 static struct cfq_queue ** 2953 static struct cfq_queue **
2954 cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio) 2954 cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
2955 { 2955 {
2956 switch (ioprio_class) { 2956 switch (ioprio_class) {
2957 case IOPRIO_CLASS_RT: 2957 case IOPRIO_CLASS_RT:
2958 return &cfqd->async_cfqq[0][ioprio]; 2958 return &cfqd->async_cfqq[0][ioprio];
2959 case IOPRIO_CLASS_BE: 2959 case IOPRIO_CLASS_BE:
2960 return &cfqd->async_cfqq[1][ioprio]; 2960 return &cfqd->async_cfqq[1][ioprio];
2961 case IOPRIO_CLASS_IDLE: 2961 case IOPRIO_CLASS_IDLE:
2962 return &cfqd->async_idle_cfqq; 2962 return &cfqd->async_idle_cfqq;
2963 default: 2963 default:
2964 BUG(); 2964 BUG();
2965 } 2965 }
2966 } 2966 }
2967 2967
2968 static struct cfq_queue * 2968 static struct cfq_queue *
2969 cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc, 2969 cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
2970 gfp_t gfp_mask) 2970 gfp_t gfp_mask)
2971 { 2971 {
2972 const int ioprio = task_ioprio(ioc); 2972 const int ioprio = task_ioprio(ioc);
2973 const int ioprio_class = task_ioprio_class(ioc); 2973 const int ioprio_class = task_ioprio_class(ioc);
2974 struct cfq_queue **async_cfqq = NULL; 2974 struct cfq_queue **async_cfqq = NULL;
2975 struct cfq_queue *cfqq = NULL; 2975 struct cfq_queue *cfqq = NULL;
2976 2976
2977 if (!is_sync) { 2977 if (!is_sync) {
2978 async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio); 2978 async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
2979 cfqq = *async_cfqq; 2979 cfqq = *async_cfqq;
2980 } 2980 }
2981 2981
2982 if (!cfqq) 2982 if (!cfqq)
2983 cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask); 2983 cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
2984 2984
2985 /* 2985 /*
2986 * pin the queue now that it's allocated, scheduler exit will prune it 2986 * pin the queue now that it's allocated, scheduler exit will prune it
2987 */ 2987 */
2988 if (!is_sync && !(*async_cfqq)) { 2988 if (!is_sync && !(*async_cfqq)) {
2989 cfqq->ref++; 2989 cfqq->ref++;
2990 *async_cfqq = cfqq; 2990 *async_cfqq = cfqq;
2991 } 2991 }
2992 2992
2993 cfqq->ref++; 2993 cfqq->ref++;
2994 return cfqq; 2994 return cfqq;
2995 } 2995 }
2996 2996
2997 /* 2997 /*
2998 * We drop cfq io contexts lazily, so we may find a dead one. 2998 * We drop cfq io contexts lazily, so we may find a dead one.
2999 */ 2999 */
3000 static void 3000 static void
3001 cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, 3001 cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
3002 struct cfq_io_context *cic) 3002 struct cfq_io_context *cic)
3003 { 3003 {
3004 unsigned long flags; 3004 unsigned long flags;
3005 3005
3006 WARN_ON(!list_empty(&cic->queue_list)); 3006 WARN_ON(!list_empty(&cic->queue_list));
3007 BUG_ON(cic->key != cfqd_dead_key(cfqd)); 3007 BUG_ON(cic->key != cfqd_dead_key(cfqd));
3008 3008
3009 spin_lock_irqsave(&ioc->lock, flags); 3009 spin_lock_irqsave(&ioc->lock, flags);
3010 3010
3011 BUG_ON(ioc->ioc_data == cic); 3011 BUG_ON(ioc->ioc_data == cic);
3012 3012
3013 radix_tree_delete(&ioc->radix_root, cfqd->cic_index); 3013 radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
3014 hlist_del_rcu(&cic->cic_list); 3014 hlist_del_rcu(&cic->cic_list);
3015 spin_unlock_irqrestore(&ioc->lock, flags); 3015 spin_unlock_irqrestore(&ioc->lock, flags);
3016 3016
3017 cfq_cic_free(cic); 3017 cfq_cic_free(cic);
3018 } 3018 }
3019 3019
3020 static struct cfq_io_context * 3020 static struct cfq_io_context *
3021 cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) 3021 cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
3022 { 3022 {
3023 struct cfq_io_context *cic; 3023 struct cfq_io_context *cic;
3024 unsigned long flags; 3024 unsigned long flags;
3025 3025
3026 if (unlikely(!ioc)) 3026 if (unlikely(!ioc))
3027 return NULL; 3027 return NULL;
3028 3028
3029 rcu_read_lock(); 3029 rcu_read_lock();
3030 3030
3031 /* 3031 /*
3032 * we maintain a last-hit cache, to avoid browsing over the tree 3032 * we maintain a last-hit cache, to avoid browsing over the tree
3033 */ 3033 */
3034 cic = rcu_dereference(ioc->ioc_data); 3034 cic = rcu_dereference(ioc->ioc_data);
3035 if (cic && cic->key == cfqd) { 3035 if (cic && cic->key == cfqd) {
3036 rcu_read_unlock(); 3036 rcu_read_unlock();
3037 return cic; 3037 return cic;
3038 } 3038 }
3039 3039
3040 do { 3040 do {
3041 cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index); 3041 cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index);
3042 rcu_read_unlock(); 3042 rcu_read_unlock();
3043 if (!cic) 3043 if (!cic)
3044 break; 3044 break;
3045 if (unlikely(cic->key != cfqd)) { 3045 if (unlikely(cic->key != cfqd)) {
3046 cfq_drop_dead_cic(cfqd, ioc, cic); 3046 cfq_drop_dead_cic(cfqd, ioc, cic);
3047 rcu_read_lock(); 3047 rcu_read_lock();
3048 continue; 3048 continue;
3049 } 3049 }
3050 3050
3051 spin_lock_irqsave(&ioc->lock, flags); 3051 spin_lock_irqsave(&ioc->lock, flags);
3052 rcu_assign_pointer(ioc->ioc_data, cic); 3052 rcu_assign_pointer(ioc->ioc_data, cic);
3053 spin_unlock_irqrestore(&ioc->lock, flags); 3053 spin_unlock_irqrestore(&ioc->lock, flags);
3054 break; 3054 break;
3055 } while (1); 3055 } while (1);
3056 3056
3057 return cic; 3057 return cic;
3058 } 3058 }
3059 3059
3060 /* 3060 /*
3061 * Add cic into ioc, using cfqd as the search key. This enables us to look up 3061 * Add cic into ioc, using cfqd as the search key. This enables us to look up
3062 * the process-specific cfq io context when entered from the block layer. 3062 * the process-specific cfq io context when entered from the block layer.
3063 * Also adds the cic to a per-cfqd list, used when this queue is removed. 3063 * Also adds the cic to a per-cfqd list, used when this queue is removed.
3064 */ 3064 */
3065 static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, 3065 static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
3066 struct cfq_io_context *cic, gfp_t gfp_mask) 3066 struct cfq_io_context *cic, gfp_t gfp_mask)
3067 { 3067 {
3068 unsigned long flags; 3068 unsigned long flags;
3069 int ret; 3069 int ret;
3070 3070
3071 ret = radix_tree_preload(gfp_mask); 3071 ret = radix_tree_preload(gfp_mask);
3072 if (!ret) { 3072 if (!ret) {
3073 cic->ioc = ioc; 3073 cic->ioc = ioc;
3074 cic->key = cfqd; 3074 cic->key = cfqd;
3075 3075
3076 spin_lock_irqsave(&ioc->lock, flags); 3076 spin_lock_irqsave(&ioc->lock, flags);
3077 ret = radix_tree_insert(&ioc->radix_root, 3077 ret = radix_tree_insert(&ioc->radix_root,
3078 cfqd->cic_index, cic); 3078 cfqd->cic_index, cic);
3079 if (!ret) 3079 if (!ret)
3080 hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list); 3080 hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
3081 spin_unlock_irqrestore(&ioc->lock, flags); 3081 spin_unlock_irqrestore(&ioc->lock, flags);
3082 3082
3083 radix_tree_preload_end(); 3083 radix_tree_preload_end();
3084 3084
3085 if (!ret) { 3085 if (!ret) {
3086 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 3086 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
3087 list_add(&cic->queue_list, &cfqd->cic_list); 3087 list_add(&cic->queue_list, &cfqd->cic_list);
3088 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 3088 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
3089 } 3089 }
3090 } 3090 }
3091 3091
3092 if (ret) 3092 if (ret)
3093 printk(KERN_ERR "cfq: cic link failed!\n"); 3093 printk(KERN_ERR "cfq: cic link failed!\n");
3094 3094
3095 return ret; 3095 return ret;
3096 } 3096 }
3097 3097
3098 /* 3098 /*
3099 * Set up the general io context and cfq io context. There can be several cfq 3099 * Set up the general io context and cfq io context. There can be several cfq
3100 * io contexts per general io context, if this process is doing io to more 3100 * io contexts per general io context, if this process is doing io to more
3101 * than one device managed by cfq. 3101 * than one device managed by cfq.
3102 */ 3102 */
3103 static struct cfq_io_context * 3103 static struct cfq_io_context *
3104 cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 3104 cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
3105 { 3105 {
3106 struct io_context *ioc = NULL; 3106 struct io_context *ioc = NULL;
3107 struct cfq_io_context *cic; 3107 struct cfq_io_context *cic;
3108 3108
3109 might_sleep_if(gfp_mask & __GFP_WAIT); 3109 might_sleep_if(gfp_mask & __GFP_WAIT);
3110 3110
3111 ioc = get_io_context(gfp_mask, cfqd->queue->node); 3111 ioc = get_io_context(gfp_mask, cfqd->queue->node);
3112 if (!ioc) 3112 if (!ioc)
3113 return NULL; 3113 return NULL;
3114 3114
3115 cic = cfq_cic_lookup(cfqd, ioc); 3115 cic = cfq_cic_lookup(cfqd, ioc);
3116 if (cic) 3116 if (cic)
3117 goto out; 3117 goto out;
3118 3118
3119 cic = cfq_alloc_io_context(cfqd, gfp_mask); 3119 cic = cfq_alloc_io_context(cfqd, gfp_mask);
3120 if (cic == NULL) 3120 if (cic == NULL)
3121 goto err; 3121 goto err;
3122 3122
3123 if (cfq_cic_link(cfqd, ioc, cic, gfp_mask)) 3123 if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
3124 goto err_free; 3124 goto err_free;
3125 3125
3126 out: 3126 out:
3127 smp_read_barrier_depends(); 3127 smp_read_barrier_depends();
3128 if (unlikely(ioc->ioprio_changed)) 3128 if (unlikely(ioc->ioprio_changed))
3129 cfq_ioc_set_ioprio(ioc); 3129 cfq_ioc_set_ioprio(ioc);
3130 3130
3131 #ifdef CONFIG_CFQ_GROUP_IOSCHED 3131 #ifdef CONFIG_CFQ_GROUP_IOSCHED
3132 if (unlikely(ioc->cgroup_changed)) 3132 if (unlikely(ioc->cgroup_changed))
3133 cfq_ioc_set_cgroup(ioc); 3133 cfq_ioc_set_cgroup(ioc);
3134 #endif 3134 #endif
3135 return cic; 3135 return cic;
3136 err_free: 3136 err_free:
3137 cfq_cic_free(cic); 3137 cfq_cic_free(cic);
3138 err: 3138 err:
3139 put_io_context(ioc); 3139 put_io_context(ioc);
3140 return NULL; 3140 return NULL;
3141 } 3141 }
3142 3142
3143 static void 3143 static void
3144 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) 3144 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
3145 { 3145 {
3146 unsigned long elapsed = jiffies - cic->last_end_request; 3146 unsigned long elapsed = jiffies - cic->last_end_request;
3147 unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); 3147 unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
3148 3148
3149 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; 3149 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
3150 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; 3150 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
3151 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; 3151 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
3152 } 3152 }
3153 3153
3154 static void 3154 static void
3155 cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq, 3155 cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3156 struct request *rq) 3156 struct request *rq)
3157 { 3157 {
3158 sector_t sdist = 0; 3158 sector_t sdist = 0;
3159 sector_t n_sec = blk_rq_sectors(rq); 3159 sector_t n_sec = blk_rq_sectors(rq);
3160 if (cfqq->last_request_pos) { 3160 if (cfqq->last_request_pos) {
3161 if (cfqq->last_request_pos < blk_rq_pos(rq)) 3161 if (cfqq->last_request_pos < blk_rq_pos(rq))
3162 sdist = blk_rq_pos(rq) - cfqq->last_request_pos; 3162 sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
3163 else 3163 else
3164 sdist = cfqq->last_request_pos - blk_rq_pos(rq); 3164 sdist = cfqq->last_request_pos - blk_rq_pos(rq);
3165 } 3165 }
3166 3166
3167 cfqq->seek_history <<= 1; 3167 cfqq->seek_history <<= 1;
3168 if (blk_queue_nonrot(cfqd->queue)) 3168 if (blk_queue_nonrot(cfqd->queue))
3169 cfqq->seek_history |= (n_sec < CFQQ_SECT_THR_NONROT); 3169 cfqq->seek_history |= (n_sec < CFQQ_SECT_THR_NONROT);
3170 else 3170 else
3171 cfqq->seek_history |= (sdist > CFQQ_SEEK_THR); 3171 cfqq->seek_history |= (sdist > CFQQ_SEEK_THR);
3172 } 3172 }
3173 3173
3174 /* 3174 /*
3175 * Disable idle window if the process thinks too long or seeks so much that 3175 * Disable idle window if the process thinks too long or seeks so much that
3176 * it doesn't matter 3176 * it doesn't matter
3177 */ 3177 */
3178 static void 3178 static void
3179 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, 3179 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3180 struct cfq_io_context *cic) 3180 struct cfq_io_context *cic)
3181 { 3181 {
3182 int old_idle, enable_idle; 3182 int old_idle, enable_idle;
3183 3183
3184 /* 3184 /*
3185 * Don't idle for async or idle io prio class 3185 * Don't idle for async or idle io prio class
3186 */ 3186 */
3187 if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq)) 3187 if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq))
3188 return; 3188 return;
3189 3189
3190 enable_idle = old_idle = cfq_cfqq_idle_window(cfqq); 3190 enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
3191 3191
3192 if (cfqq->queued[0] + cfqq->queued[1] >= 4) 3192 if (cfqq->queued[0] + cfqq->queued[1] >= 4)
3193 cfq_mark_cfqq_deep(cfqq); 3193 cfq_mark_cfqq_deep(cfqq);
3194 3194
3195 if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE)) 3195 if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE))
3196 enable_idle = 0; 3196 enable_idle = 0;
3197 else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || 3197 else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
3198 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) 3198 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
3199 enable_idle = 0; 3199 enable_idle = 0;
3200 else if (sample_valid(cic->ttime_samples)) { 3200 else if (sample_valid(cic->ttime_samples)) {
3201 if (cic->ttime_mean > cfqd->cfq_slice_idle) 3201 if (cic->ttime_mean > cfqd->cfq_slice_idle)
3202 enable_idle = 0; 3202 enable_idle = 0;
3203 else 3203 else
3204 enable_idle = 1; 3204 enable_idle = 1;
3205 } 3205 }
3206 3206
3207 if (old_idle != enable_idle) { 3207 if (old_idle != enable_idle) {
3208 cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle); 3208 cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle);
3209 if (enable_idle) 3209 if (enable_idle)
3210 cfq_mark_cfqq_idle_window(cfqq); 3210 cfq_mark_cfqq_idle_window(cfqq);
3211 else 3211 else
3212 cfq_clear_cfqq_idle_window(cfqq); 3212 cfq_clear_cfqq_idle_window(cfqq);
3213 } 3213 }
3214 } 3214 }
3215 3215
3216 /* 3216 /*
3217 * Check if new_cfqq should preempt the currently active queue. Return 0 for 3217 * Check if new_cfqq should preempt the currently active queue. Return 0 for
3218 * no, or if we aren't sure; a 1 will cause a preempt. 3218 * no, or if we aren't sure; a 1 will cause a preempt.
3219 */ 3219 */
3220 static bool 3220 static bool
3221 cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, 3221 cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
3222 struct request *rq) 3222 struct request *rq)
3223 { 3223 {
3224 struct cfq_queue *cfqq; 3224 struct cfq_queue *cfqq;
3225 3225
3226 cfqq = cfqd->active_queue; 3226 cfqq = cfqd->active_queue;
3227 if (!cfqq) 3227 if (!cfqq)
3228 return false; 3228 return false;
3229 3229
3230 if (cfq_class_idle(new_cfqq)) 3230 if (cfq_class_idle(new_cfqq))
3231 return false; 3231 return false;
3232 3232
3233 if (cfq_class_idle(cfqq)) 3233 if (cfq_class_idle(cfqq))
3234 return true; 3234 return true;
3235 3235
3236 /* 3236 /*
3237 * Don't allow a non-RT request to preempt an ongoing RT cfqq timeslice. 3237 * Don't allow a non-RT request to preempt an ongoing RT cfqq timeslice.
3238 */ 3238 */
3239 if (cfq_class_rt(cfqq) && !cfq_class_rt(new_cfqq)) 3239 if (cfq_class_rt(cfqq) && !cfq_class_rt(new_cfqq))
3240 return false; 3240 return false;
3241 3241
3242 /* 3242 /*
3243 * if the new request is sync, but the currently running queue is 3243 * if the new request is sync, but the currently running queue is
3244 * not, let the sync request have priority. 3244 * not, let the sync request have priority.
3245 */ 3245 */
3246 if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq)) 3246 if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
3247 return true; 3247 return true;
3248 3248
3249 if (new_cfqq->cfqg != cfqq->cfqg) 3249 if (new_cfqq->cfqg != cfqq->cfqg)
3250 return false; 3250 return false;
3251 3251
3252 if (cfq_slice_used(cfqq)) 3252 if (cfq_slice_used(cfqq))
3253 return true; 3253 return true;
3254 3254
3255 /* Allow preemption only if we are idling on sync-noidle tree */ 3255 /* Allow preemption only if we are idling on sync-noidle tree */
3256 if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD && 3256 if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD &&
3257 cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD && 3257 cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD &&
3258 new_cfqq->service_tree->count == 2 && 3258 new_cfqq->service_tree->count == 2 &&
3259 RB_EMPTY_ROOT(&cfqq->sort_list)) 3259 RB_EMPTY_ROOT(&cfqq->sort_list))
3260 return true; 3260 return true;
3261 3261
3262 /* 3262 /*
3263 * So both queues are sync. Let the new request get disk time if 3263 * So both queues are sync. Let the new request get disk time if
3264 * it's a metadata request and the current queue is doing regular IO. 3264 * it's a metadata request and the current queue is doing regular IO.
3265 */ 3265 */
3266 if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending) 3266 if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
3267 return true; 3267 return true;
3268 3268
3269 /* 3269 /*
3270 * Allow an RT request to preempt an ongoing non-RT cfqq timeslice. 3270 * Allow an RT request to preempt an ongoing non-RT cfqq timeslice.
3271 */ 3271 */
3272 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) 3272 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
3273 return true; 3273 return true;
3274 3274
3275 /* The active queue is empty and we are not idling on it; allow preemption */ 3275 /* The active queue is empty and we are not idling on it; allow preemption */
3276 if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq)) 3276 if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
3277 return true; 3277 return true;
3278 3278
3279 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) 3279 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
3280 return false; 3280 return false;
3281 3281
3282 /* 3282 /*
3283 * if this request is as-good as one we would expect from the 3283 * if this request is as-good as one we would expect from the
3284 * current cfqq, let it preempt 3284 * current cfqq, let it preempt
3285 */ 3285 */
3286 if (cfq_rq_close(cfqd, cfqq, rq)) 3286 if (cfq_rq_close(cfqd, cfqq, rq))
3287 return true; 3287 return true;
3288 3288
3289 return false; 3289 return false;
3290 } 3290 }
3291 3291
3292 /* 3292 /*
3293 * cfqq preempts the active queue. if we allowed preempt with no slice left, 3293 * cfqq preempts the active queue. if we allowed preempt with no slice left,
3294 * let it have half of its nominal slice. 3294 * let it have half of its nominal slice.
3295 */ 3295 */
3296 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 3296 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3297 { 3297 {
3298 struct cfq_queue *old_cfqq = cfqd->active_queue; 3298 struct cfq_queue *old_cfqq = cfqd->active_queue;
3299 3299
3300 cfq_log_cfqq(cfqd, cfqq, "preempt"); 3300 cfq_log_cfqq(cfqd, cfqq, "preempt");
3301 cfq_slice_expired(cfqd, 1); 3301 cfq_slice_expired(cfqd, 1);
3302 3302
3303 /* 3303 /*
3304 * The workload type has changed; don't save the slice, otherwise the 3304 * The workload type has changed; don't save the slice, otherwise the
3305 * preempt doesn't happen 3305 * preempt doesn't happen
3306 */ 3306 */
3307 if (cfqq_type(old_cfqq) != cfqq_type(cfqq)) 3307 if (cfqq_type(old_cfqq) != cfqq_type(cfqq))
3308 cfqq->cfqg->saved_workload_slice = 0; 3308 cfqq->cfqg->saved_workload_slice = 0;
3309 3309
3310 /* 3310 /*
3311 * Put the new queue at the front of the current list, 3311 * Put the new queue at the front of the current list,
3312 * so we know that it will be selected next. 3312 * so we know that it will be selected next.
3313 */ 3313 */
3314 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 3314 BUG_ON(!cfq_cfqq_on_rr(cfqq));
3315 3315
3316 cfq_service_tree_add(cfqd, cfqq, 1); 3316 cfq_service_tree_add(cfqd, cfqq, 1);
3317 3317
3318 cfqq->slice_end = 0; 3318 cfqq->slice_end = 0;
3319 cfq_mark_cfqq_slice_new(cfqq); 3319 cfq_mark_cfqq_slice_new(cfqq);
3320 } 3320 }
3321 3321
3322 /* 3322 /*
3323 * Called when a new fs request (rq) is added (to cfqq). Check if there's 3323 * Called when a new fs request (rq) is added (to cfqq). Check if there's
3324 * something we should do about it 3324 * something we should do about it
3325 */ 3325 */
3326 static void 3326 static void
3327 cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, 3327 cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3328 struct request *rq) 3328 struct request *rq)
3329 { 3329 {
3330 struct cfq_io_context *cic = RQ_CIC(rq); 3330 struct cfq_io_context *cic = RQ_CIC(rq);
3331 3331
3332 cfqd->rq_queued++; 3332 cfqd->rq_queued++;
3333 if (rq->cmd_flags & REQ_META) 3333 if (rq->cmd_flags & REQ_META)
3334 cfqq->meta_pending++; 3334 cfqq->meta_pending++;
3335 3335
3336 cfq_update_io_thinktime(cfqd, cic); 3336 cfq_update_io_thinktime(cfqd, cic);
3337 cfq_update_io_seektime(cfqd, cfqq, rq); 3337 cfq_update_io_seektime(cfqd, cfqq, rq);
3338 cfq_update_idle_window(cfqd, cfqq, cic); 3338 cfq_update_idle_window(cfqd, cfqq, cic);
3339 3339
3340 cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); 3340 cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
3341 3341
3342 if (cfqq == cfqd->active_queue) { 3342 if (cfqq == cfqd->active_queue) {
3343 /* 3343 /*
3344 * Remember that we saw a request from this process, but 3344 * Remember that we saw a request from this process, but
3345 * don't start queuing just yet. Otherwise we risk seeing lots 3345 * don't start queuing just yet. Otherwise we risk seeing lots
3346 * of tiny requests, because we disrupt the normal plugging 3346 * of tiny requests, because we disrupt the normal plugging
3347 * and merging. If the request is already larger than a single 3347 * and merging. If the request is already larger than a single
3348 * page, let it rip immediately. For that case we assume that 3348 * page, let it rip immediately. For that case we assume that
3349 * merging is already done. Ditto for a busy system that 3349 * merging is already done. Ditto for a busy system that
3350 * has other work pending; don't risk delaying until the 3350 * has other work pending; don't risk delaying until the
3351 * idle timer unplug to continue working. 3351 * idle timer unplug to continue working.
3352 */ 3352 */
3353 if (cfq_cfqq_wait_request(cfqq)) { 3353 if (cfq_cfqq_wait_request(cfqq)) {
3354 if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || 3354 if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
3355 cfqd->busy_queues > 1) { 3355 cfqd->busy_queues > 1) {
3356 cfq_del_timer(cfqd, cfqq); 3356 cfq_del_timer(cfqd, cfqq);
3357 cfq_clear_cfqq_wait_request(cfqq); 3357 cfq_clear_cfqq_wait_request(cfqq);
3358 __blk_run_queue(cfqd->queue); 3358 __blk_run_queue(cfqd->queue);
3359 } else { 3359 } else {
3360 cfq_blkiocg_update_idle_time_stats( 3360 cfq_blkiocg_update_idle_time_stats(
3361 &cfqq->cfqg->blkg); 3361 &cfqq->cfqg->blkg);
3362 cfq_mark_cfqq_must_dispatch(cfqq); 3362 cfq_mark_cfqq_must_dispatch(cfqq);
3363 } 3363 }
3364 } 3364 }
3365 } else if (cfq_should_preempt(cfqd, cfqq, rq)) { 3365 } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
3366 /* 3366 /*
3367 * not the active queue - expire current slice if it is 3367 * not the active queue - expire current slice if it is
3368 * idle and has expired its mean thinktime, or this new queue 3368 * idle and has expired its mean thinktime, or this new queue
3369 * has some old slice time left and is of higher priority or 3369 * has some old slice time left and is of higher priority or
3370 * this new queue is RT and the current one is BE 3370 * this new queue is RT and the current one is BE
3371 */ 3371 */
3372 cfq_preempt_queue(cfqd, cfqq); 3372 cfq_preempt_queue(cfqd, cfqq);
3373 __blk_run_queue(cfqd->queue); 3373 __blk_run_queue(cfqd->queue);
3374 } 3374 }
3375 } 3375 }
3376 3376
3377 static void cfq_insert_request(struct request_queue *q, struct request *rq) 3377 static void cfq_insert_request(struct request_queue *q, struct request *rq)
3378 { 3378 {
3379 struct cfq_data *cfqd = q->elevator->elevator_data; 3379 struct cfq_data *cfqd = q->elevator->elevator_data;
3380 struct cfq_queue *cfqq = RQ_CFQQ(rq); 3380 struct cfq_queue *cfqq = RQ_CFQQ(rq);
3381 3381
3382 cfq_log_cfqq(cfqd, cfqq, "insert_request"); 3382 cfq_log_cfqq(cfqd, cfqq, "insert_request");
3383 cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc); 3383 cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
3384 3384
3385 rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); 3385 rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
3386 list_add_tail(&rq->queuelist, &cfqq->fifo); 3386 list_add_tail(&rq->queuelist, &cfqq->fifo);
3387 cfq_add_rq_rb(rq); 3387 cfq_add_rq_rb(rq);
3388 cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg, 3388 cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
3389 &cfqd->serving_group->blkg, rq_data_dir(rq), 3389 &cfqd->serving_group->blkg, rq_data_dir(rq),
3390 rq_is_sync(rq)); 3390 rq_is_sync(rq));
3391 cfq_rq_enqueued(cfqd, cfqq, rq); 3391 cfq_rq_enqueued(cfqd, cfqq, rq);
3392 } 3392 }
3393 3393
3394 /* 3394 /*
3395 * Update hw_tag based on peak queue depth over 50 samples under 3395 * Update hw_tag based on peak queue depth over 50 samples under
3396 * sufficient load. 3396 * sufficient load.
3397 */ 3397 */
3398 static void cfq_update_hw_tag(struct cfq_data *cfqd) 3398 static void cfq_update_hw_tag(struct cfq_data *cfqd)
3399 { 3399 {
3400 struct cfq_queue *cfqq = cfqd->active_queue; 3400 struct cfq_queue *cfqq = cfqd->active_queue;
3401 3401
3402 if (cfqd->rq_in_driver > cfqd->hw_tag_est_depth) 3402 if (cfqd->rq_in_driver > cfqd->hw_tag_est_depth)
3403 cfqd->hw_tag_est_depth = cfqd->rq_in_driver; 3403 cfqd->hw_tag_est_depth = cfqd->rq_in_driver;
3404 3404
3405 if (cfqd->hw_tag == 1) 3405 if (cfqd->hw_tag == 1)
3406 return; 3406 return;
3407 3407
3408 if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN && 3408 if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
3409 cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN) 3409 cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
3410 return; 3410 return;
3411 3411
3412 /* 3412 /*
3413 * If the active queue doesn't have enough requests and can idle, cfq might not 3413 * If the active queue doesn't have enough requests and can idle, cfq might not
3414 * dispatch sufficient requests to hardware. Don't zero hw_tag in this 3414 * dispatch sufficient requests to hardware. Don't zero hw_tag in this
3415 * case 3415 * case
3416 */ 3416 */
3417 if (cfqq && cfq_cfqq_idle_window(cfqq) && 3417 if (cfqq && cfq_cfqq_idle_window(cfqq) &&
3418 cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] < 3418 cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] <
3419 CFQ_HW_QUEUE_MIN && cfqd->rq_in_driver < CFQ_HW_QUEUE_MIN) 3419 CFQ_HW_QUEUE_MIN && cfqd->rq_in_driver < CFQ_HW_QUEUE_MIN)
3420 return; 3420 return;
3421 3421
3422 if (cfqd->hw_tag_samples++ < 50) 3422 if (cfqd->hw_tag_samples++ < 50)
3423 return; 3423 return;
3424 3424
3425 if (cfqd->hw_tag_est_depth >= CFQ_HW_QUEUE_MIN) 3425 if (cfqd->hw_tag_est_depth >= CFQ_HW_QUEUE_MIN)
3426 cfqd->hw_tag = 1; 3426 cfqd->hw_tag = 1;
3427 else 3427 else
3428 cfqd->hw_tag = 0; 3428 cfqd->hw_tag = 0;
3429 } 3429 }
3430 3430
3431 static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) 3431 static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3432 { 3432 {
3433 struct cfq_io_context *cic = cfqd->active_cic; 3433 struct cfq_io_context *cic = cfqd->active_cic;
3434 3434
3435 /* If the queue already has requests, don't wait */
3436 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
3437 return false;
3438
3435 /* If there are other queues in the group, don't wait */ 3439 /* If there are other queues in the group, don't wait */
3436 if (cfqq->cfqg->nr_cfqq > 1) 3440 if (cfqq->cfqg->nr_cfqq > 1)
3437 return false; 3441 return false;
3438 3442
3439 if (cfq_slice_used(cfqq)) 3443 if (cfq_slice_used(cfqq))
3440 return true; 3444 return true;
3441 3445
3442 /* if slice left is less than think time, wait busy */ 3446 /* if slice left is less than think time, wait busy */
3443 if (cic && sample_valid(cic->ttime_samples) 3447 if (cic && sample_valid(cic->ttime_samples)
3444 && (cfqq->slice_end - jiffies < cic->ttime_mean)) 3448 && (cfqq->slice_end - jiffies < cic->ttime_mean))
3445 return true; 3449 return true;
3446 3450
3447 /* 3451 /*
3448 * If the think time is less than a jiffy, then ttime_mean=0 and the above 3452 * If the think time is less than a jiffy, then ttime_mean=0 and the above
3449 * will not be true. It might happen that the slice has not expired yet 3453 * will not be true. It might happen that the slice has not expired yet
3450 * but will expire soon (4-5 ns) during select_queue(). To cover the 3454 * but will expire soon (4-5 ns) during select_queue(). To cover the
3451 * case where think time is less than a jiffy, mark the queue wait 3455 * case where think time is less than a jiffy, mark the queue wait
3452 * busy if only 1 jiffy is left in the slice. 3456 * busy if only 1 jiffy is left in the slice.
3453 */ 3457 */
3454 if (cfqq->slice_end - jiffies == 1) 3458 if (cfqq->slice_end - jiffies == 1)
3455 return true; 3459 return true;
3456 3460
3457 return false; 3461 return false;
3458 } 3462 }
3459 3463
3460 static void cfq_completed_request(struct request_queue *q, struct request *rq) 3464 static void cfq_completed_request(struct request_queue *q, struct request *rq)
3461 { 3465 {
3462 struct cfq_queue *cfqq = RQ_CFQQ(rq); 3466 struct cfq_queue *cfqq = RQ_CFQQ(rq);
3463 struct cfq_data *cfqd = cfqq->cfqd; 3467 struct cfq_data *cfqd = cfqq->cfqd;
3464 const int sync = rq_is_sync(rq); 3468 const int sync = rq_is_sync(rq);
3465 unsigned long now; 3469 unsigned long now;
3466 3470
3467 now = jiffies; 3471 now = jiffies;
3468 cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", 3472 cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d",
3469 !!(rq->cmd_flags & REQ_NOIDLE)); 3473 !!(rq->cmd_flags & REQ_NOIDLE));
3470 3474
3471 cfq_update_hw_tag(cfqd); 3475 cfq_update_hw_tag(cfqd);
3472 3476
3473 WARN_ON(!cfqd->rq_in_driver); 3477 WARN_ON(!cfqd->rq_in_driver);
3474 WARN_ON(!cfqq->dispatched); 3478 WARN_ON(!cfqq->dispatched);
3475 cfqd->rq_in_driver--; 3479 cfqd->rq_in_driver--;
3476 cfqq->dispatched--; 3480 cfqq->dispatched--;
3477 (RQ_CFQG(rq))->dispatched--; 3481 (RQ_CFQG(rq))->dispatched--;
3478 cfq_blkiocg_update_completion_stats(&cfqq->cfqg->blkg, 3482 cfq_blkiocg_update_completion_stats(&cfqq->cfqg->blkg,
3479 rq_start_time_ns(rq), rq_io_start_time_ns(rq), 3483 rq_start_time_ns(rq), rq_io_start_time_ns(rq),
3480 rq_data_dir(rq), rq_is_sync(rq)); 3484 rq_data_dir(rq), rq_is_sync(rq));
3481 3485
3482 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; 3486 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
3483 3487
3484 if (sync) { 3488 if (sync) {
3485 RQ_CIC(rq)->last_end_request = now; 3489 RQ_CIC(rq)->last_end_request = now;
3486 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) 3490 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
3487 cfqd->last_delayed_sync = now; 3491 cfqd->last_delayed_sync = now;
3488 } 3492 }
3489 3493
3490 /* 3494 /*
3491 * If this is the active queue, check if it needs to be expired, 3495 * If this is the active queue, check if it needs to be expired,
3492 * or if we want to idle in case it has no pending requests. 3496 * or if we want to idle in case it has no pending requests.
3493 */ 3497 */
3494 if (cfqd->active_queue == cfqq) { 3498 if (cfqd->active_queue == cfqq) {
3495 const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list); 3499 const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list);
3496 3500
3497 if (cfq_cfqq_slice_new(cfqq)) { 3501 if (cfq_cfqq_slice_new(cfqq)) {
3498 cfq_set_prio_slice(cfqd, cfqq); 3502 cfq_set_prio_slice(cfqd, cfqq);
3499 cfq_clear_cfqq_slice_new(cfqq); 3503 cfq_clear_cfqq_slice_new(cfqq);
3500 } 3504 }
3501 3505
3502 /* 3506 /*
3503 * Should we wait for the next request to come in before we expire 3507 * Should we wait for the next request to come in before we expire
3504 * the queue? 3508 * the queue?
3505 */ 3509 */
3506 if (cfq_should_wait_busy(cfqd, cfqq)) { 3510 if (cfq_should_wait_busy(cfqd, cfqq)) {
3507 unsigned long extend_sl = cfqd->cfq_slice_idle; 3511 unsigned long extend_sl = cfqd->cfq_slice_idle;
3508 if (!cfqd->cfq_slice_idle) 3512 if (!cfqd->cfq_slice_idle)
3509 extend_sl = cfqd->cfq_group_idle; 3513 extend_sl = cfqd->cfq_group_idle;
3510 cfqq->slice_end = jiffies + extend_sl; 3514 cfqq->slice_end = jiffies + extend_sl;
3511 cfq_mark_cfqq_wait_busy(cfqq); 3515 cfq_mark_cfqq_wait_busy(cfqq);
3512 cfq_log_cfqq(cfqd, cfqq, "will busy wait"); 3516 cfq_log_cfqq(cfqd, cfqq, "will busy wait");
3513 } 3517 }
3514 3518
3515 /* 3519 /*
3516 * Idling is not enabled on: 3520 * Idling is not enabled on:
3517 * - expired queues 3521 * - expired queues
3518 * - idle-priority queues 3522 * - idle-priority queues
3519 * - async queues 3523 * - async queues
3520 * - queues with still some requests queued 3524 * - queues with still some requests queued
3521 * - when there is a close cooperator 3525 * - when there is a close cooperator
3522 */ 3526 */
3523 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) 3527 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
3524 cfq_slice_expired(cfqd, 1); 3528 cfq_slice_expired(cfqd, 1);
3525 else if (sync && cfqq_empty && 3529 else if (sync && cfqq_empty &&
3526 !cfq_close_cooperator(cfqd, cfqq)) { 3530 !cfq_close_cooperator(cfqd, cfqq)) {
3527 cfq_arm_slice_timer(cfqd); 3531 cfq_arm_slice_timer(cfqd);
3528 } 3532 }
3529 } 3533 }
3530 3534
3531 if (!cfqd->rq_in_driver) 3535 if (!cfqd->rq_in_driver)
3532 cfq_schedule_dispatch(cfqd); 3536 cfq_schedule_dispatch(cfqd);
3533 } 3537 }
3534 3538
3535 /* 3539 /*
3536 * we temporarily boost lower priority queues if they are holding fs exclusive 3540 * we temporarily boost lower priority queues if they are holding fs exclusive
3537 * resources. they are boosted to normal prio (CLASS_BE/4) 3541 * resources. they are boosted to normal prio (CLASS_BE/4)
3538 */ 3542 */
3539 static void cfq_prio_boost(struct cfq_queue *cfqq) 3543 static void cfq_prio_boost(struct cfq_queue *cfqq)
3540 { 3544 {
3541 if (has_fs_excl()) { 3545 if (has_fs_excl()) {
3542 /* 3546 /*
3543 * boost idle prio on transactions that would lock out other 3547 * boost idle prio on transactions that would lock out other
3544 * users of the filesystem 3548 * users of the filesystem
3545 */ 3549 */
3546 if (cfq_class_idle(cfqq)) 3550 if (cfq_class_idle(cfqq))
3547 cfqq->ioprio_class = IOPRIO_CLASS_BE; 3551 cfqq->ioprio_class = IOPRIO_CLASS_BE;
3548 if (cfqq->ioprio > IOPRIO_NORM) 3552 if (cfqq->ioprio > IOPRIO_NORM)
3549 cfqq->ioprio = IOPRIO_NORM; 3553 cfqq->ioprio = IOPRIO_NORM;
3550 } else { 3554 } else {
3551 /* 3555 /*
3552 * unboost the queue (if needed) 3556 * unboost the queue (if needed)
3553 */ 3557 */
3554 cfqq->ioprio_class = cfqq->org_ioprio_class; 3558 cfqq->ioprio_class = cfqq->org_ioprio_class;
3555 cfqq->ioprio = cfqq->org_ioprio; 3559 cfqq->ioprio = cfqq->org_ioprio;
3556 } 3560 }
3557 } 3561 }
3558 3562
3559 static inline int __cfq_may_queue(struct cfq_queue *cfqq) 3563 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
3560 { 3564 {
3561 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { 3565 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
3562 cfq_mark_cfqq_must_alloc_slice(cfqq); 3566 cfq_mark_cfqq_must_alloc_slice(cfqq);
3563 return ELV_MQUEUE_MUST; 3567 return ELV_MQUEUE_MUST;
3564 } 3568 }
3565 3569
3566 return ELV_MQUEUE_MAY; 3570 return ELV_MQUEUE_MAY;
3567 } 3571 }
3568 3572
3569 static int cfq_may_queue(struct request_queue *q, int rw) 3573 static int cfq_may_queue(struct request_queue *q, int rw)
3570 { 3574 {
3571 struct cfq_data *cfqd = q->elevator->elevator_data; 3575 struct cfq_data *cfqd = q->elevator->elevator_data;
3572 struct task_struct *tsk = current; 3576 struct task_struct *tsk = current;
3573 struct cfq_io_context *cic; 3577 struct cfq_io_context *cic;
3574 struct cfq_queue *cfqq; 3578 struct cfq_queue *cfqq;
3575 3579
3576 /* 3580 /*
3577 * don't force setup of a queue from here, as a call to may_queue 3581 * don't force setup of a queue from here, as a call to may_queue
3578 * does not necessarily imply that a request actually will be queued. 3582 * does not necessarily imply that a request actually will be queued.
3579 * so just look up a possibly existing queue, or return 'may queue' 3583 * so just look up a possibly existing queue, or return 'may queue'
3580 * if that fails 3584 * if that fails
3581 */ 3585 */
3582 cic = cfq_cic_lookup(cfqd, tsk->io_context); 3586 cic = cfq_cic_lookup(cfqd, tsk->io_context);
3583 if (!cic) 3587 if (!cic)
3584 return ELV_MQUEUE_MAY; 3588 return ELV_MQUEUE_MAY;
3585 3589
3586 cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); 3590 cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
3587 if (cfqq) { 3591 if (cfqq) {
3588 cfq_init_prio_data(cfqq, cic->ioc); 3592 cfq_init_prio_data(cfqq, cic->ioc);
3589 cfq_prio_boost(cfqq); 3593 cfq_prio_boost(cfqq);
3590 3594
3591 return __cfq_may_queue(cfqq); 3595 return __cfq_may_queue(cfqq);
3592 } 3596 }
3593 3597
3594 return ELV_MQUEUE_MAY; 3598 return ELV_MQUEUE_MAY;
3595 } 3599 }
3596 3600
3597 /* 3601 /*
3598 * queue lock held here 3602 * queue lock held here
3599 */ 3603 */
3600 static void cfq_put_request(struct request *rq) 3604 static void cfq_put_request(struct request *rq)
3601 { 3605 {
3602 struct cfq_queue *cfqq = RQ_CFQQ(rq); 3606 struct cfq_queue *cfqq = RQ_CFQQ(rq);
3603 3607
3604 if (cfqq) { 3608 if (cfqq) {
3605 const int rw = rq_data_dir(rq); 3609 const int rw = rq_data_dir(rq);
3606 3610
3607 BUG_ON(!cfqq->allocated[rw]); 3611 BUG_ON(!cfqq->allocated[rw]);
3608 cfqq->allocated[rw]--; 3612 cfqq->allocated[rw]--;
3609 3613
3610 put_io_context(RQ_CIC(rq)->ioc); 3614 put_io_context(RQ_CIC(rq)->ioc);
3611 3615
3612 rq->elevator_private = NULL; 3616 rq->elevator_private = NULL;
3613 rq->elevator_private2 = NULL; 3617 rq->elevator_private2 = NULL;
3614 3618
3615 /* Put down rq reference on cfqg */ 3619 /* Put down rq reference on cfqg */
3616 cfq_put_cfqg(RQ_CFQG(rq)); 3620 cfq_put_cfqg(RQ_CFQG(rq));
3617 rq->elevator_private3 = NULL; 3621 rq->elevator_private3 = NULL;
3618 3622
3619 cfq_put_queue(cfqq); 3623 cfq_put_queue(cfqq);
3620 } 3624 }
3621 } 3625 }
3622 3626
3623 static struct cfq_queue * 3627 static struct cfq_queue *
3624 cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic, 3628 cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic,
3625 struct cfq_queue *cfqq) 3629 struct cfq_queue *cfqq)
3626 { 3630 {
3627 cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq); 3631 cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq);
3628 cic_set_cfqq(cic, cfqq->new_cfqq, 1); 3632 cic_set_cfqq(cic, cfqq->new_cfqq, 1);
3629 cfq_mark_cfqq_coop(cfqq->new_cfqq); 3633 cfq_mark_cfqq_coop(cfqq->new_cfqq);
3630 cfq_put_queue(cfqq); 3634 cfq_put_queue(cfqq);
3631 return cic_to_cfqq(cic, 1); 3635 return cic_to_cfqq(cic, 1);
3632 } 3636 }
3633 3637
3634 /* 3638 /*
3635 * Returns NULL if a new cfqq should be allocated, or the old cfqq if this 3639 * Returns NULL if a new cfqq should be allocated, or the old cfqq if this
3636 * was the last process referring to said cfqq. 3640 * was the last process referring to said cfqq.
3637 */ 3641 */
3638 static struct cfq_queue * 3642 static struct cfq_queue *
3639 split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq) 3643 split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq)
3640 { 3644 {
3641 if (cfqq_process_refs(cfqq) == 1) { 3645 if (cfqq_process_refs(cfqq) == 1) {
3642 cfqq->pid = current->pid; 3646 cfqq->pid = current->pid;
3643 cfq_clear_cfqq_coop(cfqq); 3647 cfq_clear_cfqq_coop(cfqq);
3644 cfq_clear_cfqq_split_coop(cfqq); 3648 cfq_clear_cfqq_split_coop(cfqq);
3645 return cfqq; 3649 return cfqq;
3646 } 3650 }
3647 3651
3648 cic_set_cfqq(cic, NULL, 1); 3652 cic_set_cfqq(cic, NULL, 1);
3649 3653
3650 cfq_put_cooperator(cfqq); 3654 cfq_put_cooperator(cfqq);
3651 3655
3652 cfq_put_queue(cfqq); 3656 cfq_put_queue(cfqq);
3653 return NULL; 3657 return NULL;
3654 } 3658 }
3655 /* 3659 /*
3656 * Allocate cfq data structures associated with this request. 3660 * Allocate cfq data structures associated with this request.
3657 */ 3661 */
3658 static int 3662 static int
3659 cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) 3663 cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
3660 { 3664 {
3661 struct cfq_data *cfqd = q->elevator->elevator_data; 3665 struct cfq_data *cfqd = q->elevator->elevator_data;
3662 struct cfq_io_context *cic; 3666 struct cfq_io_context *cic;
3663 const int rw = rq_data_dir(rq); 3667 const int rw = rq_data_dir(rq);
3664 const bool is_sync = rq_is_sync(rq); 3668 const bool is_sync = rq_is_sync(rq);
3665 struct cfq_queue *cfqq; 3669 struct cfq_queue *cfqq;
3666 unsigned long flags; 3670 unsigned long flags;
3667 3671
3668 might_sleep_if(gfp_mask & __GFP_WAIT); 3672 might_sleep_if(gfp_mask & __GFP_WAIT);
3669 3673
3670 cic = cfq_get_io_context(cfqd, gfp_mask); 3674 cic = cfq_get_io_context(cfqd, gfp_mask);
3671 3675
3672 spin_lock_irqsave(q->queue_lock, flags); 3676 spin_lock_irqsave(q->queue_lock, flags);
3673 3677
3674 if (!cic) 3678 if (!cic)
3675 goto queue_fail; 3679 goto queue_fail;
3676 3680
3677 new_queue: 3681 new_queue:
3678 cfqq = cic_to_cfqq(cic, is_sync); 3682 cfqq = cic_to_cfqq(cic, is_sync);
3679 if (!cfqq || cfqq == &cfqd->oom_cfqq) { 3683 if (!cfqq || cfqq == &cfqd->oom_cfqq) {
3680 cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask); 3684 cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
3681 cic_set_cfqq(cic, cfqq, is_sync); 3685 cic_set_cfqq(cic, cfqq, is_sync);
3682 } else { 3686 } else {
3683 /* 3687 /*
3684 * If the queue was seeky for too long, break it apart. 3688 * If the queue was seeky for too long, break it apart.
3685 */ 3689 */
3686 if (cfq_cfqq_coop(cfqq) && cfq_cfqq_split_coop(cfqq)) { 3690 if (cfq_cfqq_coop(cfqq) && cfq_cfqq_split_coop(cfqq)) {
3687 cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq"); 3691 cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq");
3688 cfqq = split_cfqq(cic, cfqq); 3692 cfqq = split_cfqq(cic, cfqq);
3689 if (!cfqq) 3693 if (!cfqq)
3690 goto new_queue; 3694 goto new_queue;
3691 } 3695 }
3692 3696
3693 /* 3697 /*
3694 * Check to see if this queue is scheduled to merge with 3698 * Check to see if this queue is scheduled to merge with
3695 * another, closely cooperating queue. The merging of 3699 * another, closely cooperating queue. The merging of
3696 * queues happens here as it must be done in process context. 3700 * queues happens here as it must be done in process context.
3697 * The reference on new_cfqq was taken in merge_cfqqs. 3701 * The reference on new_cfqq was taken in merge_cfqqs.
3698 */ 3702 */
3699 if (cfqq->new_cfqq) 3703 if (cfqq->new_cfqq)
3700 cfqq = cfq_merge_cfqqs(cfqd, cic, cfqq); 3704 cfqq = cfq_merge_cfqqs(cfqd, cic, cfqq);
3701 } 3705 }
3702 3706
3703 cfqq->allocated[rw]++; 3707 cfqq->allocated[rw]++;
3704 cfqq->ref++; 3708 cfqq->ref++;
3705 rq->elevator_private = cic; 3709 rq->elevator_private = cic;
3706 rq->elevator_private2 = cfqq; 3710 rq->elevator_private2 = cfqq;
3707 rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg); 3711 rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
3708 3712
3709 spin_unlock_irqrestore(q->queue_lock, flags); 3713 spin_unlock_irqrestore(q->queue_lock, flags);
3710 3714
3711 return 0; 3715 return 0;
3712 3716
3713 queue_fail: 3717 queue_fail:
3714 if (cic) 3718 if (cic)
3715 put_io_context(cic->ioc); 3719 put_io_context(cic->ioc);
3716 3720
3717 cfq_schedule_dispatch(cfqd); 3721 cfq_schedule_dispatch(cfqd);
3718 spin_unlock_irqrestore(q->queue_lock, flags); 3722 spin_unlock_irqrestore(q->queue_lock, flags);
3719 cfq_log(cfqd, "set_request fail"); 3723 cfq_log(cfqd, "set_request fail");
3720 return 1; 3724 return 1;
3721 } 3725 }
3722 3726
3723 static void cfq_kick_queue(struct work_struct *work) 3727 static void cfq_kick_queue(struct work_struct *work)
3724 { 3728 {
3725 struct cfq_data *cfqd = 3729 struct cfq_data *cfqd =
3726 container_of(work, struct cfq_data, unplug_work); 3730 container_of(work, struct cfq_data, unplug_work);
3727 struct request_queue *q = cfqd->queue; 3731 struct request_queue *q = cfqd->queue;
3728 3732
3729 spin_lock_irq(q->queue_lock); 3733 spin_lock_irq(q->queue_lock);
3730 __blk_run_queue(cfqd->queue); 3734 __blk_run_queue(cfqd->queue);
3731 spin_unlock_irq(q->queue_lock); 3735 spin_unlock_irq(q->queue_lock);
3732 } 3736 }
3733 3737
3734 /* 3738 /*
3735 * Timer running if the active_queue is currently idling inside its time slice 3739 * Timer running if the active_queue is currently idling inside its time slice
3736 */ 3740 */
3737 static void cfq_idle_slice_timer(unsigned long data) 3741 static void cfq_idle_slice_timer(unsigned long data)
3738 { 3742 {
3739 struct cfq_data *cfqd = (struct cfq_data *) data; 3743 struct cfq_data *cfqd = (struct cfq_data *) data;
3740 struct cfq_queue *cfqq; 3744 struct cfq_queue *cfqq;
3741 unsigned long flags; 3745 unsigned long flags;
3742 int timed_out = 1; 3746 int timed_out = 1;
3743 3747
3744 cfq_log(cfqd, "idle timer fired"); 3748 cfq_log(cfqd, "idle timer fired");
3745 3749
3746 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 3750 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
3747 3751
3748 cfqq = cfqd->active_queue; 3752 cfqq = cfqd->active_queue;
3749 if (cfqq) { 3753 if (cfqq) {
3750 timed_out = 0; 3754 timed_out = 0;
3751 3755
3752 /* 3756 /*
3753 * We saw a request before the queue expired, let it through 3757 * We saw a request before the queue expired, let it through
3754 */ 3758 */
3755 if (cfq_cfqq_must_dispatch(cfqq)) 3759 if (cfq_cfqq_must_dispatch(cfqq))
3756 goto out_kick; 3760 goto out_kick;
3757 3761
3758 /* 3762 /*
3759 * expired 3763 * expired
3760 */ 3764 */
3761 if (cfq_slice_used(cfqq)) 3765 if (cfq_slice_used(cfqq))
3762 goto expire; 3766 goto expire;
3763 3767
3764 /* 3768 /*
3765 * only expire and reinvoke request handler, if there are 3769 * only expire and reinvoke request handler, if there are
3766 * other queues with pending requests 3770 * other queues with pending requests
3767 */ 3771 */
3768 if (!cfqd->busy_queues) 3772 if (!cfqd->busy_queues)
3769 goto out_cont; 3773 goto out_cont;
3770 3774
3771 /* 3775 /*
3772 * not expired and it has a request pending, let it dispatch 3776 * not expired and it has a request pending, let it dispatch
3773 */ 3777 */
3774 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) 3778 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
3775 goto out_kick; 3779 goto out_kick;
3776 3780
3777 /* 3781 /*
3778 * Queue depth flag is reset only when the idle didn't succeed 3782 * Queue depth flag is reset only when the idle didn't succeed
3779 */ 3783 */
3780 cfq_clear_cfqq_deep(cfqq); 3784 cfq_clear_cfqq_deep(cfqq);
3781 } 3785 }
3782 expire: 3786 expire:
3783 cfq_slice_expired(cfqd, timed_out); 3787 cfq_slice_expired(cfqd, timed_out);
3784 out_kick: 3788 out_kick:
3785 cfq_schedule_dispatch(cfqd); 3789 cfq_schedule_dispatch(cfqd);
3786 out_cont: 3790 out_cont:
3787 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 3791 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
3788 } 3792 }
3789 3793
3790 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) 3794 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
3791 { 3795 {
3792 del_timer_sync(&cfqd->idle_slice_timer); 3796 del_timer_sync(&cfqd->idle_slice_timer);
3793 cancel_work_sync(&cfqd->unplug_work); 3797 cancel_work_sync(&cfqd->unplug_work);
3794 } 3798 }
3795 3799
3796 static void cfq_put_async_queues(struct cfq_data *cfqd) 3800 static void cfq_put_async_queues(struct cfq_data *cfqd)
3797 { 3801 {
3798 int i; 3802 int i;
3799 3803
3800 for (i = 0; i < IOPRIO_BE_NR; i++) { 3804 for (i = 0; i < IOPRIO_BE_NR; i++) {
3801 if (cfqd->async_cfqq[0][i]) 3805 if (cfqd->async_cfqq[0][i])
3802 cfq_put_queue(cfqd->async_cfqq[0][i]); 3806 cfq_put_queue(cfqd->async_cfqq[0][i]);
3803 if (cfqd->async_cfqq[1][i]) 3807 if (cfqd->async_cfqq[1][i])
3804 cfq_put_queue(cfqd->async_cfqq[1][i]); 3808 cfq_put_queue(cfqd->async_cfqq[1][i]);
3805 } 3809 }
3806 3810
3807 if (cfqd->async_idle_cfqq) 3811 if (cfqd->async_idle_cfqq)
3808 cfq_put_queue(cfqd->async_idle_cfqq); 3812 cfq_put_queue(cfqd->async_idle_cfqq);
3809 } 3813 }
3810 3814
3811 static void cfq_cfqd_free(struct rcu_head *head) 3815 static void cfq_cfqd_free(struct rcu_head *head)
3812 { 3816 {
3813 kfree(container_of(head, struct cfq_data, rcu)); 3817 kfree(container_of(head, struct cfq_data, rcu));
3814 } 3818 }
3815 3819
3816 static void cfq_exit_queue(struct elevator_queue *e) 3820 static void cfq_exit_queue(struct elevator_queue *e)
3817 { 3821 {
3818 struct cfq_data *cfqd = e->elevator_data; 3822 struct cfq_data *cfqd = e->elevator_data;
3819 struct request_queue *q = cfqd->queue; 3823 struct request_queue *q = cfqd->queue;
3820 3824
3821 cfq_shutdown_timer_wq(cfqd); 3825 cfq_shutdown_timer_wq(cfqd);
3822 3826
3823 spin_lock_irq(q->queue_lock); 3827 spin_lock_irq(q->queue_lock);
3824 3828
3825 if (cfqd->active_queue) 3829 if (cfqd->active_queue)
3826 __cfq_slice_expired(cfqd, cfqd->active_queue, 0); 3830 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
3827 3831
3828 while (!list_empty(&cfqd->cic_list)) { 3832 while (!list_empty(&cfqd->cic_list)) {
3829 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, 3833 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
3830 struct cfq_io_context, 3834 struct cfq_io_context,
3831 queue_list); 3835 queue_list);
3832 3836
3833 __cfq_exit_single_io_context(cfqd, cic); 3837 __cfq_exit_single_io_context(cfqd, cic);
3834 } 3838 }
3835 3839
3836 cfq_put_async_queues(cfqd); 3840 cfq_put_async_queues(cfqd);
3837 cfq_release_cfq_groups(cfqd); 3841 cfq_release_cfq_groups(cfqd);
3838 cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg); 3842 cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
3839 3843
3840 spin_unlock_irq(q->queue_lock); 3844 spin_unlock_irq(q->queue_lock);
3841 3845
3842 cfq_shutdown_timer_wq(cfqd); 3846 cfq_shutdown_timer_wq(cfqd);
3843 3847
3844 spin_lock(&cic_index_lock); 3848 spin_lock(&cic_index_lock);
3845 ida_remove(&cic_index_ida, cfqd->cic_index); 3849 ida_remove(&cic_index_ida, cfqd->cic_index);
3846 spin_unlock(&cic_index_lock); 3850 spin_unlock(&cic_index_lock);
3847 3851
3848 /* Wait for cfqg->blkg->key accessors to exit their grace periods. */ 3852 /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
3849 call_rcu(&cfqd->rcu, cfq_cfqd_free); 3853 call_rcu(&cfqd->rcu, cfq_cfqd_free);
3850 } 3854 }
3851 3855
3852 static int cfq_alloc_cic_index(void) 3856 static int cfq_alloc_cic_index(void)
3853 { 3857 {
3854 int index, error; 3858 int index, error;
3855 3859
3856 do { 3860 do {
3857 if (!ida_pre_get(&cic_index_ida, GFP_KERNEL)) 3861 if (!ida_pre_get(&cic_index_ida, GFP_KERNEL))
3858 return -ENOMEM; 3862 return -ENOMEM;
3859 3863
3860 spin_lock(&cic_index_lock); 3864 spin_lock(&cic_index_lock);
3861 error = ida_get_new(&cic_index_ida, &index); 3865 error = ida_get_new(&cic_index_ida, &index);
3862 spin_unlock(&cic_index_lock); 3866 spin_unlock(&cic_index_lock);
3863 if (error && error != -EAGAIN) 3867 if (error && error != -EAGAIN)
3864 return error; 3868 return error;
3865 } while (error); 3869 } while (error);
3866 3870
3867 return index; 3871 return index;
3868 } 3872 }
3869 3873
3870 static void *cfq_init_queue(struct request_queue *q) 3874 static void *cfq_init_queue(struct request_queue *q)
3871 { 3875 {
3872 struct cfq_data *cfqd; 3876 struct cfq_data *cfqd;
3873 int i, j; 3877 int i, j;
3874 struct cfq_group *cfqg; 3878 struct cfq_group *cfqg;
3875 struct cfq_rb_root *st; 3879 struct cfq_rb_root *st;
3876 3880
3877 i = cfq_alloc_cic_index(); 3881 i = cfq_alloc_cic_index();
3878 if (i < 0) 3882 if (i < 0)
3879 return NULL; 3883 return NULL;
3880 3884
3881 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); 3885 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
3882 if (!cfqd) 3886 if (!cfqd)
3883 return NULL; 3887 return NULL;
3884 3888
3885 /* 3889 /*
3886 * No need to take queue_lock in this routine, since we are 3890 * No need to take queue_lock in this routine, since we are
3887 * initializing the ioscheduler, and nobody is using cfqd 3891 * initializing the ioscheduler, and nobody is using cfqd
3888 */ 3892 */
3889 cfqd->cic_index = i; 3893 cfqd->cic_index = i;
3890 3894
3891 /* Init root service tree */ 3895 /* Init root service tree */
3892 cfqd->grp_service_tree = CFQ_RB_ROOT; 3896 cfqd->grp_service_tree = CFQ_RB_ROOT;
3893 3897
3894 /* Init root group */ 3898 /* Init root group */
3895 cfqg = &cfqd->root_group; 3899 cfqg = &cfqd->root_group;
3896 for_each_cfqg_st(cfqg, i, j, st) 3900 for_each_cfqg_st(cfqg, i, j, st)
3897 *st = CFQ_RB_ROOT; 3901 *st = CFQ_RB_ROOT;
3898 RB_CLEAR_NODE(&cfqg->rb_node); 3902 RB_CLEAR_NODE(&cfqg->rb_node);
3899 3903
3900 /* Give preference to root group over other groups */ 3904 /* Give preference to root group over other groups */
3901 cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT; 3905 cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
3902 3906
3903 #ifdef CONFIG_CFQ_GROUP_IOSCHED 3907 #ifdef CONFIG_CFQ_GROUP_IOSCHED
3904 /* 3908 /*
3905 * Take a reference to root group which we never drop. This is just 3909 * Take a reference to root group which we never drop. This is just
3906 * to make sure that cfq_put_cfqg() does not try to kfree root group 3910 * to make sure that cfq_put_cfqg() does not try to kfree root group
3907 */ 3911 */
3908 cfqg->ref = 1; 3912 cfqg->ref = 1;
3909 rcu_read_lock(); 3913 rcu_read_lock();
3910 cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, 3914 cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
3911 (void *)cfqd, 0); 3915 (void *)cfqd, 0);
3912 rcu_read_unlock(); 3916 rcu_read_unlock();
3913 #endif 3917 #endif
3914 /* 3918 /*
3915 * Not strictly needed (since RB_ROOT just clears the node and we 3919 * Not strictly needed (since RB_ROOT just clears the node and we
3916 * zeroed cfqd on alloc), but better be safe in case someone decides 3920 * zeroed cfqd on alloc), but better be safe in case someone decides
3917 * to add magic to the rb code 3921 * to add magic to the rb code
3918 */ 3922 */
3919 for (i = 0; i < CFQ_PRIO_LISTS; i++) 3923 for (i = 0; i < CFQ_PRIO_LISTS; i++)
3920 cfqd->prio_trees[i] = RB_ROOT; 3924 cfqd->prio_trees[i] = RB_ROOT;
3921 3925
3922 /* 3926 /*
3923 * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues. 3927 * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
3924 * Grab a permanent reference to it, so that the normal code flow 3928 * Grab a permanent reference to it, so that the normal code flow
3925 * will not attempt to free it. 3929 * will not attempt to free it.
3926 */ 3930 */
3927 cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); 3931 cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
3928 cfqd->oom_cfqq.ref++; 3932 cfqd->oom_cfqq.ref++;
3929 cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group); 3933 cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
3930 3934
3931 INIT_LIST_HEAD(&cfqd->cic_list); 3935 INIT_LIST_HEAD(&cfqd->cic_list);
3932 3936
3933 cfqd->queue = q; 3937 cfqd->queue = q;
3934 3938
3935 init_timer(&cfqd->idle_slice_timer); 3939 init_timer(&cfqd->idle_slice_timer);
3936 cfqd->idle_slice_timer.function = cfq_idle_slice_timer; 3940 cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
3937 cfqd->idle_slice_timer.data = (unsigned long) cfqd; 3941 cfqd->idle_slice_timer.data = (unsigned long) cfqd;
3938 3942
3939 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); 3943 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
3940 3944
3941 cfqd->cfq_quantum = cfq_quantum; 3945 cfqd->cfq_quantum = cfq_quantum;
3942 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; 3946 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
3943 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; 3947 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
3944 cfqd->cfq_back_max = cfq_back_max; 3948 cfqd->cfq_back_max = cfq_back_max;
3945 cfqd->cfq_back_penalty = cfq_back_penalty; 3949 cfqd->cfq_back_penalty = cfq_back_penalty;
3946 cfqd->cfq_slice[0] = cfq_slice_async; 3950 cfqd->cfq_slice[0] = cfq_slice_async;
3947 cfqd->cfq_slice[1] = cfq_slice_sync; 3951 cfqd->cfq_slice[1] = cfq_slice_sync;
3948 cfqd->cfq_slice_async_rq = cfq_slice_async_rq; 3952 cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
3949 cfqd->cfq_slice_idle = cfq_slice_idle; 3953 cfqd->cfq_slice_idle = cfq_slice_idle;
3950 cfqd->cfq_group_idle = cfq_group_idle; 3954 cfqd->cfq_group_idle = cfq_group_idle;
3951 cfqd->cfq_latency = 1; 3955 cfqd->cfq_latency = 1;
3952 cfqd->cfq_group_isolation = 0; 3956 cfqd->cfq_group_isolation = 0;
3953 cfqd->hw_tag = -1; 3957 cfqd->hw_tag = -1;
3954 /* 3958 /*
3955 * we optimistically start assuming sync ops weren't delayed in last 3959 * we optimistically start assuming sync ops weren't delayed in last
3956 * second, in order to have larger depth for async operations. 3960 * second, in order to have larger depth for async operations.
3957 */ 3961 */
3958 cfqd->last_delayed_sync = jiffies - HZ; 3962 cfqd->last_delayed_sync = jiffies - HZ;
3959 return cfqd; 3963 return cfqd;
3960 } 3964 }

static void cfq_slab_kill(void)
{
        /*
         * Caller already ensured that pending RCU callbacks are completed,
         * so we should have no busy allocations at this point.
         */
        if (cfq_pool)
                kmem_cache_destroy(cfq_pool);
        if (cfq_ioc_pool)
                kmem_cache_destroy(cfq_ioc_pool);
}

static int __init cfq_slab_setup(void)
{
        cfq_pool = KMEM_CACHE(cfq_queue, 0);
        if (!cfq_pool)
                goto fail;

        cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0);
        if (!cfq_ioc_pool)
                goto fail;

        return 0;
fail:
        cfq_slab_kill();
        return -ENOMEM;
}

/*
 * sysfs parts below -->
 */
static ssize_t
cfq_var_show(unsigned int var, char *page)
{
        return sprintf(page, "%d\n", var);
}

static ssize_t
cfq_var_store(unsigned int *var, const char *page, size_t count)
{
        char *p = (char *) page;

        *var = simple_strtoul(p, &p, 10);
        return count;
}

#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
static ssize_t __FUNC(struct elevator_queue *e, char *page) \
{ \
        struct cfq_data *cfqd = e->elevator_data; \
        unsigned int __data = __VAR; \
        if (__CONV) \
                __data = jiffies_to_msecs(__data); \
        return cfq_var_show(__data, (page)); \
}
SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0);
SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
SHOW_FUNCTION(cfq_group_idle_show, cfqd->cfq_group_idle, 1);
SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
SHOW_FUNCTION(cfq_group_isolation_show, cfqd->cfq_group_isolation, 0);
#undef SHOW_FUNCTION
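
/*
 * For illustration only (not part of the file): with __VAR and __CONV
 * substituted, SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0) above
 * expands to roughly the following sysfs show helper. The if (0) branch is
 * dead code here; it only does the jiffies-to-msecs conversion for the
 * time-based tunables, which pass __CONV == 1.
 */
#if 0
static ssize_t cfq_quantum_show(struct elevator_queue *e, char *page)
{
        struct cfq_data *cfqd = e->elevator_data;
        unsigned int __data = cfqd->cfq_quantum;
        if (0)
                __data = jiffies_to_msecs(__data);
        return cfq_var_show(__data, (page));
}
#endif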

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
        struct cfq_data *cfqd = e->elevator_data; \
        unsigned int __data; \
        int ret = cfq_var_store(&__data, (page), count); \
        if (__data < (MIN)) \
                __data = (MIN); \
        else if (__data > (MAX)) \
                __data = (MAX); \
        if (__CONV) \
                *(__PTR) = msecs_to_jiffies(__data); \
        else \
                *(__PTR) = __data; \
        return ret; \
}
STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1,
                UINT_MAX, 1);
STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1,
                UINT_MAX, 1);
STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1,
                UINT_MAX, 0);
STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
STORE_FUNCTION(cfq_group_idle_store, &cfqd->cfq_group_idle, 0, UINT_MAX, 1);
STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
                UINT_MAX, 0);
STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
STORE_FUNCTION(cfq_group_isolation_store, &cfqd->cfq_group_isolation, 0, 1, 0);
#undef STORE_FUNCTION
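
/*
 * For illustration only (not part of the file): with MIN, MAX and __CONV
 * substituted, STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1,
 * UINT_MAX, 0) above expands to roughly the following sysfs store helper.
 * The parsed value is clamped to [1, UINT_MAX] (the upper check can never
 * fire for an unsigned int) and, since __CONV is 0, stored without the
 * msecs-to-jiffies conversion used by the time-based tunables.
 */
#if 0
static ssize_t cfq_quantum_store(struct elevator_queue *e, const char *page,
                                 size_t count)
{
        struct cfq_data *cfqd = e->elevator_data;
        unsigned int __data;
        int ret = cfq_var_store(&__data, (page), count);
        if (__data < (1))
                __data = (1);
        else if (__data > (UINT_MAX))
                __data = (UINT_MAX);
        *(&cfqd->cfq_quantum) = __data;
        return ret;
}
#endif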

#define CFQ_ATTR(name) \
        __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)

static struct elv_fs_entry cfq_attrs[] = {
        CFQ_ATTR(quantum),
        CFQ_ATTR(fifo_expire_sync),
        CFQ_ATTR(fifo_expire_async),
        CFQ_ATTR(back_seek_max),
        CFQ_ATTR(back_seek_penalty),
        CFQ_ATTR(slice_sync),
        CFQ_ATTR(slice_async),
        CFQ_ATTR(slice_async_rq),
        CFQ_ATTR(slice_idle),
        CFQ_ATTR(group_idle),
        CFQ_ATTR(low_latency),
        CFQ_ATTR(group_isolation),
        __ATTR_NULL
};
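
/*
 * For illustration only (not part of the file): each CFQ_ATTR(name) entry
 * above ties a sysfs attribute, exposed under
 * /sys/block/<dev>/queue/iosched/, to its generated show/store helpers.
 * For example, CFQ_ATTR(quantum) expands via __ATTR to an elv_fs_entry
 * initializer along the lines of:
 *
 *      {
 *              .attr   = { .name = "quantum", .mode = S_IRUGO | S_IWUSR },
 *              .show   = cfq_quantum_show,
 *              .store  = cfq_quantum_store,
 *      },
 */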

static struct elevator_type iosched_cfq = {
        .ops = {
                .elevator_merge_fn = cfq_merge,
                .elevator_merged_fn = cfq_merged_request,
                .elevator_merge_req_fn = cfq_merged_requests,
                .elevator_allow_merge_fn = cfq_allow_merge,
                .elevator_bio_merged_fn = cfq_bio_merged,
                .elevator_dispatch_fn = cfq_dispatch_requests,
                .elevator_add_req_fn = cfq_insert_request,
                .elevator_activate_req_fn = cfq_activate_request,
                .elevator_deactivate_req_fn = cfq_deactivate_request,
                .elevator_queue_empty_fn = cfq_queue_empty,
                .elevator_completed_req_fn = cfq_completed_request,
                .elevator_former_req_fn = elv_rb_former_request,
                .elevator_latter_req_fn = elv_rb_latter_request,
                .elevator_set_req_fn = cfq_set_request,
                .elevator_put_req_fn = cfq_put_request,
                .elevator_may_queue_fn = cfq_may_queue,
                .elevator_init_fn = cfq_init_queue,
                .elevator_exit_fn = cfq_exit_queue,
                .trim = cfq_free_io_context,
        },
        .elevator_attrs = cfq_attrs,
        .elevator_name = "cfq",
        .elevator_owner = THIS_MODULE,
};

#ifdef CONFIG_CFQ_GROUP_IOSCHED
static struct blkio_policy_type blkio_policy_cfq = {
        .ops = {
                .blkio_unlink_group_fn = cfq_unlink_blkio_group,
                .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
        },
        .plid = BLKIO_POLICY_PROP,
};
#else
static struct blkio_policy_type blkio_policy_cfq;
#endif

static int __init cfq_init(void)
{
        /*
         * could be 0 on HZ < 1000 setups
         */
        if (!cfq_slice_async)
                cfq_slice_async = 1;
        if (!cfq_slice_idle)
                cfq_slice_idle = 1;

#ifdef CONFIG_CFQ_GROUP_IOSCHED
        if (!cfq_group_idle)
                cfq_group_idle = 1;
#else
        cfq_group_idle = 0;
#endif
        if (cfq_slab_setup())
                return -ENOMEM;

        elv_register(&iosched_cfq);
        blkio_policy_register(&blkio_policy_cfq);

        return 0;
}

static void __exit cfq_exit(void)
{
        DECLARE_COMPLETION_ONSTACK(all_gone);
        blkio_policy_unregister(&blkio_policy_cfq);
        elv_unregister(&iosched_cfq);
        ioc_gone = &all_gone;
        /* ioc_gone's update must be visible before reading ioc_count */
        smp_wmb();

        /*
         * this also protects us from entering cfq_slab_kill() with
         * pending RCU callbacks
         */
        if (elv_ioc_count_read(cfq_ioc_count))
                wait_for_completion(&all_gone);
        ida_destroy(&cic_index_ida);
        cfq_slab_kill();
}

module_init(cfq_init);
module_exit(cfq_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");