Commit 03d8e11142a893ad322285d3c8a08e88b570cda1
Committed by Jens Axboe
1 parent: b82d4b197c
Exists in smarc-l5.0.0_1.0.0-ga and in 5 other branches
blkcg: add request_queue->root_blkg
With per-queue policy activation, root blkg creation will be moved to
blkcg core.  Add q->root_blkg in preparation.  For blk-throtl, this
replaces throtl_data->root_tg; however, cfq needs to keep
cfqd->root_group for !CONFIG_CFQ_GROUP_IOSCHED.

This is to prepare for per-queue policy activation and doesn't cause
any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
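For orientation before reading the diff: the change swaps a cached per-policy pointer (throtl_data->root_tg) for a lookup through the new q->root_blkg. A minimal sketch of that pattern is shown below, assuming the blkg_to_tg() accessor and the throtl_data/throtl_grp definitions already present in blk-throttle.c; it has the same shape as the td_root_tg() helper the patch adds.

	/* sketch: resolve blk-throttle's root group via the queue's root blkg */
	static inline struct throtl_grp *td_root_tg(struct throtl_data *td)
	{
		/* before this commit: return td->root_tg; */
		return blkg_to_tg(td->queue->root_blkg);
	}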
Showing 3 changed files with 15 additions and 7 deletions
block/blk-throttle.c
1 | /* | 1 | /* |
2 | * Interface for controlling IO bandwidth on a request queue | 2 | * Interface for controlling IO bandwidth on a request queue |
3 | * | 3 | * |
4 | * Copyright (C) 2010 Vivek Goyal <vgoyal@redhat.com> | 4 | * Copyright (C) 2010 Vivek Goyal <vgoyal@redhat.com> |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
9 | #include <linux/blkdev.h> | 9 | #include <linux/blkdev.h> |
10 | #include <linux/bio.h> | 10 | #include <linux/bio.h> |
11 | #include <linux/blktrace_api.h> | 11 | #include <linux/blktrace_api.h> |
12 | #include "blk-cgroup.h" | 12 | #include "blk-cgroup.h" |
13 | #include "blk.h" | 13 | #include "blk.h" |
14 | 14 | ||
15 | /* Max dispatch from a group in 1 round */ | 15 | /* Max dispatch from a group in 1 round */ |
16 | static int throtl_grp_quantum = 8; | 16 | static int throtl_grp_quantum = 8; |
17 | 17 | ||
18 | /* Total max dispatch from all groups in one round */ | 18 | /* Total max dispatch from all groups in one round */ |
19 | static int throtl_quantum = 32; | 19 | static int throtl_quantum = 32; |
20 | 20 | ||
21 | /* Throttling is performed over 100ms slice and after that slice is renewed */ | 21 | /* Throttling is performed over 100ms slice and after that slice is renewed */ |
22 | static unsigned long throtl_slice = HZ/10; /* 100 ms */ | 22 | static unsigned long throtl_slice = HZ/10; /* 100 ms */ |
23 | 23 | ||
24 | static struct blkio_policy_type blkio_policy_throtl; | 24 | static struct blkio_policy_type blkio_policy_throtl; |
25 | 25 | ||
26 | /* A workqueue to queue throttle related work */ | 26 | /* A workqueue to queue throttle related work */ |
27 | static struct workqueue_struct *kthrotld_workqueue; | 27 | static struct workqueue_struct *kthrotld_workqueue; |
28 | static void throtl_schedule_delayed_work(struct throtl_data *td, | 28 | static void throtl_schedule_delayed_work(struct throtl_data *td, |
29 | unsigned long delay); | 29 | unsigned long delay); |
30 | 30 | ||
31 | struct throtl_rb_root { | 31 | struct throtl_rb_root { |
32 | struct rb_root rb; | 32 | struct rb_root rb; |
33 | struct rb_node *left; | 33 | struct rb_node *left; |
34 | unsigned int count; | 34 | unsigned int count; |
35 | unsigned long min_disptime; | 35 | unsigned long min_disptime; |
36 | }; | 36 | }; |
37 | 37 | ||
38 | #define THROTL_RB_ROOT (struct throtl_rb_root) { .rb = RB_ROOT, .left = NULL, \ | 38 | #define THROTL_RB_ROOT (struct throtl_rb_root) { .rb = RB_ROOT, .left = NULL, \ |
39 | .count = 0, .min_disptime = 0} | 39 | .count = 0, .min_disptime = 0} |
40 | 40 | ||
41 | #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) | 41 | #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) |
42 | 42 | ||
43 | /* Per-cpu group stats */ | 43 | /* Per-cpu group stats */ |
44 | struct tg_stats_cpu { | 44 | struct tg_stats_cpu { |
45 | /* total bytes transferred */ | 45 | /* total bytes transferred */ |
46 | struct blkg_rwstat service_bytes; | 46 | struct blkg_rwstat service_bytes; |
47 | /* total IOs serviced, post merge */ | 47 | /* total IOs serviced, post merge */ |
48 | struct blkg_rwstat serviced; | 48 | struct blkg_rwstat serviced; |
49 | }; | 49 | }; |
50 | 50 | ||
51 | struct throtl_grp { | 51 | struct throtl_grp { |
52 | /* active throtl group service_tree member */ | 52 | /* active throtl group service_tree member */ |
53 | struct rb_node rb_node; | 53 | struct rb_node rb_node; |
54 | 54 | ||
55 | /* | 55 | /* |
56 | * Dispatch time in jiffies. This is the estimated time when group | 56 | * Dispatch time in jiffies. This is the estimated time when group |
57 | * will unthrottle and is ready to dispatch more bio. It is used as | 57 | * will unthrottle and is ready to dispatch more bio. It is used as |
58 | * key to sort active groups in service tree. | 58 | * key to sort active groups in service tree. |
59 | */ | 59 | */ |
60 | unsigned long disptime; | 60 | unsigned long disptime; |
61 | 61 | ||
62 | unsigned int flags; | 62 | unsigned int flags; |
63 | 63 | ||
64 | /* Two lists for READ and WRITE */ | 64 | /* Two lists for READ and WRITE */ |
65 | struct bio_list bio_lists[2]; | 65 | struct bio_list bio_lists[2]; |
66 | 66 | ||
67 | /* Number of queued bios on READ and WRITE lists */ | 67 | /* Number of queued bios on READ and WRITE lists */ |
68 | unsigned int nr_queued[2]; | 68 | unsigned int nr_queued[2]; |
69 | 69 | ||
70 | /* bytes per second rate limits */ | 70 | /* bytes per second rate limits */ |
71 | uint64_t bps[2]; | 71 | uint64_t bps[2]; |
72 | 72 | ||
73 | /* IOPS limits */ | 73 | /* IOPS limits */ |
74 | unsigned int iops[2]; | 74 | unsigned int iops[2]; |
75 | 75 | ||
76 | /* Number of bytes disptached in current slice */ | 76 | /* Number of bytes disptached in current slice */ |
77 | uint64_t bytes_disp[2]; | 77 | uint64_t bytes_disp[2]; |
78 | /* Number of bio's dispatched in current slice */ | 78 | /* Number of bio's dispatched in current slice */ |
79 | unsigned int io_disp[2]; | 79 | unsigned int io_disp[2]; |
80 | 80 | ||
81 | /* When did we start a new slice */ | 81 | /* When did we start a new slice */ |
82 | unsigned long slice_start[2]; | 82 | unsigned long slice_start[2]; |
83 | unsigned long slice_end[2]; | 83 | unsigned long slice_end[2]; |
84 | 84 | ||
85 | /* Some throttle limits got updated for the group */ | 85 | /* Some throttle limits got updated for the group */ |
86 | int limits_changed; | 86 | int limits_changed; |
87 | 87 | ||
88 | /* Per cpu stats pointer */ | 88 | /* Per cpu stats pointer */ |
89 | struct tg_stats_cpu __percpu *stats_cpu; | 89 | struct tg_stats_cpu __percpu *stats_cpu; |
90 | 90 | ||
91 | /* List of tgs waiting for per cpu stats memory to be allocated */ | 91 | /* List of tgs waiting for per cpu stats memory to be allocated */ |
92 | struct list_head stats_alloc_node; | 92 | struct list_head stats_alloc_node; |
93 | }; | 93 | }; |
94 | 94 | ||
95 | struct throtl_data | 95 | struct throtl_data |
96 | { | 96 | { |
97 | /* service tree for active throtl groups */ | 97 | /* service tree for active throtl groups */ |
98 | struct throtl_rb_root tg_service_tree; | 98 | struct throtl_rb_root tg_service_tree; |
99 | 99 | ||
100 | struct throtl_grp *root_tg; | ||
101 | struct request_queue *queue; | 100 | struct request_queue *queue; |
102 | 101 | ||
103 | /* Total Number of queued bios on READ and WRITE lists */ | 102 | /* Total Number of queued bios on READ and WRITE lists */ |
104 | unsigned int nr_queued[2]; | 103 | unsigned int nr_queued[2]; |
105 | 104 | ||
106 | /* | 105 | /* |
107 | * number of total undestroyed groups | 106 | * number of total undestroyed groups |
108 | */ | 107 | */ |
109 | unsigned int nr_undestroyed_grps; | 108 | unsigned int nr_undestroyed_grps; |
110 | 109 | ||
111 | /* Work for dispatching throttled bios */ | 110 | /* Work for dispatching throttled bios */ |
112 | struct delayed_work throtl_work; | 111 | struct delayed_work throtl_work; |
113 | 112 | ||
114 | int limits_changed; | 113 | int limits_changed; |
115 | }; | 114 | }; |
116 | 115 | ||
117 | /* list and work item to allocate percpu group stats */ | 116 | /* list and work item to allocate percpu group stats */ |
118 | static DEFINE_SPINLOCK(tg_stats_alloc_lock); | 117 | static DEFINE_SPINLOCK(tg_stats_alloc_lock); |
119 | static LIST_HEAD(tg_stats_alloc_list); | 118 | static LIST_HEAD(tg_stats_alloc_list); |
120 | 119 | ||
121 | static void tg_stats_alloc_fn(struct work_struct *); | 120 | static void tg_stats_alloc_fn(struct work_struct *); |
122 | static DECLARE_DELAYED_WORK(tg_stats_alloc_work, tg_stats_alloc_fn); | 121 | static DECLARE_DELAYED_WORK(tg_stats_alloc_work, tg_stats_alloc_fn); |
123 | 122 | ||
124 | static inline struct throtl_grp *blkg_to_tg(struct blkio_group *blkg) | 123 | static inline struct throtl_grp *blkg_to_tg(struct blkio_group *blkg) |
125 | { | 124 | { |
126 | return blkg_to_pdata(blkg, &blkio_policy_throtl); | 125 | return blkg_to_pdata(blkg, &blkio_policy_throtl); |
127 | } | 126 | } |
128 | 127 | ||
129 | static inline struct blkio_group *tg_to_blkg(struct throtl_grp *tg) | 128 | static inline struct blkio_group *tg_to_blkg(struct throtl_grp *tg) |
130 | { | 129 | { |
131 | return pdata_to_blkg(tg); | 130 | return pdata_to_blkg(tg); |
132 | } | 131 | } |
133 | 132 | ||
133 | static inline struct throtl_grp *td_root_tg(struct throtl_data *td) | ||
134 | { | ||
135 | return blkg_to_tg(td->queue->root_blkg); | ||
136 | } | ||
137 | |||
134 | enum tg_state_flags { | 138 | enum tg_state_flags { |
135 | THROTL_TG_FLAG_on_rr = 0, /* on round-robin busy list */ | 139 | THROTL_TG_FLAG_on_rr = 0, /* on round-robin busy list */ |
136 | }; | 140 | }; |
137 | 141 | ||
138 | #define THROTL_TG_FNS(name) \ | 142 | #define THROTL_TG_FNS(name) \ |
139 | static inline void throtl_mark_tg_##name(struct throtl_grp *tg) \ | 143 | static inline void throtl_mark_tg_##name(struct throtl_grp *tg) \ |
140 | { \ | 144 | { \ |
141 | (tg)->flags |= (1 << THROTL_TG_FLAG_##name); \ | 145 | (tg)->flags |= (1 << THROTL_TG_FLAG_##name); \ |
142 | } \ | 146 | } \ |
143 | static inline void throtl_clear_tg_##name(struct throtl_grp *tg) \ | 147 | static inline void throtl_clear_tg_##name(struct throtl_grp *tg) \ |
144 | { \ | 148 | { \ |
145 | (tg)->flags &= ~(1 << THROTL_TG_FLAG_##name); \ | 149 | (tg)->flags &= ~(1 << THROTL_TG_FLAG_##name); \ |
146 | } \ | 150 | } \ |
147 | static inline int throtl_tg_##name(const struct throtl_grp *tg) \ | 151 | static inline int throtl_tg_##name(const struct throtl_grp *tg) \ |
148 | { \ | 152 | { \ |
149 | return ((tg)->flags & (1 << THROTL_TG_FLAG_##name)) != 0; \ | 153 | return ((tg)->flags & (1 << THROTL_TG_FLAG_##name)) != 0; \ |
150 | } | 154 | } |
151 | 155 | ||
152 | THROTL_TG_FNS(on_rr); | 156 | THROTL_TG_FNS(on_rr); |
153 | 157 | ||
154 | #define throtl_log_tg(td, tg, fmt, args...) \ | 158 | #define throtl_log_tg(td, tg, fmt, args...) \ |
155 | blk_add_trace_msg((td)->queue, "throtl %s " fmt, \ | 159 | blk_add_trace_msg((td)->queue, "throtl %s " fmt, \ |
156 | blkg_path(tg_to_blkg(tg)), ##args); \ | 160 | blkg_path(tg_to_blkg(tg)), ##args); \ |
157 | 161 | ||
158 | #define throtl_log(td, fmt, args...) \ | 162 | #define throtl_log(td, fmt, args...) \ |
159 | blk_add_trace_msg((td)->queue, "throtl " fmt, ##args) | 163 | blk_add_trace_msg((td)->queue, "throtl " fmt, ##args) |
160 | 164 | ||
161 | static inline unsigned int total_nr_queued(struct throtl_data *td) | 165 | static inline unsigned int total_nr_queued(struct throtl_data *td) |
162 | { | 166 | { |
163 | return td->nr_queued[0] + td->nr_queued[1]; | 167 | return td->nr_queued[0] + td->nr_queued[1]; |
164 | } | 168 | } |
165 | 169 | ||
166 | /* | 170 | /* |
167 | * Worker for allocating per cpu stat for tgs. This is scheduled on the | 171 | * Worker for allocating per cpu stat for tgs. This is scheduled on the |
168 | * system_nrt_wq once there are some groups on the alloc_list waiting for | 172 | * system_nrt_wq once there are some groups on the alloc_list waiting for |
169 | * allocation. | 173 | * allocation. |
170 | */ | 174 | */ |
171 | static void tg_stats_alloc_fn(struct work_struct *work) | 175 | static void tg_stats_alloc_fn(struct work_struct *work) |
172 | { | 176 | { |
173 | static struct tg_stats_cpu *stats_cpu; /* this fn is non-reentrant */ | 177 | static struct tg_stats_cpu *stats_cpu; /* this fn is non-reentrant */ |
174 | struct delayed_work *dwork = to_delayed_work(work); | 178 | struct delayed_work *dwork = to_delayed_work(work); |
175 | bool empty = false; | 179 | bool empty = false; |
176 | 180 | ||
177 | alloc_stats: | 181 | alloc_stats: |
178 | if (!stats_cpu) { | 182 | if (!stats_cpu) { |
179 | stats_cpu = alloc_percpu(struct tg_stats_cpu); | 183 | stats_cpu = alloc_percpu(struct tg_stats_cpu); |
180 | if (!stats_cpu) { | 184 | if (!stats_cpu) { |
181 | /* allocation failed, try again after some time */ | 185 | /* allocation failed, try again after some time */ |
182 | queue_delayed_work(system_nrt_wq, dwork, | 186 | queue_delayed_work(system_nrt_wq, dwork, |
183 | msecs_to_jiffies(10)); | 187 | msecs_to_jiffies(10)); |
184 | return; | 188 | return; |
185 | } | 189 | } |
186 | } | 190 | } |
187 | 191 | ||
188 | spin_lock_irq(&tg_stats_alloc_lock); | 192 | spin_lock_irq(&tg_stats_alloc_lock); |
189 | 193 | ||
190 | if (!list_empty(&tg_stats_alloc_list)) { | 194 | if (!list_empty(&tg_stats_alloc_list)) { |
191 | struct throtl_grp *tg = list_first_entry(&tg_stats_alloc_list, | 195 | struct throtl_grp *tg = list_first_entry(&tg_stats_alloc_list, |
192 | struct throtl_grp, | 196 | struct throtl_grp, |
193 | stats_alloc_node); | 197 | stats_alloc_node); |
194 | swap(tg->stats_cpu, stats_cpu); | 198 | swap(tg->stats_cpu, stats_cpu); |
195 | list_del_init(&tg->stats_alloc_node); | 199 | list_del_init(&tg->stats_alloc_node); |
196 | } | 200 | } |
197 | 201 | ||
198 | empty = list_empty(&tg_stats_alloc_list); | 202 | empty = list_empty(&tg_stats_alloc_list); |
199 | spin_unlock_irq(&tg_stats_alloc_lock); | 203 | spin_unlock_irq(&tg_stats_alloc_lock); |
200 | if (!empty) | 204 | if (!empty) |
201 | goto alloc_stats; | 205 | goto alloc_stats; |
202 | } | 206 | } |
203 | 207 | ||
204 | static void throtl_init_blkio_group(struct blkio_group *blkg) | 208 | static void throtl_init_blkio_group(struct blkio_group *blkg) |
205 | { | 209 | { |
206 | struct throtl_grp *tg = blkg_to_tg(blkg); | 210 | struct throtl_grp *tg = blkg_to_tg(blkg); |
207 | 211 | ||
208 | RB_CLEAR_NODE(&tg->rb_node); | 212 | RB_CLEAR_NODE(&tg->rb_node); |
209 | bio_list_init(&tg->bio_lists[0]); | 213 | bio_list_init(&tg->bio_lists[0]); |
210 | bio_list_init(&tg->bio_lists[1]); | 214 | bio_list_init(&tg->bio_lists[1]); |
211 | tg->limits_changed = false; | 215 | tg->limits_changed = false; |
212 | 216 | ||
213 | tg->bps[READ] = -1; | 217 | tg->bps[READ] = -1; |
214 | tg->bps[WRITE] = -1; | 218 | tg->bps[WRITE] = -1; |
215 | tg->iops[READ] = -1; | 219 | tg->iops[READ] = -1; |
216 | tg->iops[WRITE] = -1; | 220 | tg->iops[WRITE] = -1; |
217 | 221 | ||
218 | /* | 222 | /* |
219 | * Ugh... We need to perform per-cpu allocation for tg->stats_cpu | 223 | * Ugh... We need to perform per-cpu allocation for tg->stats_cpu |
220 | * but percpu allocator can't be called from IO path. Queue tg on | 224 | * but percpu allocator can't be called from IO path. Queue tg on |
221 | * tg_stats_alloc_list and allocate from work item. | 225 | * tg_stats_alloc_list and allocate from work item. |
222 | */ | 226 | */ |
223 | spin_lock(&tg_stats_alloc_lock); | 227 | spin_lock(&tg_stats_alloc_lock); |
224 | list_add(&tg->stats_alloc_node, &tg_stats_alloc_list); | 228 | list_add(&tg->stats_alloc_node, &tg_stats_alloc_list); |
225 | queue_delayed_work(system_nrt_wq, &tg_stats_alloc_work, 0); | 229 | queue_delayed_work(system_nrt_wq, &tg_stats_alloc_work, 0); |
226 | spin_unlock(&tg_stats_alloc_lock); | 230 | spin_unlock(&tg_stats_alloc_lock); |
227 | } | 231 | } |
228 | 232 | ||
229 | static void throtl_exit_blkio_group(struct blkio_group *blkg) | 233 | static void throtl_exit_blkio_group(struct blkio_group *blkg) |
230 | { | 234 | { |
231 | struct throtl_grp *tg = blkg_to_tg(blkg); | 235 | struct throtl_grp *tg = blkg_to_tg(blkg); |
232 | 236 | ||
233 | spin_lock(&tg_stats_alloc_lock); | 237 | spin_lock(&tg_stats_alloc_lock); |
234 | list_del_init(&tg->stats_alloc_node); | 238 | list_del_init(&tg->stats_alloc_node); |
235 | spin_unlock(&tg_stats_alloc_lock); | 239 | spin_unlock(&tg_stats_alloc_lock); |
236 | 240 | ||
237 | free_percpu(tg->stats_cpu); | 241 | free_percpu(tg->stats_cpu); |
238 | } | 242 | } |
239 | 243 | ||
240 | static void throtl_reset_group_stats(struct blkio_group *blkg) | 244 | static void throtl_reset_group_stats(struct blkio_group *blkg) |
241 | { | 245 | { |
242 | struct throtl_grp *tg = blkg_to_tg(blkg); | 246 | struct throtl_grp *tg = blkg_to_tg(blkg); |
243 | int cpu; | 247 | int cpu; |
244 | 248 | ||
245 | if (tg->stats_cpu == NULL) | 249 | if (tg->stats_cpu == NULL) |
246 | return; | 250 | return; |
247 | 251 | ||
248 | for_each_possible_cpu(cpu) { | 252 | for_each_possible_cpu(cpu) { |
249 | struct tg_stats_cpu *sc = per_cpu_ptr(tg->stats_cpu, cpu); | 253 | struct tg_stats_cpu *sc = per_cpu_ptr(tg->stats_cpu, cpu); |
250 | 254 | ||
251 | blkg_rwstat_reset(&sc->service_bytes); | 255 | blkg_rwstat_reset(&sc->service_bytes); |
252 | blkg_rwstat_reset(&sc->serviced); | 256 | blkg_rwstat_reset(&sc->serviced); |
253 | } | 257 | } |
254 | } | 258 | } |
255 | 259 | ||
256 | static struct | 260 | static struct |
257 | throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) | 261 | throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) |
258 | { | 262 | { |
259 | /* | 263 | /* |
260 | * This is the common case when there are no blkio cgroups. | 264 | * This is the common case when there are no blkio cgroups. |
261 | * Avoid lookup in this case | 265 | * Avoid lookup in this case |
262 | */ | 266 | */ |
263 | if (blkcg == &blkio_root_cgroup) | 267 | if (blkcg == &blkio_root_cgroup) |
264 | return td->root_tg; | 268 | return td_root_tg(td); |
265 | 269 | ||
266 | return blkg_to_tg(blkg_lookup(blkcg, td->queue)); | 270 | return blkg_to_tg(blkg_lookup(blkcg, td->queue)); |
267 | } | 271 | } |
268 | 272 | ||
269 | static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, | 273 | static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, |
270 | struct blkio_cgroup *blkcg) | 274 | struct blkio_cgroup *blkcg) |
271 | { | 275 | { |
272 | struct request_queue *q = td->queue; | 276 | struct request_queue *q = td->queue; |
273 | struct throtl_grp *tg = NULL; | 277 | struct throtl_grp *tg = NULL; |
274 | 278 | ||
275 | /* | 279 | /* |
276 | * This is the common case when there are no blkio cgroups. | 280 | * This is the common case when there are no blkio cgroups. |
277 | * Avoid lookup in this case | 281 | * Avoid lookup in this case |
278 | */ | 282 | */ |
279 | if (blkcg == &blkio_root_cgroup) { | 283 | if (blkcg == &blkio_root_cgroup) { |
280 | tg = td->root_tg; | 284 | tg = td_root_tg(td); |
281 | } else { | 285 | } else { |
282 | struct blkio_group *blkg; | 286 | struct blkio_group *blkg; |
283 | 287 | ||
284 | blkg = blkg_lookup_create(blkcg, q, false); | 288 | blkg = blkg_lookup_create(blkcg, q, false); |
285 | 289 | ||
286 | /* if %NULL and @q is alive, fall back to root_tg */ | 290 | /* if %NULL and @q is alive, fall back to root_tg */ |
287 | if (!IS_ERR(blkg)) | 291 | if (!IS_ERR(blkg)) |
288 | tg = blkg_to_tg(blkg); | 292 | tg = blkg_to_tg(blkg); |
289 | else if (!blk_queue_dead(q)) | 293 | else if (!blk_queue_dead(q)) |
290 | tg = td->root_tg; | 294 | tg = td_root_tg(td); |
291 | } | 295 | } |
292 | 296 | ||
293 | return tg; | 297 | return tg; |
294 | } | 298 | } |
295 | 299 | ||
296 | static struct throtl_grp *throtl_rb_first(struct throtl_rb_root *root) | 300 | static struct throtl_grp *throtl_rb_first(struct throtl_rb_root *root) |
297 | { | 301 | { |
298 | /* Service tree is empty */ | 302 | /* Service tree is empty */ |
299 | if (!root->count) | 303 | if (!root->count) |
300 | return NULL; | 304 | return NULL; |
301 | 305 | ||
302 | if (!root->left) | 306 | if (!root->left) |
303 | root->left = rb_first(&root->rb); | 307 | root->left = rb_first(&root->rb); |
304 | 308 | ||
305 | if (root->left) | 309 | if (root->left) |
306 | return rb_entry_tg(root->left); | 310 | return rb_entry_tg(root->left); |
307 | 311 | ||
308 | return NULL; | 312 | return NULL; |
309 | } | 313 | } |
310 | 314 | ||
311 | static void rb_erase_init(struct rb_node *n, struct rb_root *root) | 315 | static void rb_erase_init(struct rb_node *n, struct rb_root *root) |
312 | { | 316 | { |
313 | rb_erase(n, root); | 317 | rb_erase(n, root); |
314 | RB_CLEAR_NODE(n); | 318 | RB_CLEAR_NODE(n); |
315 | } | 319 | } |
316 | 320 | ||
317 | static void throtl_rb_erase(struct rb_node *n, struct throtl_rb_root *root) | 321 | static void throtl_rb_erase(struct rb_node *n, struct throtl_rb_root *root) |
318 | { | 322 | { |
319 | if (root->left == n) | 323 | if (root->left == n) |
320 | root->left = NULL; | 324 | root->left = NULL; |
321 | rb_erase_init(n, &root->rb); | 325 | rb_erase_init(n, &root->rb); |
322 | --root->count; | 326 | --root->count; |
323 | } | 327 | } |
324 | 328 | ||
325 | static void update_min_dispatch_time(struct throtl_rb_root *st) | 329 | static void update_min_dispatch_time(struct throtl_rb_root *st) |
326 | { | 330 | { |
327 | struct throtl_grp *tg; | 331 | struct throtl_grp *tg; |
328 | 332 | ||
329 | tg = throtl_rb_first(st); | 333 | tg = throtl_rb_first(st); |
330 | if (!tg) | 334 | if (!tg) |
331 | return; | 335 | return; |
332 | 336 | ||
333 | st->min_disptime = tg->disptime; | 337 | st->min_disptime = tg->disptime; |
334 | } | 338 | } |
335 | 339 | ||
336 | static void | 340 | static void |
337 | tg_service_tree_add(struct throtl_rb_root *st, struct throtl_grp *tg) | 341 | tg_service_tree_add(struct throtl_rb_root *st, struct throtl_grp *tg) |
338 | { | 342 | { |
339 | struct rb_node **node = &st->rb.rb_node; | 343 | struct rb_node **node = &st->rb.rb_node; |
340 | struct rb_node *parent = NULL; | 344 | struct rb_node *parent = NULL; |
341 | struct throtl_grp *__tg; | 345 | struct throtl_grp *__tg; |
342 | unsigned long key = tg->disptime; | 346 | unsigned long key = tg->disptime; |
343 | int left = 1; | 347 | int left = 1; |
344 | 348 | ||
345 | while (*node != NULL) { | 349 | while (*node != NULL) { |
346 | parent = *node; | 350 | parent = *node; |
347 | __tg = rb_entry_tg(parent); | 351 | __tg = rb_entry_tg(parent); |
348 | 352 | ||
349 | if (time_before(key, __tg->disptime)) | 353 | if (time_before(key, __tg->disptime)) |
350 | node = &parent->rb_left; | 354 | node = &parent->rb_left; |
351 | else { | 355 | else { |
352 | node = &parent->rb_right; | 356 | node = &parent->rb_right; |
353 | left = 0; | 357 | left = 0; |
354 | } | 358 | } |
355 | } | 359 | } |
356 | 360 | ||
357 | if (left) | 361 | if (left) |
358 | st->left = &tg->rb_node; | 362 | st->left = &tg->rb_node; |
359 | 363 | ||
360 | rb_link_node(&tg->rb_node, parent, node); | 364 | rb_link_node(&tg->rb_node, parent, node); |
361 | rb_insert_color(&tg->rb_node, &st->rb); | 365 | rb_insert_color(&tg->rb_node, &st->rb); |
362 | } | 366 | } |
363 | 367 | ||
364 | static void __throtl_enqueue_tg(struct throtl_data *td, struct throtl_grp *tg) | 368 | static void __throtl_enqueue_tg(struct throtl_data *td, struct throtl_grp *tg) |
365 | { | 369 | { |
366 | struct throtl_rb_root *st = &td->tg_service_tree; | 370 | struct throtl_rb_root *st = &td->tg_service_tree; |
367 | 371 | ||
368 | tg_service_tree_add(st, tg); | 372 | tg_service_tree_add(st, tg); |
369 | throtl_mark_tg_on_rr(tg); | 373 | throtl_mark_tg_on_rr(tg); |
370 | st->count++; | 374 | st->count++; |
371 | } | 375 | } |
372 | 376 | ||
373 | static void throtl_enqueue_tg(struct throtl_data *td, struct throtl_grp *tg) | 377 | static void throtl_enqueue_tg(struct throtl_data *td, struct throtl_grp *tg) |
374 | { | 378 | { |
375 | if (!throtl_tg_on_rr(tg)) | 379 | if (!throtl_tg_on_rr(tg)) |
376 | __throtl_enqueue_tg(td, tg); | 380 | __throtl_enqueue_tg(td, tg); |
377 | } | 381 | } |
378 | 382 | ||
379 | static void __throtl_dequeue_tg(struct throtl_data *td, struct throtl_grp *tg) | 383 | static void __throtl_dequeue_tg(struct throtl_data *td, struct throtl_grp *tg) |
380 | { | 384 | { |
381 | throtl_rb_erase(&tg->rb_node, &td->tg_service_tree); | 385 | throtl_rb_erase(&tg->rb_node, &td->tg_service_tree); |
382 | throtl_clear_tg_on_rr(tg); | 386 | throtl_clear_tg_on_rr(tg); |
383 | } | 387 | } |
384 | 388 | ||
385 | static void throtl_dequeue_tg(struct throtl_data *td, struct throtl_grp *tg) | 389 | static void throtl_dequeue_tg(struct throtl_data *td, struct throtl_grp *tg) |
386 | { | 390 | { |
387 | if (throtl_tg_on_rr(tg)) | 391 | if (throtl_tg_on_rr(tg)) |
388 | __throtl_dequeue_tg(td, tg); | 392 | __throtl_dequeue_tg(td, tg); |
389 | } | 393 | } |
390 | 394 | ||
391 | static void throtl_schedule_next_dispatch(struct throtl_data *td) | 395 | static void throtl_schedule_next_dispatch(struct throtl_data *td) |
392 | { | 396 | { |
393 | struct throtl_rb_root *st = &td->tg_service_tree; | 397 | struct throtl_rb_root *st = &td->tg_service_tree; |
394 | 398 | ||
395 | /* | 399 | /* |
396 | * If there are more bios pending, schedule more work. | 400 | * If there are more bios pending, schedule more work. |
397 | */ | 401 | */ |
398 | if (!total_nr_queued(td)) | 402 | if (!total_nr_queued(td)) |
399 | return; | 403 | return; |
400 | 404 | ||
401 | BUG_ON(!st->count); | 405 | BUG_ON(!st->count); |
402 | 406 | ||
403 | update_min_dispatch_time(st); | 407 | update_min_dispatch_time(st); |
404 | 408 | ||
405 | if (time_before_eq(st->min_disptime, jiffies)) | 409 | if (time_before_eq(st->min_disptime, jiffies)) |
406 | throtl_schedule_delayed_work(td, 0); | 410 | throtl_schedule_delayed_work(td, 0); |
407 | else | 411 | else |
408 | throtl_schedule_delayed_work(td, (st->min_disptime - jiffies)); | 412 | throtl_schedule_delayed_work(td, (st->min_disptime - jiffies)); |
409 | } | 413 | } |
410 | 414 | ||
411 | static inline void | 415 | static inline void |
412 | throtl_start_new_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw) | 416 | throtl_start_new_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw) |
413 | { | 417 | { |
414 | tg->bytes_disp[rw] = 0; | 418 | tg->bytes_disp[rw] = 0; |
415 | tg->io_disp[rw] = 0; | 419 | tg->io_disp[rw] = 0; |
416 | tg->slice_start[rw] = jiffies; | 420 | tg->slice_start[rw] = jiffies; |
417 | tg->slice_end[rw] = jiffies + throtl_slice; | 421 | tg->slice_end[rw] = jiffies + throtl_slice; |
418 | throtl_log_tg(td, tg, "[%c] new slice start=%lu end=%lu jiffies=%lu", | 422 | throtl_log_tg(td, tg, "[%c] new slice start=%lu end=%lu jiffies=%lu", |
419 | rw == READ ? 'R' : 'W', tg->slice_start[rw], | 423 | rw == READ ? 'R' : 'W', tg->slice_start[rw], |
420 | tg->slice_end[rw], jiffies); | 424 | tg->slice_end[rw], jiffies); |
421 | } | 425 | } |
422 | 426 | ||
423 | static inline void throtl_set_slice_end(struct throtl_data *td, | 427 | static inline void throtl_set_slice_end(struct throtl_data *td, |
424 | struct throtl_grp *tg, bool rw, unsigned long jiffy_end) | 428 | struct throtl_grp *tg, bool rw, unsigned long jiffy_end) |
425 | { | 429 | { |
426 | tg->slice_end[rw] = roundup(jiffy_end, throtl_slice); | 430 | tg->slice_end[rw] = roundup(jiffy_end, throtl_slice); |
427 | } | 431 | } |
428 | 432 | ||
429 | static inline void throtl_extend_slice(struct throtl_data *td, | 433 | static inline void throtl_extend_slice(struct throtl_data *td, |
430 | struct throtl_grp *tg, bool rw, unsigned long jiffy_end) | 434 | struct throtl_grp *tg, bool rw, unsigned long jiffy_end) |
431 | { | 435 | { |
432 | tg->slice_end[rw] = roundup(jiffy_end, throtl_slice); | 436 | tg->slice_end[rw] = roundup(jiffy_end, throtl_slice); |
433 | throtl_log_tg(td, tg, "[%c] extend slice start=%lu end=%lu jiffies=%lu", | 437 | throtl_log_tg(td, tg, "[%c] extend slice start=%lu end=%lu jiffies=%lu", |
434 | rw == READ ? 'R' : 'W', tg->slice_start[rw], | 438 | rw == READ ? 'R' : 'W', tg->slice_start[rw], |
435 | tg->slice_end[rw], jiffies); | 439 | tg->slice_end[rw], jiffies); |
436 | } | 440 | } |
437 | 441 | ||
438 | /* Determine if previously allocated or extended slice is complete or not */ | 442 | /* Determine if previously allocated or extended slice is complete or not */ |
439 | static bool | 443 | static bool |
440 | throtl_slice_used(struct throtl_data *td, struct throtl_grp *tg, bool rw) | 444 | throtl_slice_used(struct throtl_data *td, struct throtl_grp *tg, bool rw) |
441 | { | 445 | { |
442 | if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw])) | 446 | if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw])) |
443 | return 0; | 447 | return 0; |
444 | 448 | ||
445 | return 1; | 449 | return 1; |
446 | } | 450 | } |
447 | 451 | ||
448 | /* Trim the used slices and adjust slice start accordingly */ | 452 | /* Trim the used slices and adjust slice start accordingly */ |
449 | static inline void | 453 | static inline void |
450 | throtl_trim_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw) | 454 | throtl_trim_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw) |
451 | { | 455 | { |
452 | unsigned long nr_slices, time_elapsed, io_trim; | 456 | unsigned long nr_slices, time_elapsed, io_trim; |
453 | u64 bytes_trim, tmp; | 457 | u64 bytes_trim, tmp; |
454 | 458 | ||
455 | BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw])); | 459 | BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw])); |
456 | 460 | ||
457 | /* | 461 | /* |
458 | * If bps are unlimited (-1), then time slice don't get | 462 | * If bps are unlimited (-1), then time slice don't get |
459 | * renewed. Don't try to trim the slice if slice is used. A new | 463 | * renewed. Don't try to trim the slice if slice is used. A new |
460 | * slice will start when appropriate. | 464 | * slice will start when appropriate. |
461 | */ | 465 | */ |
462 | if (throtl_slice_used(td, tg, rw)) | 466 | if (throtl_slice_used(td, tg, rw)) |
463 | return; | 467 | return; |
464 | 468 | ||
465 | /* | 469 | /* |
466 | * A bio has been dispatched. Also adjust slice_end. It might happen | 470 | * A bio has been dispatched. Also adjust slice_end. It might happen |
467 | * that initially cgroup limit was very low resulting in high | 471 | * that initially cgroup limit was very low resulting in high |
468 | * slice_end, but later limit was bumped up and bio was dispached | 472 | * slice_end, but later limit was bumped up and bio was dispached |
469 | * sooner, then we need to reduce slice_end. A high bogus slice_end | 473 | * sooner, then we need to reduce slice_end. A high bogus slice_end |
470 | * is bad because it does not allow new slice to start. | 474 | * is bad because it does not allow new slice to start. |
471 | */ | 475 | */ |
472 | 476 | ||
473 | throtl_set_slice_end(td, tg, rw, jiffies + throtl_slice); | 477 | throtl_set_slice_end(td, tg, rw, jiffies + throtl_slice); |
474 | 478 | ||
475 | time_elapsed = jiffies - tg->slice_start[rw]; | 479 | time_elapsed = jiffies - tg->slice_start[rw]; |
476 | 480 | ||
477 | nr_slices = time_elapsed / throtl_slice; | 481 | nr_slices = time_elapsed / throtl_slice; |
478 | 482 | ||
479 | if (!nr_slices) | 483 | if (!nr_slices) |
480 | return; | 484 | return; |
481 | tmp = tg->bps[rw] * throtl_slice * nr_slices; | 485 | tmp = tg->bps[rw] * throtl_slice * nr_slices; |
482 | do_div(tmp, HZ); | 486 | do_div(tmp, HZ); |
483 | bytes_trim = tmp; | 487 | bytes_trim = tmp; |
484 | 488 | ||
485 | io_trim = (tg->iops[rw] * throtl_slice * nr_slices)/HZ; | 489 | io_trim = (tg->iops[rw] * throtl_slice * nr_slices)/HZ; |
486 | 490 | ||
487 | if (!bytes_trim && !io_trim) | 491 | if (!bytes_trim && !io_trim) |
488 | return; | 492 | return; |
489 | 493 | ||
490 | if (tg->bytes_disp[rw] >= bytes_trim) | 494 | if (tg->bytes_disp[rw] >= bytes_trim) |
491 | tg->bytes_disp[rw] -= bytes_trim; | 495 | tg->bytes_disp[rw] -= bytes_trim; |
492 | else | 496 | else |
493 | tg->bytes_disp[rw] = 0; | 497 | tg->bytes_disp[rw] = 0; |
494 | 498 | ||
495 | if (tg->io_disp[rw] >= io_trim) | 499 | if (tg->io_disp[rw] >= io_trim) |
496 | tg->io_disp[rw] -= io_trim; | 500 | tg->io_disp[rw] -= io_trim; |
497 | else | 501 | else |
498 | tg->io_disp[rw] = 0; | 502 | tg->io_disp[rw] = 0; |
499 | 503 | ||
500 | tg->slice_start[rw] += nr_slices * throtl_slice; | 504 | tg->slice_start[rw] += nr_slices * throtl_slice; |
501 | 505 | ||
502 | throtl_log_tg(td, tg, "[%c] trim slice nr=%lu bytes=%llu io=%lu" | 506 | throtl_log_tg(td, tg, "[%c] trim slice nr=%lu bytes=%llu io=%lu" |
503 | " start=%lu end=%lu jiffies=%lu", | 507 | " start=%lu end=%lu jiffies=%lu", |
504 | rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim, | 508 | rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim, |
505 | tg->slice_start[rw], tg->slice_end[rw], jiffies); | 509 | tg->slice_start[rw], tg->slice_end[rw], jiffies); |
506 | } | 510 | } |
507 | 511 | ||
508 | static bool tg_with_in_iops_limit(struct throtl_data *td, struct throtl_grp *tg, | 512 | static bool tg_with_in_iops_limit(struct throtl_data *td, struct throtl_grp *tg, |
509 | struct bio *bio, unsigned long *wait) | 513 | struct bio *bio, unsigned long *wait) |
510 | { | 514 | { |
511 | bool rw = bio_data_dir(bio); | 515 | bool rw = bio_data_dir(bio); |
512 | unsigned int io_allowed; | 516 | unsigned int io_allowed; |
513 | unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; | 517 | unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; |
514 | u64 tmp; | 518 | u64 tmp; |
515 | 519 | ||
516 | jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw]; | 520 | jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw]; |
517 | 521 | ||
518 | /* Slice has just started. Consider one slice interval */ | 522 | /* Slice has just started. Consider one slice interval */ |
519 | if (!jiffy_elapsed) | 523 | if (!jiffy_elapsed) |
520 | jiffy_elapsed_rnd = throtl_slice; | 524 | jiffy_elapsed_rnd = throtl_slice; |
521 | 525 | ||
522 | jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, throtl_slice); | 526 | jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, throtl_slice); |
523 | 527 | ||
524 | /* | 528 | /* |
525 | * jiffy_elapsed_rnd should not be a big value as minimum iops can be | 529 | * jiffy_elapsed_rnd should not be a big value as minimum iops can be |
526 | * 1 then at max jiffy elapsed should be equivalent of 1 second as we | 530 | * 1 then at max jiffy elapsed should be equivalent of 1 second as we |
527 | * will allow dispatch after 1 second and after that slice should | 531 | * will allow dispatch after 1 second and after that slice should |
528 | * have been trimmed. | 532 | * have been trimmed. |
529 | */ | 533 | */ |
530 | 534 | ||
531 | tmp = (u64)tg->iops[rw] * jiffy_elapsed_rnd; | 535 | tmp = (u64)tg->iops[rw] * jiffy_elapsed_rnd; |
532 | do_div(tmp, HZ); | 536 | do_div(tmp, HZ); |
533 | 537 | ||
534 | if (tmp > UINT_MAX) | 538 | if (tmp > UINT_MAX) |
535 | io_allowed = UINT_MAX; | 539 | io_allowed = UINT_MAX; |
536 | else | 540 | else |
537 | io_allowed = tmp; | 541 | io_allowed = tmp; |
538 | 542 | ||
539 | if (tg->io_disp[rw] + 1 <= io_allowed) { | 543 | if (tg->io_disp[rw] + 1 <= io_allowed) { |
540 | if (wait) | 544 | if (wait) |
541 | *wait = 0; | 545 | *wait = 0; |
542 | return 1; | 546 | return 1; |
543 | } | 547 | } |
544 | 548 | ||
545 | /* Calc approx time to dispatch */ | 549 | /* Calc approx time to dispatch */ |
546 | jiffy_wait = ((tg->io_disp[rw] + 1) * HZ)/tg->iops[rw] + 1; | 550 | jiffy_wait = ((tg->io_disp[rw] + 1) * HZ)/tg->iops[rw] + 1; |
547 | 551 | ||
548 | if (jiffy_wait > jiffy_elapsed) | 552 | if (jiffy_wait > jiffy_elapsed) |
549 | jiffy_wait = jiffy_wait - jiffy_elapsed; | 553 | jiffy_wait = jiffy_wait - jiffy_elapsed; |
550 | else | 554 | else |
551 | jiffy_wait = 1; | 555 | jiffy_wait = 1; |
552 | 556 | ||
553 | if (wait) | 557 | if (wait) |
554 | *wait = jiffy_wait; | 558 | *wait = jiffy_wait; |
555 | return 0; | 559 | return 0; |
556 | } | 560 | } |
557 | 561 | ||
558 | static bool tg_with_in_bps_limit(struct throtl_data *td, struct throtl_grp *tg, | 562 | static bool tg_with_in_bps_limit(struct throtl_data *td, struct throtl_grp *tg, |
559 | struct bio *bio, unsigned long *wait) | 563 | struct bio *bio, unsigned long *wait) |
560 | { | 564 | { |
561 | bool rw = bio_data_dir(bio); | 565 | bool rw = bio_data_dir(bio); |
562 | u64 bytes_allowed, extra_bytes, tmp; | 566 | u64 bytes_allowed, extra_bytes, tmp; |
563 | unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; | 567 | unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; |
564 | 568 | ||
565 | jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw]; | 569 | jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw]; |
566 | 570 | ||
567 | /* Slice has just started. Consider one slice interval */ | 571 | /* Slice has just started. Consider one slice interval */ |
568 | if (!jiffy_elapsed) | 572 | if (!jiffy_elapsed) |
569 | jiffy_elapsed_rnd = throtl_slice; | 573 | jiffy_elapsed_rnd = throtl_slice; |
570 | 574 | ||
571 | jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, throtl_slice); | 575 | jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, throtl_slice); |
572 | 576 | ||
573 | tmp = tg->bps[rw] * jiffy_elapsed_rnd; | 577 | tmp = tg->bps[rw] * jiffy_elapsed_rnd; |
574 | do_div(tmp, HZ); | 578 | do_div(tmp, HZ); |
575 | bytes_allowed = tmp; | 579 | bytes_allowed = tmp; |
576 | 580 | ||
577 | if (tg->bytes_disp[rw] + bio->bi_size <= bytes_allowed) { | 581 | if (tg->bytes_disp[rw] + bio->bi_size <= bytes_allowed) { |
578 | if (wait) | 582 | if (wait) |
579 | *wait = 0; | 583 | *wait = 0; |
580 | return 1; | 584 | return 1; |
581 | } | 585 | } |
582 | 586 | ||
583 | /* Calc approx time to dispatch */ | 587 | /* Calc approx time to dispatch */ |
584 | extra_bytes = tg->bytes_disp[rw] + bio->bi_size - bytes_allowed; | 588 | extra_bytes = tg->bytes_disp[rw] + bio->bi_size - bytes_allowed; |
585 | jiffy_wait = div64_u64(extra_bytes * HZ, tg->bps[rw]); | 589 | jiffy_wait = div64_u64(extra_bytes * HZ, tg->bps[rw]); |
586 | 590 | ||
587 | if (!jiffy_wait) | 591 | if (!jiffy_wait) |
588 | jiffy_wait = 1; | 592 | jiffy_wait = 1; |
589 | 593 | ||
590 | /* | 594 | /* |
591 | * This wait time is without taking into consideration the rounding | 595 | * This wait time is without taking into consideration the rounding |
592 | * up we did. Add that time also. | 596 | * up we did. Add that time also. |
593 | */ | 597 | */ |
594 | jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed); | 598 | jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed); |
595 | if (wait) | 599 | if (wait) |
596 | *wait = jiffy_wait; | 600 | *wait = jiffy_wait; |
597 | return 0; | 601 | return 0; |
598 | } | 602 | } |
599 | 603 | ||
600 | static bool tg_no_rule_group(struct throtl_grp *tg, bool rw) { | 604 | static bool tg_no_rule_group(struct throtl_grp *tg, bool rw) { |
601 | if (tg->bps[rw] == -1 && tg->iops[rw] == -1) | 605 | if (tg->bps[rw] == -1 && tg->iops[rw] == -1) |
602 | return 1; | 606 | return 1; |
603 | return 0; | 607 | return 0; |
604 | } | 608 | } |
605 | 609 | ||
606 | /* | 610 | /* |
607 | * Returns whether one can dispatch a bio or not. Also returns approx number | 611 | * Returns whether one can dispatch a bio or not. Also returns approx number |
608 | * of jiffies to wait before this bio is with-in IO rate and can be dispatched | 612 | * of jiffies to wait before this bio is with-in IO rate and can be dispatched |
609 | */ | 613 | */ |
610 | static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, | 614 | static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, |
611 | struct bio *bio, unsigned long *wait) | 615 | struct bio *bio, unsigned long *wait) |
612 | { | 616 | { |
613 | bool rw = bio_data_dir(bio); | 617 | bool rw = bio_data_dir(bio); |
614 | unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0; | 618 | unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0; |
615 | 619 | ||
616 | /* | 620 | /* |
617 | * Currently whole state machine of group depends on first bio | 621 | * Currently whole state machine of group depends on first bio |
618 | * queued in the group bio list. So one should not be calling | 622 | * queued in the group bio list. So one should not be calling |
619 | * this function with a different bio if there are other bios | 623 | * this function with a different bio if there are other bios |
620 | * queued. | 624 | * queued. |
621 | */ | 625 | */ |
622 | BUG_ON(tg->nr_queued[rw] && bio != bio_list_peek(&tg->bio_lists[rw])); | 626 | BUG_ON(tg->nr_queued[rw] && bio != bio_list_peek(&tg->bio_lists[rw])); |
623 | 627 | ||
624 | /* If tg->bps = -1, then BW is unlimited */ | 628 | /* If tg->bps = -1, then BW is unlimited */ |
625 | if (tg->bps[rw] == -1 && tg->iops[rw] == -1) { | 629 | if (tg->bps[rw] == -1 && tg->iops[rw] == -1) { |
626 | if (wait) | 630 | if (wait) |
627 | *wait = 0; | 631 | *wait = 0; |
628 | return 1; | 632 | return 1; |
629 | } | 633 | } |
630 | 634 | ||
631 | /* | 635 | /* |
632 | * If previous slice expired, start a new one otherwise renew/extend | 636 | * If previous slice expired, start a new one otherwise renew/extend |
633 | * existing slice to make sure it is at least throtl_slice interval | 637 | * existing slice to make sure it is at least throtl_slice interval |
634 | * long since now. | 638 | * long since now. |
635 | */ | 639 | */ |
636 | if (throtl_slice_used(td, tg, rw)) | 640 | if (throtl_slice_used(td, tg, rw)) |
637 | throtl_start_new_slice(td, tg, rw); | 641 | throtl_start_new_slice(td, tg, rw); |
638 | else { | 642 | else { |
639 | if (time_before(tg->slice_end[rw], jiffies + throtl_slice)) | 643 | if (time_before(tg->slice_end[rw], jiffies + throtl_slice)) |
640 | throtl_extend_slice(td, tg, rw, jiffies + throtl_slice); | 644 | throtl_extend_slice(td, tg, rw, jiffies + throtl_slice); |
641 | } | 645 | } |
642 | 646 | ||
643 | if (tg_with_in_bps_limit(td, tg, bio, &bps_wait) | 647 | if (tg_with_in_bps_limit(td, tg, bio, &bps_wait) |
644 | && tg_with_in_iops_limit(td, tg, bio, &iops_wait)) { | 648 | && tg_with_in_iops_limit(td, tg, bio, &iops_wait)) { |
645 | if (wait) | 649 | if (wait) |
646 | *wait = 0; | 650 | *wait = 0; |
647 | return 1; | 651 | return 1; |
648 | } | 652 | } |
649 | 653 | ||
650 | max_wait = max(bps_wait, iops_wait); | 654 | max_wait = max(bps_wait, iops_wait); |
651 | 655 | ||
652 | if (wait) | 656 | if (wait) |
653 | *wait = max_wait; | 657 | *wait = max_wait; |
654 | 658 | ||
655 | if (time_before(tg->slice_end[rw], jiffies + max_wait)) | 659 | if (time_before(tg->slice_end[rw], jiffies + max_wait)) |
656 | throtl_extend_slice(td, tg, rw, jiffies + max_wait); | 660 | throtl_extend_slice(td, tg, rw, jiffies + max_wait); |
657 | 661 | ||
658 | return 0; | 662 | return 0; |
659 | } | 663 | } |
660 | 664 | ||
661 | static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, | 665 | static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, |
662 | int rw) | 666 | int rw) |
663 | { | 667 | { |
664 | struct throtl_grp *tg = blkg_to_tg(blkg); | 668 | struct throtl_grp *tg = blkg_to_tg(blkg); |
665 | struct tg_stats_cpu *stats_cpu; | 669 | struct tg_stats_cpu *stats_cpu; |
666 | unsigned long flags; | 670 | unsigned long flags; |
667 | 671 | ||
668 | /* If per cpu stats are not allocated yet, don't do any accounting. */ | 672 | /* If per cpu stats are not allocated yet, don't do any accounting. */ |
669 | if (tg->stats_cpu == NULL) | 673 | if (tg->stats_cpu == NULL) |
670 | return; | 674 | return; |
671 | 675 | ||
672 | /* | 676 | /* |
673 | * Disabling interrupts to provide mutual exclusion between two | 677 | * Disabling interrupts to provide mutual exclusion between two |
674 | * writes on same cpu. It probably is not needed for 64bit. Not | 678 | * writes on same cpu. It probably is not needed for 64bit. Not |
675 | * optimizing that case yet. | 679 | * optimizing that case yet. |
676 | */ | 680 | */ |
677 | local_irq_save(flags); | 681 | local_irq_save(flags); |
678 | 682 | ||
679 | stats_cpu = this_cpu_ptr(tg->stats_cpu); | 683 | stats_cpu = this_cpu_ptr(tg->stats_cpu); |
680 | 684 | ||
681 | blkg_rwstat_add(&stats_cpu->serviced, rw, 1); | 685 | blkg_rwstat_add(&stats_cpu->serviced, rw, 1); |
682 | blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); | 686 | blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); |
683 | 687 | ||
684 | local_irq_restore(flags); | 688 | local_irq_restore(flags); |
685 | } | 689 | } |
686 | 690 | ||
687 | static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) | 691 | static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) |
688 | { | 692 | { |
689 | bool rw = bio_data_dir(bio); | 693 | bool rw = bio_data_dir(bio); |
690 | 694 | ||
691 | /* Charge the bio to the group */ | 695 | /* Charge the bio to the group */ |
692 | tg->bytes_disp[rw] += bio->bi_size; | 696 | tg->bytes_disp[rw] += bio->bi_size; |
693 | tg->io_disp[rw]++; | 697 | tg->io_disp[rw]++; |
694 | 698 | ||
695 | throtl_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, bio->bi_rw); | 699 | throtl_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, bio->bi_rw); |
696 | } | 700 | } |
697 | 701 | ||
698 | static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg, | 702 | static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg, |
699 | struct bio *bio) | 703 | struct bio *bio) |
700 | { | 704 | { |
701 | bool rw = bio_data_dir(bio); | 705 | bool rw = bio_data_dir(bio); |
702 | 706 | ||
703 | bio_list_add(&tg->bio_lists[rw], bio); | 707 | bio_list_add(&tg->bio_lists[rw], bio); |
704 | /* Take a bio reference on tg */ | 708 | /* Take a bio reference on tg */ |
705 | blkg_get(tg_to_blkg(tg)); | 709 | blkg_get(tg_to_blkg(tg)); |
706 | tg->nr_queued[rw]++; | 710 | tg->nr_queued[rw]++; |
707 | td->nr_queued[rw]++; | 711 | td->nr_queued[rw]++; |
708 | throtl_enqueue_tg(td, tg); | 712 | throtl_enqueue_tg(td, tg); |
709 | } | 713 | } |
710 | 714 | ||
711 | static void tg_update_disptime(struct throtl_data *td, struct throtl_grp *tg) | 715 | static void tg_update_disptime(struct throtl_data *td, struct throtl_grp *tg) |
712 | { | 716 | { |
713 | unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime; | 717 | unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime; |
714 | struct bio *bio; | 718 | struct bio *bio; |
715 | 719 | ||
716 | if ((bio = bio_list_peek(&tg->bio_lists[READ]))) | 720 | if ((bio = bio_list_peek(&tg->bio_lists[READ]))) |
717 | tg_may_dispatch(td, tg, bio, &read_wait); | 721 | tg_may_dispatch(td, tg, bio, &read_wait); |
718 | 722 | ||
719 | if ((bio = bio_list_peek(&tg->bio_lists[WRITE]))) | 723 | if ((bio = bio_list_peek(&tg->bio_lists[WRITE]))) |
720 | tg_may_dispatch(td, tg, bio, &write_wait); | 724 | tg_may_dispatch(td, tg, bio, &write_wait); |
721 | 725 | ||
722 | min_wait = min(read_wait, write_wait); | 726 | min_wait = min(read_wait, write_wait); |
723 | disptime = jiffies + min_wait; | 727 | disptime = jiffies + min_wait; |
724 | 728 | ||
725 | /* Update dispatch time */ | 729 | /* Update dispatch time */ |
726 | throtl_dequeue_tg(td, tg); | 730 | throtl_dequeue_tg(td, tg); |
727 | tg->disptime = disptime; | 731 | tg->disptime = disptime; |
728 | throtl_enqueue_tg(td, tg); | 732 | throtl_enqueue_tg(td, tg); |
729 | } | 733 | } |
730 | 734 | ||
731 | static void tg_dispatch_one_bio(struct throtl_data *td, struct throtl_grp *tg, | 735 | static void tg_dispatch_one_bio(struct throtl_data *td, struct throtl_grp *tg, |
732 | bool rw, struct bio_list *bl) | 736 | bool rw, struct bio_list *bl) |
733 | { | 737 | { |
734 | struct bio *bio; | 738 | struct bio *bio; |
735 | 739 | ||
736 | bio = bio_list_pop(&tg->bio_lists[rw]); | 740 | bio = bio_list_pop(&tg->bio_lists[rw]); |
737 | tg->nr_queued[rw]--; | 741 | tg->nr_queued[rw]--; |
738 | /* Drop bio reference on blkg */ | 742 | /* Drop bio reference on blkg */ |
739 | blkg_put(tg_to_blkg(tg)); | 743 | blkg_put(tg_to_blkg(tg)); |
740 | 744 | ||
741 | BUG_ON(td->nr_queued[rw] <= 0); | 745 | BUG_ON(td->nr_queued[rw] <= 0); |
742 | td->nr_queued[rw]--; | 746 | td->nr_queued[rw]--; |
743 | 747 | ||
744 | throtl_charge_bio(tg, bio); | 748 | throtl_charge_bio(tg, bio); |
745 | bio_list_add(bl, bio); | 749 | bio_list_add(bl, bio); |
746 | bio->bi_rw |= REQ_THROTTLED; | 750 | bio->bi_rw |= REQ_THROTTLED; |
747 | 751 | ||
748 | throtl_trim_slice(td, tg, rw); | 752 | throtl_trim_slice(td, tg, rw); |
749 | } | 753 | } |
750 | 754 | ||
751 | static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg, | 755 | static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg, |
752 | struct bio_list *bl) | 756 | struct bio_list *bl) |
753 | { | 757 | { |
754 | unsigned int nr_reads = 0, nr_writes = 0; | 758 | unsigned int nr_reads = 0, nr_writes = 0; |
755 | unsigned int max_nr_reads = throtl_grp_quantum*3/4; | 759 | unsigned int max_nr_reads = throtl_grp_quantum*3/4; |
756 | unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads; | 760 | unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads; |
757 | struct bio *bio; | 761 | struct bio *bio; |
758 | 762 | ||
759 | /* Try to dispatch 75% READS and 25% WRITES */ | 763 | /* Try to dispatch 75% READS and 25% WRITES */ |
760 | 764 | ||
761 | while ((bio = bio_list_peek(&tg->bio_lists[READ])) | 765 | while ((bio = bio_list_peek(&tg->bio_lists[READ])) |
762 | && tg_may_dispatch(td, tg, bio, NULL)) { | 766 | && tg_may_dispatch(td, tg, bio, NULL)) { |
763 | 767 | ||
764 | tg_dispatch_one_bio(td, tg, bio_data_dir(bio), bl); | 768 | tg_dispatch_one_bio(td, tg, bio_data_dir(bio), bl); |
765 | nr_reads++; | 769 | nr_reads++; |
766 | 770 | ||
767 | if (nr_reads >= max_nr_reads) | 771 | if (nr_reads >= max_nr_reads) |
768 | break; | 772 | break; |
769 | } | 773 | } |
770 | 774 | ||
771 | while ((bio = bio_list_peek(&tg->bio_lists[WRITE])) | 775 | while ((bio = bio_list_peek(&tg->bio_lists[WRITE])) |
772 | && tg_may_dispatch(td, tg, bio, NULL)) { | 776 | && tg_may_dispatch(td, tg, bio, NULL)) { |
773 | 777 | ||
774 | tg_dispatch_one_bio(td, tg, bio_data_dir(bio), bl); | 778 | tg_dispatch_one_bio(td, tg, bio_data_dir(bio), bl); |
775 | nr_writes++; | 779 | nr_writes++; |
776 | 780 | ||
777 | if (nr_writes >= max_nr_writes) | 781 | if (nr_writes >= max_nr_writes) |
778 | break; | 782 | break; |
779 | } | 783 | } |
780 | 784 | ||
781 | return nr_reads + nr_writes; | 785 | return nr_reads + nr_writes; |
782 | } | 786 | } |
783 | 787 | ||
784 | static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl) | 788 | static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl) |
785 | { | 789 | { |
786 | unsigned int nr_disp = 0; | 790 | unsigned int nr_disp = 0; |
787 | struct throtl_grp *tg; | 791 | struct throtl_grp *tg; |
788 | struct throtl_rb_root *st = &td->tg_service_tree; | 792 | struct throtl_rb_root *st = &td->tg_service_tree; |
789 | 793 | ||
790 | while (1) { | 794 | while (1) { |
791 | tg = throtl_rb_first(st); | 795 | tg = throtl_rb_first(st); |
792 | 796 | ||
793 | if (!tg) | 797 | if (!tg) |
794 | break; | 798 | break; |
795 | 799 | ||
796 | if (time_before(jiffies, tg->disptime)) | 800 | if (time_before(jiffies, tg->disptime)) |
797 | break; | 801 | break; |
798 | 802 | ||
799 | throtl_dequeue_tg(td, tg); | 803 | throtl_dequeue_tg(td, tg); |
800 | 804 | ||
801 | nr_disp += throtl_dispatch_tg(td, tg, bl); | 805 | nr_disp += throtl_dispatch_tg(td, tg, bl); |
802 | 806 | ||
803 | if (tg->nr_queued[0] || tg->nr_queued[1]) { | 807 | if (tg->nr_queued[0] || tg->nr_queued[1]) { |
804 | tg_update_disptime(td, tg); | 808 | tg_update_disptime(td, tg); |
805 | throtl_enqueue_tg(td, tg); | 809 | throtl_enqueue_tg(td, tg); |
806 | } | 810 | } |
807 | 811 | ||
808 | if (nr_disp >= throtl_quantum) | 812 | if (nr_disp >= throtl_quantum) |
809 | break; | 813 | break; |
810 | } | 814 | } |
811 | 815 | ||
812 | return nr_disp; | 816 | return nr_disp; |
813 | } | 817 | } |
814 | 818 | ||
815 | static void throtl_process_limit_change(struct throtl_data *td) | 819 | static void throtl_process_limit_change(struct throtl_data *td) |
816 | { | 820 | { |
817 | struct request_queue *q = td->queue; | 821 | struct request_queue *q = td->queue; |
818 | struct blkio_group *blkg, *n; | 822 | struct blkio_group *blkg, *n; |
819 | 823 | ||
820 | if (!td->limits_changed) | 824 | if (!td->limits_changed) |
821 | return; | 825 | return; |
822 | 826 | ||
823 | xchg(&td->limits_changed, false); | 827 | xchg(&td->limits_changed, false); |
824 | 828 | ||
825 | throtl_log(td, "limits changed"); | 829 | throtl_log(td, "limits changed"); |
826 | 830 | ||
827 | list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { | 831 | list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { |
828 | struct throtl_grp *tg = blkg_to_tg(blkg); | 832 | struct throtl_grp *tg = blkg_to_tg(blkg); |
829 | 833 | ||
830 | if (!tg->limits_changed) | 834 | if (!tg->limits_changed) |
831 | continue; | 835 | continue; |
832 | 836 | ||
833 | if (!xchg(&tg->limits_changed, false)) | 837 | if (!xchg(&tg->limits_changed, false)) |
834 | continue; | 838 | continue; |
835 | 839 | ||
836 | throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu" | 840 | throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu" |
837 | " riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE], | 841 | " riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE], |
838 | tg->iops[READ], tg->iops[WRITE]); | 842 | tg->iops[READ], tg->iops[WRITE]); |
839 | 843 | ||
840 | /* | 844 | /* |
841 | * Restart the slices for both READ and WRITES. It | 845 | * Restart the slices for both READ and WRITES. It |
842 | * might happen that a group's limit are dropped | 846 | * might happen that a group's limit are dropped |
843 | * suddenly and we don't want to account recently | 847 | * suddenly and we don't want to account recently |
844 | * dispatched IO with new low rate | 848 | * dispatched IO with new low rate |
845 | */ | 849 | */ |
846 | throtl_start_new_slice(td, tg, 0); | 850 | throtl_start_new_slice(td, tg, 0); |
847 | throtl_start_new_slice(td, tg, 1); | 851 | throtl_start_new_slice(td, tg, 1); |
848 | 852 | ||
849 | if (throtl_tg_on_rr(tg)) | 853 | if (throtl_tg_on_rr(tg)) |
850 | tg_update_disptime(td, tg); | 854 | tg_update_disptime(td, tg); |
851 | } | 855 | } |
852 | } | 856 | } |
853 | 857 | ||
854 | /* Dispatch throttled bios. Should be called without queue lock held. */ | 858 | /* Dispatch throttled bios. Should be called without queue lock held. */ |
855 | static int throtl_dispatch(struct request_queue *q) | 859 | static int throtl_dispatch(struct request_queue *q) |
856 | { | 860 | { |
857 | struct throtl_data *td = q->td; | 861 | struct throtl_data *td = q->td; |
858 | unsigned int nr_disp = 0; | 862 | unsigned int nr_disp = 0; |
859 | struct bio_list bio_list_on_stack; | 863 | struct bio_list bio_list_on_stack; |
860 | struct bio *bio; | 864 | struct bio *bio; |
861 | struct blk_plug plug; | 865 | struct blk_plug plug; |
862 | 866 | ||
863 | spin_lock_irq(q->queue_lock); | 867 | spin_lock_irq(q->queue_lock); |
864 | 868 | ||
865 | throtl_process_limit_change(td); | 869 | throtl_process_limit_change(td); |
866 | 870 | ||
867 | if (!total_nr_queued(td)) | 871 | if (!total_nr_queued(td)) |
868 | goto out; | 872 | goto out; |
869 | 873 | ||
870 | bio_list_init(&bio_list_on_stack); | 874 | bio_list_init(&bio_list_on_stack); |
871 | 875 | ||
872 | throtl_log(td, "dispatch nr_queued=%u read=%u write=%u", | 876 | throtl_log(td, "dispatch nr_queued=%u read=%u write=%u", |
873 | total_nr_queued(td), td->nr_queued[READ], | 877 | total_nr_queued(td), td->nr_queued[READ], |
874 | td->nr_queued[WRITE]); | 878 | td->nr_queued[WRITE]); |
875 | 879 | ||
876 | nr_disp = throtl_select_dispatch(td, &bio_list_on_stack); | 880 | nr_disp = throtl_select_dispatch(td, &bio_list_on_stack); |
877 | 881 | ||
878 | if (nr_disp) | 882 | if (nr_disp) |
879 | throtl_log(td, "bios disp=%u", nr_disp); | 883 | throtl_log(td, "bios disp=%u", nr_disp); |
880 | 884 | ||
881 | throtl_schedule_next_dispatch(td); | 885 | throtl_schedule_next_dispatch(td); |
882 | out: | 886 | out: |
883 | spin_unlock_irq(q->queue_lock); | 887 | spin_unlock_irq(q->queue_lock); |
884 | 888 | ||
885 | /* | 889 | /* |
886 | * If we dispatched some requests, unplug the queue to make sure | 890 | * If we dispatched some requests, unplug the queue to make sure |
887 | * immediate dispatch | 891 | * immediate dispatch |
888 | */ | 892 | */ |
889 | if (nr_disp) { | 893 | if (nr_disp) { |
890 | blk_start_plug(&plug); | 894 | blk_start_plug(&plug); |
891 | while((bio = bio_list_pop(&bio_list_on_stack))) | 895 | while((bio = bio_list_pop(&bio_list_on_stack))) |
892 | generic_make_request(bio); | 896 | generic_make_request(bio); |
893 | blk_finish_plug(&plug); | 897 | blk_finish_plug(&plug); |
894 | } | 898 | } |
895 | return nr_disp; | 899 | return nr_disp; |
896 | } | 900 | } |
897 | 901 | ||
898 | void blk_throtl_work(struct work_struct *work) | 902 | void blk_throtl_work(struct work_struct *work) |
899 | { | 903 | { |
900 | struct throtl_data *td = container_of(work, struct throtl_data, | 904 | struct throtl_data *td = container_of(work, struct throtl_data, |
901 | throtl_work.work); | 905 | throtl_work.work); |
902 | struct request_queue *q = td->queue; | 906 | struct request_queue *q = td->queue; |
903 | 907 | ||
904 | throtl_dispatch(q); | 908 | throtl_dispatch(q); |
905 | } | 909 | } |
906 | 910 | ||
907 | /* Call with queue lock held */ | 911 | /* Call with queue lock held */ |
908 | static void | 912 | static void |
909 | throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) | 913 | throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) |
910 | { | 914 | { |
911 | 915 | ||
912 | struct delayed_work *dwork = &td->throtl_work; | 916 | struct delayed_work *dwork = &td->throtl_work; |
913 | 917 | ||
914 | /* schedule work if limits changed even if no bio is queued */ | 918 | /* schedule work if limits changed even if no bio is queued */ |
915 | if (total_nr_queued(td) || td->limits_changed) { | 919 | if (total_nr_queued(td) || td->limits_changed) { |
916 | /* | 920 | /* |
917 | * We might have a work item scheduled to be executed in the future. | 921 | * We might have a work item scheduled to be executed in the future. |
918 | * Cancel that and schedule a new one. | 922 | * Cancel that and schedule a new one. |
919 | */ | 923 | */ |
920 | __cancel_delayed_work(dwork); | 924 | __cancel_delayed_work(dwork); |
921 | queue_delayed_work(kthrotld_workqueue, dwork, delay); | 925 | queue_delayed_work(kthrotld_workqueue, dwork, delay); |
922 | throtl_log(td, "schedule work. delay=%lu jiffies=%lu", | 926 | throtl_log(td, "schedule work. delay=%lu jiffies=%lu", |
923 | delay, jiffies); | 927 | delay, jiffies); |
924 | } | 928 | } |
925 | } | 929 | } |
926 | 930 | ||
927 | static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, void *pdata, int off) | 931 | static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, void *pdata, int off) |
928 | { | 932 | { |
929 | struct throtl_grp *tg = pdata; | 933 | struct throtl_grp *tg = pdata; |
930 | struct blkg_rwstat rwstat = { }, tmp; | 934 | struct blkg_rwstat rwstat = { }, tmp; |
931 | int i, cpu; | 935 | int i, cpu; |
932 | 936 | ||
933 | for_each_possible_cpu(cpu) { | 937 | for_each_possible_cpu(cpu) { |
934 | struct tg_stats_cpu *sc = per_cpu_ptr(tg->stats_cpu, cpu); | 938 | struct tg_stats_cpu *sc = per_cpu_ptr(tg->stats_cpu, cpu); |
935 | 939 | ||
936 | tmp = blkg_rwstat_read((void *)sc + off); | 940 | tmp = blkg_rwstat_read((void *)sc + off); |
937 | for (i = 0; i < BLKG_RWSTAT_NR; i++) | 941 | for (i = 0; i < BLKG_RWSTAT_NR; i++) |
938 | rwstat.cnt[i] += tmp.cnt[i]; | 942 | rwstat.cnt[i] += tmp.cnt[i]; |
939 | } | 943 | } |
940 | 944 | ||
941 | return __blkg_prfill_rwstat(sf, pdata, &rwstat); | 945 | return __blkg_prfill_rwstat(sf, pdata, &rwstat); |
942 | } | 946 | } |
943 | 947 | ||
944 | static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, | 948 | static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, |
945 | struct seq_file *sf) | 949 | struct seq_file *sf) |
946 | { | 950 | { |
947 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); | 951 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); |
948 | 952 | ||
949 | blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkio_policy_throtl, | 953 | blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkio_policy_throtl, |
950 | cft->private, true); | 954 | cft->private, true); |
951 | return 0; | 955 | return 0; |
952 | } | 956 | } |
953 | 957 | ||
954 | static u64 tg_prfill_conf_u64(struct seq_file *sf, void *pdata, int off) | 958 | static u64 tg_prfill_conf_u64(struct seq_file *sf, void *pdata, int off) |
955 | { | 959 | { |
956 | u64 v = *(u64 *)(pdata + off); | 960 | u64 v = *(u64 *)(pdata + off); |
957 | 961 | ||
958 | if (v == -1) | 962 | if (v == -1) |
959 | return 0; | 963 | return 0; |
960 | return __blkg_prfill_u64(sf, pdata, v); | 964 | return __blkg_prfill_u64(sf, pdata, v); |
961 | } | 965 | } |
962 | 966 | ||
963 | static u64 tg_prfill_conf_uint(struct seq_file *sf, void *pdata, int off) | 967 | static u64 tg_prfill_conf_uint(struct seq_file *sf, void *pdata, int off) |
964 | { | 968 | { |
965 | unsigned int v = *(unsigned int *)(pdata + off); | 969 | unsigned int v = *(unsigned int *)(pdata + off); |
966 | 970 | ||
967 | if (v == -1) | 971 | if (v == -1) |
968 | return 0; | 972 | return 0; |
969 | return __blkg_prfill_u64(sf, pdata, v); | 973 | return __blkg_prfill_u64(sf, pdata, v); |
970 | } | 974 | } |
971 | 975 | ||
972 | static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, | 976 | static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, |
973 | struct seq_file *sf) | 977 | struct seq_file *sf) |
974 | { | 978 | { |
975 | blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_u64, | 979 | blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_u64, |
976 | &blkio_policy_throtl, cft->private, false); | 980 | &blkio_policy_throtl, cft->private, false); |
977 | return 0; | 981 | return 0; |
978 | } | 982 | } |
979 | 983 | ||
980 | static int tg_print_conf_uint(struct cgroup *cgrp, struct cftype *cft, | 984 | static int tg_print_conf_uint(struct cgroup *cgrp, struct cftype *cft, |
981 | struct seq_file *sf) | 985 | struct seq_file *sf) |
982 | { | 986 | { |
983 | blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_uint, | 987 | blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_uint, |
984 | &blkio_policy_throtl, cft->private, false); | 988 | &blkio_policy_throtl, cft->private, false); |
985 | return 0; | 989 | return 0; |
986 | } | 990 | } |
987 | 991 | ||
988 | static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf, | 992 | static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf, |
989 | bool is_u64) | 993 | bool is_u64) |
990 | { | 994 | { |
991 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); | 995 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); |
992 | struct blkg_conf_ctx ctx; | 996 | struct blkg_conf_ctx ctx; |
993 | struct throtl_grp *tg; | 997 | struct throtl_grp *tg; |
994 | int ret; | 998 | int ret; |
995 | 999 | ||
996 | ret = blkg_conf_prep(blkcg, &blkio_policy_throtl, buf, &ctx); | 1000 | ret = blkg_conf_prep(blkcg, &blkio_policy_throtl, buf, &ctx); |
997 | if (ret) | 1001 | if (ret) |
998 | return ret; | 1002 | return ret; |
999 | 1003 | ||
1000 | ret = -EINVAL; | 1004 | ret = -EINVAL; |
1001 | tg = blkg_to_tg(ctx.blkg); | 1005 | tg = blkg_to_tg(ctx.blkg); |
1002 | if (tg) { | 1006 | if (tg) { |
1003 | struct throtl_data *td = ctx.blkg->q->td; | 1007 | struct throtl_data *td = ctx.blkg->q->td; |
1004 | 1008 | ||
1005 | if (!ctx.v) | 1009 | if (!ctx.v) |
1006 | ctx.v = -1; | 1010 | ctx.v = -1; |
1007 | 1011 | ||
1008 | if (is_u64) | 1012 | if (is_u64) |
1009 | *(u64 *)((void *)tg + cft->private) = ctx.v; | 1013 | *(u64 *)((void *)tg + cft->private) = ctx.v; |
1010 | else | 1014 | else |
1011 | *(unsigned int *)((void *)tg + cft->private) = ctx.v; | 1015 | *(unsigned int *)((void *)tg + cft->private) = ctx.v; |
1012 | 1016 | ||
1013 | /* XXX: we don't need the following deferred processing */ | 1017 | /* XXX: we don't need the following deferred processing */ |
1014 | xchg(&tg->limits_changed, true); | 1018 | xchg(&tg->limits_changed, true); |
1015 | xchg(&td->limits_changed, true); | 1019 | xchg(&td->limits_changed, true); |
1016 | throtl_schedule_delayed_work(td, 0); | 1020 | throtl_schedule_delayed_work(td, 0); |
1017 | 1021 | ||
1018 | ret = 0; | 1022 | ret = 0; |
1019 | } | 1023 | } |
1020 | 1024 | ||
1021 | blkg_conf_finish(&ctx); | 1025 | blkg_conf_finish(&ctx); |
1022 | return ret; | 1026 | return ret; |
1023 | } | 1027 | } |
1024 | 1028 | ||
1025 | static int tg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, | 1029 | static int tg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, |
1026 | const char *buf) | 1030 | const char *buf) |
1027 | { | 1031 | { |
1028 | return tg_set_conf(cgrp, cft, buf, true); | 1032 | return tg_set_conf(cgrp, cft, buf, true); |
1029 | } | 1033 | } |
1030 | 1034 | ||
1031 | static int tg_set_conf_uint(struct cgroup *cgrp, struct cftype *cft, | 1035 | static int tg_set_conf_uint(struct cgroup *cgrp, struct cftype *cft, |
1032 | const char *buf) | 1036 | const char *buf) |
1033 | { | 1037 | { |
1034 | return tg_set_conf(cgrp, cft, buf, false); | 1038 | return tg_set_conf(cgrp, cft, buf, false); |
1035 | } | 1039 | } |
1036 | 1040 | ||
1037 | static struct cftype throtl_files[] = { | 1041 | static struct cftype throtl_files[] = { |
1038 | { | 1042 | { |
1039 | .name = "throttle.read_bps_device", | 1043 | .name = "throttle.read_bps_device", |
1040 | .private = offsetof(struct throtl_grp, bps[READ]), | 1044 | .private = offsetof(struct throtl_grp, bps[READ]), |
1041 | .read_seq_string = tg_print_conf_u64, | 1045 | .read_seq_string = tg_print_conf_u64, |
1042 | .write_string = tg_set_conf_u64, | 1046 | .write_string = tg_set_conf_u64, |
1043 | .max_write_len = 256, | 1047 | .max_write_len = 256, |
1044 | }, | 1048 | }, |
1045 | { | 1049 | { |
1046 | .name = "throttle.write_bps_device", | 1050 | .name = "throttle.write_bps_device", |
1047 | .private = offsetof(struct throtl_grp, bps[WRITE]), | 1051 | .private = offsetof(struct throtl_grp, bps[WRITE]), |
1048 | .read_seq_string = tg_print_conf_u64, | 1052 | .read_seq_string = tg_print_conf_u64, |
1049 | .write_string = tg_set_conf_u64, | 1053 | .write_string = tg_set_conf_u64, |
1050 | .max_write_len = 256, | 1054 | .max_write_len = 256, |
1051 | }, | 1055 | }, |
1052 | { | 1056 | { |
1053 | .name = "throttle.read_iops_device", | 1057 | .name = "throttle.read_iops_device", |
1054 | .private = offsetof(struct throtl_grp, iops[READ]), | 1058 | .private = offsetof(struct throtl_grp, iops[READ]), |
1055 | .read_seq_string = tg_print_conf_uint, | 1059 | .read_seq_string = tg_print_conf_uint, |
1056 | .write_string = tg_set_conf_uint, | 1060 | .write_string = tg_set_conf_uint, |
1057 | .max_write_len = 256, | 1061 | .max_write_len = 256, |
1058 | }, | 1062 | }, |
1059 | { | 1063 | { |
1060 | .name = "throttle.write_iops_device", | 1064 | .name = "throttle.write_iops_device", |
1061 | .private = offsetof(struct throtl_grp, iops[WRITE]), | 1065 | .private = offsetof(struct throtl_grp, iops[WRITE]), |
1062 | .read_seq_string = tg_print_conf_uint, | 1066 | .read_seq_string = tg_print_conf_uint, |
1063 | .write_string = tg_set_conf_uint, | 1067 | .write_string = tg_set_conf_uint, |
1064 | .max_write_len = 256, | 1068 | .max_write_len = 256, |
1065 | }, | 1069 | }, |
1066 | { | 1070 | { |
1067 | .name = "throttle.io_service_bytes", | 1071 | .name = "throttle.io_service_bytes", |
1068 | .private = offsetof(struct tg_stats_cpu, service_bytes), | 1072 | .private = offsetof(struct tg_stats_cpu, service_bytes), |
1069 | .read_seq_string = tg_print_cpu_rwstat, | 1073 | .read_seq_string = tg_print_cpu_rwstat, |
1070 | }, | 1074 | }, |
1071 | { | 1075 | { |
1072 | .name = "throttle.io_serviced", | 1076 | .name = "throttle.io_serviced", |
1073 | .private = offsetof(struct tg_stats_cpu, serviced), | 1077 | .private = offsetof(struct tg_stats_cpu, serviced), |
1074 | .read_seq_string = tg_print_cpu_rwstat, | 1078 | .read_seq_string = tg_print_cpu_rwstat, |
1075 | }, | 1079 | }, |
1076 | { } /* terminate */ | 1080 | { } /* terminate */ |
1077 | }; | 1081 | }; |
1078 | 1082 | ||
1079 | static void throtl_shutdown_wq(struct request_queue *q) | 1083 | static void throtl_shutdown_wq(struct request_queue *q) |
1080 | { | 1084 | { |
1081 | struct throtl_data *td = q->td; | 1085 | struct throtl_data *td = q->td; |
1082 | 1086 | ||
1083 | cancel_delayed_work_sync(&td->throtl_work); | 1087 | cancel_delayed_work_sync(&td->throtl_work); |
1084 | } | 1088 | } |
1085 | 1089 | ||
1086 | static struct blkio_policy_type blkio_policy_throtl = { | 1090 | static struct blkio_policy_type blkio_policy_throtl = { |
1087 | .ops = { | 1091 | .ops = { |
1088 | .blkio_init_group_fn = throtl_init_blkio_group, | 1092 | .blkio_init_group_fn = throtl_init_blkio_group, |
1089 | .blkio_exit_group_fn = throtl_exit_blkio_group, | 1093 | .blkio_exit_group_fn = throtl_exit_blkio_group, |
1090 | .blkio_reset_group_stats_fn = throtl_reset_group_stats, | 1094 | .blkio_reset_group_stats_fn = throtl_reset_group_stats, |
1091 | }, | 1095 | }, |
1092 | .pdata_size = sizeof(struct throtl_grp), | 1096 | .pdata_size = sizeof(struct throtl_grp), |
1093 | .cftypes = throtl_files, | 1097 | .cftypes = throtl_files, |
1094 | }; | 1098 | }; |
1095 | 1099 | ||
1096 | bool blk_throtl_bio(struct request_queue *q, struct bio *bio) | 1100 | bool blk_throtl_bio(struct request_queue *q, struct bio *bio) |
1097 | { | 1101 | { |
1098 | struct throtl_data *td = q->td; | 1102 | struct throtl_data *td = q->td; |
1099 | struct throtl_grp *tg; | 1103 | struct throtl_grp *tg; |
1100 | bool rw = bio_data_dir(bio), update_disptime = true; | 1104 | bool rw = bio_data_dir(bio), update_disptime = true; |
1101 | struct blkio_cgroup *blkcg; | 1105 | struct blkio_cgroup *blkcg; |
1102 | bool throttled = false; | 1106 | bool throttled = false; |
1103 | 1107 | ||
1104 | if (bio->bi_rw & REQ_THROTTLED) { | 1108 | if (bio->bi_rw & REQ_THROTTLED) { |
1105 | bio->bi_rw &= ~REQ_THROTTLED; | 1109 | bio->bi_rw &= ~REQ_THROTTLED; |
1106 | goto out; | 1110 | goto out; |
1107 | } | 1111 | } |
1108 | 1112 | ||
1109 | /* bio_associate_current() needs ioc, try creating */ | 1113 | /* bio_associate_current() needs ioc, try creating */ |
1110 | create_io_context(GFP_ATOMIC, q->node); | 1114 | create_io_context(GFP_ATOMIC, q->node); |
1111 | 1115 | ||
1112 | /* | 1116 | /* |
1113 | * A throtl_grp pointer retrieved under rcu can be used to access | 1117 | * A throtl_grp pointer retrieved under rcu can be used to access |
1114 | * basic fields like stats and io rates. If a group has no rules, | 1118 | * basic fields like stats and io rates. If a group has no rules, |
1115 | * just update the dispatch stats in a lockless manner and return. | 1119 | * just update the dispatch stats in a lockless manner and return. |
1116 | */ | 1120 | */ |
1117 | rcu_read_lock(); | 1121 | rcu_read_lock(); |
1118 | blkcg = bio_blkio_cgroup(bio); | 1122 | blkcg = bio_blkio_cgroup(bio); |
1119 | tg = throtl_lookup_tg(td, blkcg); | 1123 | tg = throtl_lookup_tg(td, blkcg); |
1120 | if (tg) { | 1124 | if (tg) { |
1121 | if (tg_no_rule_group(tg, rw)) { | 1125 | if (tg_no_rule_group(tg, rw)) { |
1122 | throtl_update_dispatch_stats(tg_to_blkg(tg), | 1126 | throtl_update_dispatch_stats(tg_to_blkg(tg), |
1123 | bio->bi_size, bio->bi_rw); | 1127 | bio->bi_size, bio->bi_rw); |
1124 | goto out_unlock_rcu; | 1128 | goto out_unlock_rcu; |
1125 | } | 1129 | } |
1126 | } | 1130 | } |
1127 | 1131 | ||
1128 | /* | 1132 | /* |
1129 | * Either the group has not been allocated yet or it is not an | 1133 | * Either the group has not been allocated yet or it is not an |
1130 | * unlimited IO group | 1134 | * unlimited IO group |
1131 | */ | 1135 | */ |
1132 | spin_lock_irq(q->queue_lock); | 1136 | spin_lock_irq(q->queue_lock); |
1133 | tg = throtl_lookup_create_tg(td, blkcg); | 1137 | tg = throtl_lookup_create_tg(td, blkcg); |
1134 | if (unlikely(!tg)) | 1138 | if (unlikely(!tg)) |
1135 | goto out_unlock; | 1139 | goto out_unlock; |
1136 | 1140 | ||
1137 | if (tg->nr_queued[rw]) { | 1141 | if (tg->nr_queued[rw]) { |
1138 | /* | 1142 | /* |
1139 | * There is already another bio queued in the same direction. | 1143 | * There is already another bio queued in the same direction. |
1140 | * No need to update the dispatch time. | 1144 | * No need to update the dispatch time. |
1141 | */ | 1145 | */ |
1142 | update_disptime = false; | 1146 | update_disptime = false; |
1143 | goto queue_bio; | 1147 | goto queue_bio; |
1144 | 1148 | ||
1145 | } | 1149 | } |
1146 | 1150 | ||
1147 | /* Bio is within the rate limit of the group */ | 1151 | /* Bio is within the rate limit of the group */ |
1148 | if (tg_may_dispatch(td, tg, bio, NULL)) { | 1152 | if (tg_may_dispatch(td, tg, bio, NULL)) { |
1149 | throtl_charge_bio(tg, bio); | 1153 | throtl_charge_bio(tg, bio); |
1150 | 1154 | ||
1151 | /* | 1155 | /* |
1152 | * We need to trim the slice even when bios are not being queued, | 1156 | * We need to trim the slice even when bios are not being queued, |
1153 | * otherwise it might happen that a bio is not queued for | 1157 | * otherwise it might happen that a bio is not queued for |
1154 | * a long time and the slice keeps on extending and trim is not | 1158 | * a long time and the slice keeps on extending and trim is not |
1155 | * called for a long time. Now if limits are reduced suddenly | 1159 | * called for a long time. Now if limits are reduced suddenly |
1156 | * we take into account all the IO dispatched so far at the new | 1160 | * we take into account all the IO dispatched so far at the new |
1157 | * low rate and newly queued IO gets a really long dispatch | 1161 | * low rate and newly queued IO gets a really long dispatch |
1158 | * time. | 1162 | * time. |
1159 | * | 1163 | * |
1160 | * So keep on trimming slice even if bio is not queued. | 1164 | * So keep on trimming slice even if bio is not queued. |
1161 | */ | 1165 | */ |
1162 | throtl_trim_slice(td, tg, rw); | 1166 | throtl_trim_slice(td, tg, rw); |
1163 | goto out_unlock; | 1167 | goto out_unlock; |
1164 | } | 1168 | } |
1165 | 1169 | ||
1166 | queue_bio: | 1170 | queue_bio: |
1167 | throtl_log_tg(td, tg, "[%c] bio. bdisp=%llu sz=%u bps=%llu" | 1171 | throtl_log_tg(td, tg, "[%c] bio. bdisp=%llu sz=%u bps=%llu" |
1168 | " iodisp=%u iops=%u queued=%d/%d", | 1172 | " iodisp=%u iops=%u queued=%d/%d", |
1169 | rw == READ ? 'R' : 'W', | 1173 | rw == READ ? 'R' : 'W', |
1170 | tg->bytes_disp[rw], bio->bi_size, tg->bps[rw], | 1174 | tg->bytes_disp[rw], bio->bi_size, tg->bps[rw], |
1171 | tg->io_disp[rw], tg->iops[rw], | 1175 | tg->io_disp[rw], tg->iops[rw], |
1172 | tg->nr_queued[READ], tg->nr_queued[WRITE]); | 1176 | tg->nr_queued[READ], tg->nr_queued[WRITE]); |
1173 | 1177 | ||
1174 | bio_associate_current(bio); | 1178 | bio_associate_current(bio); |
1175 | throtl_add_bio_tg(q->td, tg, bio); | 1179 | throtl_add_bio_tg(q->td, tg, bio); |
1176 | throttled = true; | 1180 | throttled = true; |
1177 | 1181 | ||
1178 | if (update_disptime) { | 1182 | if (update_disptime) { |
1179 | tg_update_disptime(td, tg); | 1183 | tg_update_disptime(td, tg); |
1180 | throtl_schedule_next_dispatch(td); | 1184 | throtl_schedule_next_dispatch(td); |
1181 | } | 1185 | } |
1182 | 1186 | ||
1183 | out_unlock: | 1187 | out_unlock: |
1184 | spin_unlock_irq(q->queue_lock); | 1188 | spin_unlock_irq(q->queue_lock); |
1185 | out_unlock_rcu: | 1189 | out_unlock_rcu: |
1186 | rcu_read_unlock(); | 1190 | rcu_read_unlock(); |
1187 | out: | 1191 | out: |
1188 | return throttled; | 1192 | return throttled; |
1189 | } | 1193 | } |
1190 | 1194 | ||
1191 | /** | 1195 | /** |
1192 | * blk_throtl_drain - drain throttled bios | 1196 | * blk_throtl_drain - drain throttled bios |
1193 | * @q: request_queue to drain throttled bios for | 1197 | * @q: request_queue to drain throttled bios for |
1194 | * | 1198 | * |
1195 | * Dispatch all currently throttled bios on @q through ->make_request_fn(). | 1199 | * Dispatch all currently throttled bios on @q through ->make_request_fn(). |
1196 | */ | 1200 | */ |
1197 | void blk_throtl_drain(struct request_queue *q) | 1201 | void blk_throtl_drain(struct request_queue *q) |
1198 | __releases(q->queue_lock) __acquires(q->queue_lock) | 1202 | __releases(q->queue_lock) __acquires(q->queue_lock) |
1199 | { | 1203 | { |
1200 | struct throtl_data *td = q->td; | 1204 | struct throtl_data *td = q->td; |
1201 | struct throtl_rb_root *st = &td->tg_service_tree; | 1205 | struct throtl_rb_root *st = &td->tg_service_tree; |
1202 | struct throtl_grp *tg; | 1206 | struct throtl_grp *tg; |
1203 | struct bio_list bl; | 1207 | struct bio_list bl; |
1204 | struct bio *bio; | 1208 | struct bio *bio; |
1205 | 1209 | ||
1206 | WARN_ON_ONCE(!queue_is_locked(q)); | 1210 | WARN_ON_ONCE(!queue_is_locked(q)); |
1207 | 1211 | ||
1208 | bio_list_init(&bl); | 1212 | bio_list_init(&bl); |
1209 | 1213 | ||
1210 | while ((tg = throtl_rb_first(st))) { | 1214 | while ((tg = throtl_rb_first(st))) { |
1211 | throtl_dequeue_tg(td, tg); | 1215 | throtl_dequeue_tg(td, tg); |
1212 | 1216 | ||
1213 | while ((bio = bio_list_peek(&tg->bio_lists[READ]))) | 1217 | while ((bio = bio_list_peek(&tg->bio_lists[READ]))) |
1214 | tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); | 1218 | tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); |
1215 | while ((bio = bio_list_peek(&tg->bio_lists[WRITE]))) | 1219 | while ((bio = bio_list_peek(&tg->bio_lists[WRITE]))) |
1216 | tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); | 1220 | tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); |
1217 | } | 1221 | } |
1218 | spin_unlock_irq(q->queue_lock); | 1222 | spin_unlock_irq(q->queue_lock); |
1219 | 1223 | ||
1220 | while ((bio = bio_list_pop(&bl))) | 1224 | while ((bio = bio_list_pop(&bl))) |
1221 | generic_make_request(bio); | 1225 | generic_make_request(bio); |
1222 | 1226 | ||
1223 | spin_lock_irq(q->queue_lock); | 1227 | spin_lock_irq(q->queue_lock); |
1224 | } | 1228 | } |
1225 | 1229 | ||
1226 | int blk_throtl_init(struct request_queue *q) | 1230 | int blk_throtl_init(struct request_queue *q) |
1227 | { | 1231 | { |
1228 | struct throtl_data *td; | 1232 | struct throtl_data *td; |
1229 | struct blkio_group *blkg; | 1233 | struct blkio_group *blkg; |
1230 | 1234 | ||
1231 | td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); | 1235 | td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); |
1232 | if (!td) | 1236 | if (!td) |
1233 | return -ENOMEM; | 1237 | return -ENOMEM; |
1234 | 1238 | ||
1235 | td->tg_service_tree = THROTL_RB_ROOT; | 1239 | td->tg_service_tree = THROTL_RB_ROOT; |
1236 | td->limits_changed = false; | 1240 | td->limits_changed = false; |
1237 | INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work); | 1241 | INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work); |
1238 | 1242 | ||
1239 | q->td = td; | 1243 | q->td = td; |
1240 | td->queue = q; | 1244 | td->queue = q; |
1241 | 1245 | ||
1242 | /* alloc and init root group. */ | 1246 | /* alloc and init root group. */ |
1243 | rcu_read_lock(); | 1247 | rcu_read_lock(); |
1244 | spin_lock_irq(q->queue_lock); | 1248 | spin_lock_irq(q->queue_lock); |
1245 | 1249 | ||
1246 | blkg = blkg_lookup_create(&blkio_root_cgroup, q, true); | 1250 | blkg = blkg_lookup_create(&blkio_root_cgroup, q, true); |
1247 | if (!IS_ERR(blkg)) | 1251 | if (!IS_ERR(blkg)) |
1248 | td->root_tg = blkg_to_tg(blkg); | 1252 | q->root_blkg = blkg; |
1249 | 1253 | ||
1250 | spin_unlock_irq(q->queue_lock); | 1254 | spin_unlock_irq(q->queue_lock); |
1251 | rcu_read_unlock(); | 1255 | rcu_read_unlock(); |
1252 | 1256 | ||
1253 | if (!td->root_tg) { | 1257 | if (!q->root_blkg) { |
1254 | kfree(td); | 1258 | kfree(td); |
1255 | return -ENOMEM; | 1259 | return -ENOMEM; |
1256 | } | 1260 | } |
1257 | return 0; | 1261 | return 0; |
1258 | } | 1262 | } |
1259 | 1263 | ||
1260 | void blk_throtl_exit(struct request_queue *q) | 1264 | void blk_throtl_exit(struct request_queue *q) |
1261 | { | 1265 | { |
1262 | BUG_ON(!q->td); | 1266 | BUG_ON(!q->td); |
1263 | throtl_shutdown_wq(q); | 1267 | throtl_shutdown_wq(q); |
1264 | kfree(q->td); | 1268 | kfree(q->td); |
1265 | } | 1269 | } |
1266 | 1270 | ||
1267 | static int __init throtl_init(void) | 1271 | static int __init throtl_init(void) |
1268 | { | 1272 | { |
1269 | kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0); | 1273 | kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0); |
1270 | if (!kthrotld_workqueue) | 1274 | if (!kthrotld_workqueue) |
1271 | panic("Failed to create kthrotld\n"); | 1275 | panic("Failed to create kthrotld\n"); |
1272 | 1276 | ||
1273 | return blkio_policy_register(&blkio_policy_throtl); | 1277 | return blkio_policy_register(&blkio_policy_throtl); |
1274 | } | 1278 | } |
1275 | 1279 | ||
1276 | module_init(throtl_init); | 1280 | module_init(throtl_init); |
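The hunk in blk_throtl_init() above is the blk-throttle half of this change: the root blkio_group is now cached in q->root_blkg rather than in throtl_data->root_tg. As a minimal sketch of how the root throtl_grp can still be reached through the queue (the helper name td_root_tg() is assumed for illustration; the actual replacement for the old td->root_tg users sits in an earlier hunk of this file that is not shown here):

static struct throtl_grp *td_root_tg(struct throtl_data *td)
{
	/* q->root_blkg is set up by blk_throtl_init(); blkg_to_tg() maps the
	 * generic blkio_group back to the throttle policy's private data. */
	return blkg_to_tg(td->queue->root_blkg);
}

Keeping only the generic pointer on the request_queue is what later lets root blkg creation move into the blkcg core for per-queue policy activation, as the commit message describes.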
block/cfq-iosched.c
1 | /* | 1 | /* |
2 | * CFQ, or complete fairness queueing, disk scheduler. | 2 | * CFQ, or complete fairness queueing, disk scheduler. |
3 | * | 3 | * |
4 | * Based on ideas from a previously unfinished io | 4 | * Based on ideas from a previously unfinished io |
5 | * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. | 5 | * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. |
6 | * | 6 | * |
7 | * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> | 7 | * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> |
8 | */ | 8 | */ |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
11 | #include <linux/blkdev.h> | 11 | #include <linux/blkdev.h> |
12 | #include <linux/elevator.h> | 12 | #include <linux/elevator.h> |
13 | #include <linux/jiffies.h> | 13 | #include <linux/jiffies.h> |
14 | #include <linux/rbtree.h> | 14 | #include <linux/rbtree.h> |
15 | #include <linux/ioprio.h> | 15 | #include <linux/ioprio.h> |
16 | #include <linux/blktrace_api.h> | 16 | #include <linux/blktrace_api.h> |
17 | #include "blk.h" | 17 | #include "blk.h" |
18 | #include "blk-cgroup.h" | 18 | #include "blk-cgroup.h" |
19 | 19 | ||
20 | static struct blkio_policy_type blkio_policy_cfq __maybe_unused; | 20 | static struct blkio_policy_type blkio_policy_cfq __maybe_unused; |
21 | 21 | ||
22 | /* | 22 | /* |
23 | * tunables | 23 | * tunables |
24 | */ | 24 | */ |
25 | /* max queue in one round of service */ | 25 | /* max queue in one round of service */ |
26 | static const int cfq_quantum = 8; | 26 | static const int cfq_quantum = 8; |
27 | static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; | 27 | static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; |
28 | /* maximum backwards seek, in KiB */ | 28 | /* maximum backwards seek, in KiB */ |
29 | static const int cfq_back_max = 16 * 1024; | 29 | static const int cfq_back_max = 16 * 1024; |
30 | /* penalty of a backwards seek */ | 30 | /* penalty of a backwards seek */ |
31 | static const int cfq_back_penalty = 2; | 31 | static const int cfq_back_penalty = 2; |
32 | static const int cfq_slice_sync = HZ / 10; | 32 | static const int cfq_slice_sync = HZ / 10; |
33 | static int cfq_slice_async = HZ / 25; | 33 | static int cfq_slice_async = HZ / 25; |
34 | static const int cfq_slice_async_rq = 2; | 34 | static const int cfq_slice_async_rq = 2; |
35 | static int cfq_slice_idle = HZ / 125; | 35 | static int cfq_slice_idle = HZ / 125; |
36 | static int cfq_group_idle = HZ / 125; | 36 | static int cfq_group_idle = HZ / 125; |
37 | static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ | 37 | static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ |
38 | static const int cfq_hist_divisor = 4; | 38 | static const int cfq_hist_divisor = 4; |
39 | 39 | ||
40 | /* | 40 | /* |
41 | * offset from end of service tree | 41 | * offset from end of service tree |
42 | */ | 42 | */ |
43 | #define CFQ_IDLE_DELAY (HZ / 5) | 43 | #define CFQ_IDLE_DELAY (HZ / 5) |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * below this threshold, we consider thinktime immediate | 46 | * below this threshold, we consider thinktime immediate |
47 | */ | 47 | */ |
48 | #define CFQ_MIN_TT (2) | 48 | #define CFQ_MIN_TT (2) |
49 | 49 | ||
50 | #define CFQ_SLICE_SCALE (5) | 50 | #define CFQ_SLICE_SCALE (5) |
51 | #define CFQ_HW_QUEUE_MIN (5) | 51 | #define CFQ_HW_QUEUE_MIN (5) |
52 | #define CFQ_SERVICE_SHIFT 12 | 52 | #define CFQ_SERVICE_SHIFT 12 |
53 | 53 | ||
54 | #define CFQQ_SEEK_THR (sector_t)(8 * 100) | 54 | #define CFQQ_SEEK_THR (sector_t)(8 * 100) |
55 | #define CFQQ_CLOSE_THR (sector_t)(8 * 1024) | 55 | #define CFQQ_CLOSE_THR (sector_t)(8 * 1024) |
56 | #define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32) | 56 | #define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32) |
57 | #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) | 57 | #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) |
58 | 58 | ||
59 | #define RQ_CIC(rq) icq_to_cic((rq)->elv.icq) | 59 | #define RQ_CIC(rq) icq_to_cic((rq)->elv.icq) |
60 | #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elv.priv[0]) | 60 | #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elv.priv[0]) |
61 | #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elv.priv[1]) | 61 | #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elv.priv[1]) |
62 | 62 | ||
63 | static struct kmem_cache *cfq_pool; | 63 | static struct kmem_cache *cfq_pool; |
64 | 64 | ||
65 | #define CFQ_PRIO_LISTS IOPRIO_BE_NR | 65 | #define CFQ_PRIO_LISTS IOPRIO_BE_NR |
66 | #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) | 66 | #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) |
67 | #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) | 67 | #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) |
68 | 68 | ||
69 | #define sample_valid(samples) ((samples) > 80) | 69 | #define sample_valid(samples) ((samples) > 80) |
70 | #define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node) | 70 | #define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node) |
71 | 71 | ||
72 | struct cfq_ttime { | 72 | struct cfq_ttime { |
73 | unsigned long last_end_request; | 73 | unsigned long last_end_request; |
74 | 74 | ||
75 | unsigned long ttime_total; | 75 | unsigned long ttime_total; |
76 | unsigned long ttime_samples; | 76 | unsigned long ttime_samples; |
77 | unsigned long ttime_mean; | 77 | unsigned long ttime_mean; |
78 | }; | 78 | }; |
79 | 79 | ||
80 | /* | 80 | /* |
81 | * Most of our rbtree usage is for sorting with min extraction, so | 81 | * Most of our rbtree usage is for sorting with min extraction, so |
82 | * if we cache the leftmost node we don't have to walk down the tree | 82 | * if we cache the leftmost node we don't have to walk down the tree |
83 | * to find it. Idea borrowed from Ingo Molnar's CFS scheduler. We should | 83 | * to find it. Idea borrowed from Ingo Molnar's CFS scheduler. We should |
84 | * move this into the elevator for the rq sorting as well. | 84 | * move this into the elevator for the rq sorting as well. |
85 | */ | 85 | */ |
86 | struct cfq_rb_root { | 86 | struct cfq_rb_root { |
87 | struct rb_root rb; | 87 | struct rb_root rb; |
88 | struct rb_node *left; | 88 | struct rb_node *left; |
89 | unsigned count; | 89 | unsigned count; |
90 | unsigned total_weight; | 90 | unsigned total_weight; |
91 | u64 min_vdisktime; | 91 | u64 min_vdisktime; |
92 | struct cfq_ttime ttime; | 92 | struct cfq_ttime ttime; |
93 | }; | 93 | }; |
94 | #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \ | 94 | #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \ |
95 | .ttime = {.last_end_request = jiffies,},} | 95 | .ttime = {.last_end_request = jiffies,},} |
96 | 96 | ||
97 | /* | 97 | /* |
98 | * Per process-grouping structure | 98 | * Per process-grouping structure |
99 | */ | 99 | */ |
100 | struct cfq_queue { | 100 | struct cfq_queue { |
101 | /* reference count */ | 101 | /* reference count */ |
102 | int ref; | 102 | int ref; |
103 | /* various state flags, see below */ | 103 | /* various state flags, see below */ |
104 | unsigned int flags; | 104 | unsigned int flags; |
105 | /* parent cfq_data */ | 105 | /* parent cfq_data */ |
106 | struct cfq_data *cfqd; | 106 | struct cfq_data *cfqd; |
107 | /* service_tree member */ | 107 | /* service_tree member */ |
108 | struct rb_node rb_node; | 108 | struct rb_node rb_node; |
109 | /* service_tree key */ | 109 | /* service_tree key */ |
110 | unsigned long rb_key; | 110 | unsigned long rb_key; |
111 | /* prio tree member */ | 111 | /* prio tree member */ |
112 | struct rb_node p_node; | 112 | struct rb_node p_node; |
113 | /* prio tree root we belong to, if any */ | 113 | /* prio tree root we belong to, if any */ |
114 | struct rb_root *p_root; | 114 | struct rb_root *p_root; |
115 | /* sorted list of pending requests */ | 115 | /* sorted list of pending requests */ |
116 | struct rb_root sort_list; | 116 | struct rb_root sort_list; |
117 | /* if fifo isn't expired, next request to serve */ | 117 | /* if fifo isn't expired, next request to serve */ |
118 | struct request *next_rq; | 118 | struct request *next_rq; |
119 | /* requests queued in sort_list */ | 119 | /* requests queued in sort_list */ |
120 | int queued[2]; | 120 | int queued[2]; |
121 | /* currently allocated requests */ | 121 | /* currently allocated requests */ |
122 | int allocated[2]; | 122 | int allocated[2]; |
123 | /* fifo list of requests in sort_list */ | 123 | /* fifo list of requests in sort_list */ |
124 | struct list_head fifo; | 124 | struct list_head fifo; |
125 | 125 | ||
126 | /* time when queue got scheduled in to dispatch first request. */ | 126 | /* time when queue got scheduled in to dispatch first request. */ |
127 | unsigned long dispatch_start; | 127 | unsigned long dispatch_start; |
128 | unsigned int allocated_slice; | 128 | unsigned int allocated_slice; |
129 | unsigned int slice_dispatch; | 129 | unsigned int slice_dispatch; |
130 | /* time when first request from queue completed and slice started. */ | 130 | /* time when first request from queue completed and slice started. */ |
131 | unsigned long slice_start; | 131 | unsigned long slice_start; |
132 | unsigned long slice_end; | 132 | unsigned long slice_end; |
133 | long slice_resid; | 133 | long slice_resid; |
134 | 134 | ||
135 | /* pending priority requests */ | 135 | /* pending priority requests */ |
136 | int prio_pending; | 136 | int prio_pending; |
137 | /* number of requests that are on the dispatch list or inside driver */ | 137 | /* number of requests that are on the dispatch list or inside driver */ |
138 | int dispatched; | 138 | int dispatched; |
139 | 139 | ||
140 | /* io prio of this group */ | 140 | /* io prio of this group */ |
141 | unsigned short ioprio, org_ioprio; | 141 | unsigned short ioprio, org_ioprio; |
142 | unsigned short ioprio_class; | 142 | unsigned short ioprio_class; |
143 | 143 | ||
144 | pid_t pid; | 144 | pid_t pid; |
145 | 145 | ||
146 | u32 seek_history; | 146 | u32 seek_history; |
147 | sector_t last_request_pos; | 147 | sector_t last_request_pos; |
148 | 148 | ||
149 | struct cfq_rb_root *service_tree; | 149 | struct cfq_rb_root *service_tree; |
150 | struct cfq_queue *new_cfqq; | 150 | struct cfq_queue *new_cfqq; |
151 | struct cfq_group *cfqg; | 151 | struct cfq_group *cfqg; |
152 | /* Number of sectors dispatched from queue in single dispatch round */ | 152 | /* Number of sectors dispatched from queue in single dispatch round */ |
153 | unsigned long nr_sectors; | 153 | unsigned long nr_sectors; |
154 | }; | 154 | }; |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * First index in the service_trees. | 157 | * First index in the service_trees. |
158 | * IDLE is handled separately, so it has negative index | 158 | * IDLE is handled separately, so it has negative index |
159 | */ | 159 | */ |
160 | enum wl_prio_t { | 160 | enum wl_prio_t { |
161 | BE_WORKLOAD = 0, | 161 | BE_WORKLOAD = 0, |
162 | RT_WORKLOAD = 1, | 162 | RT_WORKLOAD = 1, |
163 | IDLE_WORKLOAD = 2, | 163 | IDLE_WORKLOAD = 2, |
164 | CFQ_PRIO_NR, | 164 | CFQ_PRIO_NR, |
165 | }; | 165 | }; |
166 | 166 | ||
167 | /* | 167 | /* |
168 | * Second index in the service_trees. | 168 | * Second index in the service_trees. |
169 | */ | 169 | */ |
170 | enum wl_type_t { | 170 | enum wl_type_t { |
171 | ASYNC_WORKLOAD = 0, | 171 | ASYNC_WORKLOAD = 0, |
172 | SYNC_NOIDLE_WORKLOAD = 1, | 172 | SYNC_NOIDLE_WORKLOAD = 1, |
173 | SYNC_WORKLOAD = 2 | 173 | SYNC_WORKLOAD = 2 |
174 | }; | 174 | }; |
175 | 175 | ||
176 | struct cfqg_stats { | 176 | struct cfqg_stats { |
177 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 177 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
178 | /* total bytes transferred */ | 178 | /* total bytes transferred */ |
179 | struct blkg_rwstat service_bytes; | 179 | struct blkg_rwstat service_bytes; |
180 | /* total IOs serviced, post merge */ | 180 | /* total IOs serviced, post merge */ |
181 | struct blkg_rwstat serviced; | 181 | struct blkg_rwstat serviced; |
182 | /* number of ios merged */ | 182 | /* number of ios merged */ |
183 | struct blkg_rwstat merged; | 183 | struct blkg_rwstat merged; |
184 | /* total time spent on device in ns, may not be accurate w/ queueing */ | 184 | /* total time spent on device in ns, may not be accurate w/ queueing */ |
185 | struct blkg_rwstat service_time; | 185 | struct blkg_rwstat service_time; |
186 | /* total time spent waiting in scheduler queue in ns */ | 186 | /* total time spent waiting in scheduler queue in ns */ |
187 | struct blkg_rwstat wait_time; | 187 | struct blkg_rwstat wait_time; |
188 | /* number of IOs queued up */ | 188 | /* number of IOs queued up */ |
189 | struct blkg_rwstat queued; | 189 | struct blkg_rwstat queued; |
190 | /* total sectors transferred */ | 190 | /* total sectors transferred */ |
191 | struct blkg_stat sectors; | 191 | struct blkg_stat sectors; |
192 | /* total disk time and nr sectors dispatched by this group */ | 192 | /* total disk time and nr sectors dispatched by this group */ |
193 | struct blkg_stat time; | 193 | struct blkg_stat time; |
194 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 194 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
195 | /* time not charged to this cgroup */ | 195 | /* time not charged to this cgroup */ |
196 | struct blkg_stat unaccounted_time; | 196 | struct blkg_stat unaccounted_time; |
197 | /* sum of number of ios queued across all samples */ | 197 | /* sum of number of ios queued across all samples */ |
198 | struct blkg_stat avg_queue_size_sum; | 198 | struct blkg_stat avg_queue_size_sum; |
199 | /* count of samples taken for average */ | 199 | /* count of samples taken for average */ |
200 | struct blkg_stat avg_queue_size_samples; | 200 | struct blkg_stat avg_queue_size_samples; |
201 | /* how many times this group has been removed from service tree */ | 201 | /* how many times this group has been removed from service tree */ |
202 | struct blkg_stat dequeue; | 202 | struct blkg_stat dequeue; |
203 | /* total time spent waiting for it to be assigned a timeslice. */ | 203 | /* total time spent waiting for it to be assigned a timeslice. */ |
204 | struct blkg_stat group_wait_time; | 204 | struct blkg_stat group_wait_time; |
205 | /* time spent idling for this blkio_group */ | 205 | /* time spent idling for this blkio_group */ |
206 | struct blkg_stat idle_time; | 206 | struct blkg_stat idle_time; |
207 | /* total time with empty current active q with other requests queued */ | 207 | /* total time with empty current active q with other requests queued */ |
208 | struct blkg_stat empty_time; | 208 | struct blkg_stat empty_time; |
209 | /* fields after this shouldn't be cleared on stat reset */ | 209 | /* fields after this shouldn't be cleared on stat reset */ |
210 | uint64_t start_group_wait_time; | 210 | uint64_t start_group_wait_time; |
211 | uint64_t start_idle_time; | 211 | uint64_t start_idle_time; |
212 | uint64_t start_empty_time; | 212 | uint64_t start_empty_time; |
213 | uint16_t flags; | 213 | uint16_t flags; |
214 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ | 214 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ |
215 | #endif /* CONFIG_CFQ_GROUP_IOSCHED */ | 215 | #endif /* CONFIG_CFQ_GROUP_IOSCHED */ |
216 | }; | 216 | }; |
217 | 217 | ||
218 | /* This is per cgroup per device grouping structure */ | 218 | /* This is per cgroup per device grouping structure */ |
219 | struct cfq_group { | 219 | struct cfq_group { |
220 | /* group service_tree member */ | 220 | /* group service_tree member */ |
221 | struct rb_node rb_node; | 221 | struct rb_node rb_node; |
222 | 222 | ||
223 | /* group service_tree key */ | 223 | /* group service_tree key */ |
224 | u64 vdisktime; | 224 | u64 vdisktime; |
225 | unsigned int weight; | 225 | unsigned int weight; |
226 | unsigned int new_weight; | 226 | unsigned int new_weight; |
227 | unsigned int dev_weight; | 227 | unsigned int dev_weight; |
228 | 228 | ||
229 | /* number of cfqq currently on this group */ | 229 | /* number of cfqq currently on this group */ |
230 | int nr_cfqq; | 230 | int nr_cfqq; |
231 | 231 | ||
232 | /* | 232 | /* |
233 | * Per group busy queues average. Useful for workload slice calc. We | 233 | * Per group busy queues average. Useful for workload slice calc. We |
234 | * create the array for each prio class but at run time it is used | 234 | * create the array for each prio class but at run time it is used |
235 | * only for RT and BE class and slot for IDLE class remains unused. | 235 | * only for RT and BE class and slot for IDLE class remains unused. |
236 | * This is primarily done to avoid confusion and a gcc warning. | 236 | * This is primarily done to avoid confusion and a gcc warning. |
237 | */ | 237 | */ |
238 | unsigned int busy_queues_avg[CFQ_PRIO_NR]; | 238 | unsigned int busy_queues_avg[CFQ_PRIO_NR]; |
239 | /* | 239 | /* |
240 | * rr lists of queues with requests. We maintain service trees for | 240 | * rr lists of queues with requests. We maintain service trees for |
241 | * RT and BE classes. These trees are subdivided into subclasses | 241 | * RT and BE classes. These trees are subdivided into subclasses |
242 | * of SYNC, SYNC_NOIDLE and ASYNC based on workload type. For IDLE | 242 | * of SYNC, SYNC_NOIDLE and ASYNC based on workload type. For IDLE |
243 | * class there is no subclassification and all the cfq queues go on | 243 | * class there is no subclassification and all the cfq queues go on |
244 | * a single tree service_tree_idle. | 244 | * a single tree service_tree_idle. |
245 | * Counts are embedded in the cfq_rb_root | 245 | * Counts are embedded in the cfq_rb_root |
246 | */ | 246 | */ |
247 | struct cfq_rb_root service_trees[2][3]; | 247 | struct cfq_rb_root service_trees[2][3]; |
248 | struct cfq_rb_root service_tree_idle; | 248 | struct cfq_rb_root service_tree_idle; |
249 | 249 | ||
250 | unsigned long saved_workload_slice; | 250 | unsigned long saved_workload_slice; |
251 | enum wl_type_t saved_workload; | 251 | enum wl_type_t saved_workload; |
252 | enum wl_prio_t saved_serving_prio; | 252 | enum wl_prio_t saved_serving_prio; |
253 | 253 | ||
254 | /* number of requests that are on the dispatch list or inside driver */ | 254 | /* number of requests that are on the dispatch list or inside driver */ |
255 | int dispatched; | 255 | int dispatched; |
256 | struct cfq_ttime ttime; | 256 | struct cfq_ttime ttime; |
257 | struct cfqg_stats stats; | 257 | struct cfqg_stats stats; |
258 | }; | 258 | }; |
259 | 259 | ||
260 | struct cfq_io_cq { | 260 | struct cfq_io_cq { |
261 | struct io_cq icq; /* must be the first member */ | 261 | struct io_cq icq; /* must be the first member */ |
262 | struct cfq_queue *cfqq[2]; | 262 | struct cfq_queue *cfqq[2]; |
263 | struct cfq_ttime ttime; | 263 | struct cfq_ttime ttime; |
264 | int ioprio; /* the current ioprio */ | 264 | int ioprio; /* the current ioprio */ |
265 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 265 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
266 | uint64_t blkcg_id; /* the current blkcg ID */ | 266 | uint64_t blkcg_id; /* the current blkcg ID */ |
267 | #endif | 267 | #endif |
268 | }; | 268 | }; |
269 | 269 | ||
270 | /* | 270 | /* |
271 | * Per block device queue structure | 271 | * Per block device queue structure |
272 | */ | 272 | */ |
273 | struct cfq_data { | 273 | struct cfq_data { |
274 | struct request_queue *queue; | 274 | struct request_queue *queue; |
275 | /* Root service tree for cfq_groups */ | 275 | /* Root service tree for cfq_groups */ |
276 | struct cfq_rb_root grp_service_tree; | 276 | struct cfq_rb_root grp_service_tree; |
277 | struct cfq_group *root_group; | 277 | struct cfq_group *root_group; |
278 | 278 | ||
279 | /* | 279 | /* |
280 | * The priority currently being served | 280 | * The priority currently being served |
281 | */ | 281 | */ |
282 | enum wl_prio_t serving_prio; | 282 | enum wl_prio_t serving_prio; |
283 | enum wl_type_t serving_type; | 283 | enum wl_type_t serving_type; |
284 | unsigned long workload_expires; | 284 | unsigned long workload_expires; |
285 | struct cfq_group *serving_group; | 285 | struct cfq_group *serving_group; |
286 | 286 | ||
287 | /* | 287 | /* |
288 | * Each priority tree is sorted by next_request position. These | 288 | * Each priority tree is sorted by next_request position. These |
289 | * trees are used when determining if two or more queues are | 289 | * trees are used when determining if two or more queues are |
290 | * interleaving requests (see cfq_close_cooperator). | 290 | * interleaving requests (see cfq_close_cooperator). |
291 | */ | 291 | */ |
292 | struct rb_root prio_trees[CFQ_PRIO_LISTS]; | 292 | struct rb_root prio_trees[CFQ_PRIO_LISTS]; |
293 | 293 | ||
294 | unsigned int busy_queues; | 294 | unsigned int busy_queues; |
295 | unsigned int busy_sync_queues; | 295 | unsigned int busy_sync_queues; |
296 | 296 | ||
297 | int rq_in_driver; | 297 | int rq_in_driver; |
298 | int rq_in_flight[2]; | 298 | int rq_in_flight[2]; |
299 | 299 | ||
300 | /* | 300 | /* |
301 | * queue-depth detection | 301 | * queue-depth detection |
302 | */ | 302 | */ |
303 | int rq_queued; | 303 | int rq_queued; |
304 | int hw_tag; | 304 | int hw_tag; |
305 | /* | 305 | /* |
306 | * hw_tag can be | 306 | * hw_tag can be |
307 | * -1 => indeterminate, (cfq will behave as if NCQ is present, to allow better detection) | 307 | * -1 => indeterminate, (cfq will behave as if NCQ is present, to allow better detection) |
308 | * 1 => NCQ is present (hw_tag_est_depth is the estimated max depth) | 308 | * 1 => NCQ is present (hw_tag_est_depth is the estimated max depth) |
309 | * 0 => no NCQ | 309 | * 0 => no NCQ |
310 | */ | 310 | */ |
311 | int hw_tag_est_depth; | 311 | int hw_tag_est_depth; |
312 | unsigned int hw_tag_samples; | 312 | unsigned int hw_tag_samples; |
313 | 313 | ||
314 | /* | 314 | /* |
315 | * idle window management | 315 | * idle window management |
316 | */ | 316 | */ |
317 | struct timer_list idle_slice_timer; | 317 | struct timer_list idle_slice_timer; |
318 | struct work_struct unplug_work; | 318 | struct work_struct unplug_work; |
319 | 319 | ||
320 | struct cfq_queue *active_queue; | 320 | struct cfq_queue *active_queue; |
321 | struct cfq_io_cq *active_cic; | 321 | struct cfq_io_cq *active_cic; |
322 | 322 | ||
323 | /* | 323 | /* |
324 | * async queue for each priority case | 324 | * async queue for each priority case |
325 | */ | 325 | */ |
326 | struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR]; | 326 | struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR]; |
327 | struct cfq_queue *async_idle_cfqq; | 327 | struct cfq_queue *async_idle_cfqq; |
328 | 328 | ||
329 | sector_t last_position; | 329 | sector_t last_position; |
330 | 330 | ||
331 | /* | 331 | /* |
332 | * tunables, see top of file | 332 | * tunables, see top of file |
333 | */ | 333 | */ |
334 | unsigned int cfq_quantum; | 334 | unsigned int cfq_quantum; |
335 | unsigned int cfq_fifo_expire[2]; | 335 | unsigned int cfq_fifo_expire[2]; |
336 | unsigned int cfq_back_penalty; | 336 | unsigned int cfq_back_penalty; |
337 | unsigned int cfq_back_max; | 337 | unsigned int cfq_back_max; |
338 | unsigned int cfq_slice[2]; | 338 | unsigned int cfq_slice[2]; |
339 | unsigned int cfq_slice_async_rq; | 339 | unsigned int cfq_slice_async_rq; |
340 | unsigned int cfq_slice_idle; | 340 | unsigned int cfq_slice_idle; |
341 | unsigned int cfq_group_idle; | 341 | unsigned int cfq_group_idle; |
342 | unsigned int cfq_latency; | 342 | unsigned int cfq_latency; |
343 | 343 | ||
344 | /* | 344 | /* |
345 | * Fallback dummy cfqq for extreme OOM conditions | 345 | * Fallback dummy cfqq for extreme OOM conditions |
346 | */ | 346 | */ |
347 | struct cfq_queue oom_cfqq; | 347 | struct cfq_queue oom_cfqq; |
348 | 348 | ||
349 | unsigned long last_delayed_sync; | 349 | unsigned long last_delayed_sync; |
350 | }; | 350 | }; |
351 | 351 | ||
352 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); | 352 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); |
353 | 353 | ||
354 | static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg, | 354 | static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg, |
355 | enum wl_prio_t prio, | 355 | enum wl_prio_t prio, |
356 | enum wl_type_t type) | 356 | enum wl_type_t type) |
357 | { | 357 | { |
358 | if (!cfqg) | 358 | if (!cfqg) |
359 | return NULL; | 359 | return NULL; |
360 | 360 | ||
361 | if (prio == IDLE_WORKLOAD) | 361 | if (prio == IDLE_WORKLOAD) |
362 | return &cfqg->service_tree_idle; | 362 | return &cfqg->service_tree_idle; |
363 | 363 | ||
364 | return &cfqg->service_trees[prio][type]; | 364 | return &cfqg->service_trees[prio][type]; |
365 | } | 365 | } |
366 | 366 | ||
367 | enum cfqq_state_flags { | 367 | enum cfqq_state_flags { |
368 | CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ | 368 | CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ |
369 | CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ | 369 | CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ |
370 | CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */ | 370 | CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */ |
371 | CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */ | 371 | CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */ |
372 | CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ | 372 | CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ |
373 | CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */ | 373 | CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */ |
374 | CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ | 374 | CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ |
375 | CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ | 375 | CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ |
376 | CFQ_CFQQ_FLAG_sync, /* synchronous queue */ | 376 | CFQ_CFQQ_FLAG_sync, /* synchronous queue */ |
377 | CFQ_CFQQ_FLAG_coop, /* cfqq is shared */ | 377 | CFQ_CFQQ_FLAG_coop, /* cfqq is shared */ |
378 | CFQ_CFQQ_FLAG_split_coop, /* shared cfqq will be split */ | 378 | CFQ_CFQQ_FLAG_split_coop, /* shared cfqq will be split */ |
379 | CFQ_CFQQ_FLAG_deep, /* sync cfqq experienced large depth */ | 379 | CFQ_CFQQ_FLAG_deep, /* sync cfqq experienced large depth */ |
380 | CFQ_CFQQ_FLAG_wait_busy, /* Waiting for next request */ | 380 | CFQ_CFQQ_FLAG_wait_busy, /* Waiting for next request */ |
381 | }; | 381 | }; |
382 | 382 | ||
383 | #define CFQ_CFQQ_FNS(name) \ | 383 | #define CFQ_CFQQ_FNS(name) \ |
384 | static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \ | 384 | static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \ |
385 | { \ | 385 | { \ |
386 | (cfqq)->flags |= (1 << CFQ_CFQQ_FLAG_##name); \ | 386 | (cfqq)->flags |= (1 << CFQ_CFQQ_FLAG_##name); \ |
387 | } \ | 387 | } \ |
388 | static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \ | 388 | static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \ |
389 | { \ | 389 | { \ |
390 | (cfqq)->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \ | 390 | (cfqq)->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \ |
391 | } \ | 391 | } \ |
392 | static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ | 392 | static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ |
393 | { \ | 393 | { \ |
394 | return ((cfqq)->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \ | 394 | return ((cfqq)->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \ |
395 | } | 395 | } |
396 | 396 | ||
397 | CFQ_CFQQ_FNS(on_rr); | 397 | CFQ_CFQQ_FNS(on_rr); |
398 | CFQ_CFQQ_FNS(wait_request); | 398 | CFQ_CFQQ_FNS(wait_request); |
399 | CFQ_CFQQ_FNS(must_dispatch); | 399 | CFQ_CFQQ_FNS(must_dispatch); |
400 | CFQ_CFQQ_FNS(must_alloc_slice); | 400 | CFQ_CFQQ_FNS(must_alloc_slice); |
401 | CFQ_CFQQ_FNS(fifo_expire); | 401 | CFQ_CFQQ_FNS(fifo_expire); |
402 | CFQ_CFQQ_FNS(idle_window); | 402 | CFQ_CFQQ_FNS(idle_window); |
403 | CFQ_CFQQ_FNS(prio_changed); | 403 | CFQ_CFQQ_FNS(prio_changed); |
404 | CFQ_CFQQ_FNS(slice_new); | 404 | CFQ_CFQQ_FNS(slice_new); |
405 | CFQ_CFQQ_FNS(sync); | 405 | CFQ_CFQQ_FNS(sync); |
406 | CFQ_CFQQ_FNS(coop); | 406 | CFQ_CFQQ_FNS(coop); |
407 | CFQ_CFQQ_FNS(split_coop); | 407 | CFQ_CFQQ_FNS(split_coop); |
408 | CFQ_CFQQ_FNS(deep); | 408 | CFQ_CFQQ_FNS(deep); |
409 | CFQ_CFQQ_FNS(wait_busy); | 409 | CFQ_CFQQ_FNS(wait_busy); |
410 | #undef CFQ_CFQQ_FNS | 410 | #undef CFQ_CFQQ_FNS |
411 | 411 | ||
412 | #if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) | 412 | #if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) |
413 | 413 | ||
414 | /* cfqg stats flags */ | 414 | /* cfqg stats flags */ |
415 | enum cfqg_stats_flags { | 415 | enum cfqg_stats_flags { |
416 | CFQG_stats_waiting = 0, | 416 | CFQG_stats_waiting = 0, |
417 | CFQG_stats_idling, | 417 | CFQG_stats_idling, |
418 | CFQG_stats_empty, | 418 | CFQG_stats_empty, |
419 | }; | 419 | }; |
420 | 420 | ||
421 | #define CFQG_FLAG_FNS(name) \ | 421 | #define CFQG_FLAG_FNS(name) \ |
422 | static inline void cfqg_stats_mark_##name(struct cfqg_stats *stats) \ | 422 | static inline void cfqg_stats_mark_##name(struct cfqg_stats *stats) \ |
423 | { \ | 423 | { \ |
424 | stats->flags |= (1 << CFQG_stats_##name); \ | 424 | stats->flags |= (1 << CFQG_stats_##name); \ |
425 | } \ | 425 | } \ |
426 | static inline void cfqg_stats_clear_##name(struct cfqg_stats *stats) \ | 426 | static inline void cfqg_stats_clear_##name(struct cfqg_stats *stats) \ |
427 | { \ | 427 | { \ |
428 | stats->flags &= ~(1 << CFQG_stats_##name); \ | 428 | stats->flags &= ~(1 << CFQG_stats_##name); \ |
429 | } \ | 429 | } \ |
430 | static inline int cfqg_stats_##name(struct cfqg_stats *stats) \ | 430 | static inline int cfqg_stats_##name(struct cfqg_stats *stats) \ |
431 | { \ | 431 | { \ |
432 | return (stats->flags & (1 << CFQG_stats_##name)) != 0; \ | 432 | return (stats->flags & (1 << CFQG_stats_##name)) != 0; \ |
433 | } \ | 433 | } \ |
434 | 434 | ||
435 | CFQG_FLAG_FNS(waiting) | 435 | CFQG_FLAG_FNS(waiting) |
436 | CFQG_FLAG_FNS(idling) | 436 | CFQG_FLAG_FNS(idling) |
437 | CFQG_FLAG_FNS(empty) | 437 | CFQG_FLAG_FNS(empty) |
438 | #undef CFQG_FLAG_FNS | 438 | #undef CFQG_FLAG_FNS |
439 | 439 | ||
440 | /* This should be called with the queue_lock held. */ | 440 | /* This should be called with the queue_lock held. */ |
441 | static void cfqg_stats_update_group_wait_time(struct cfqg_stats *stats) | 441 | static void cfqg_stats_update_group_wait_time(struct cfqg_stats *stats) |
442 | { | 442 | { |
443 | unsigned long long now; | 443 | unsigned long long now; |
444 | 444 | ||
445 | if (!cfqg_stats_waiting(stats)) | 445 | if (!cfqg_stats_waiting(stats)) |
446 | return; | 446 | return; |
447 | 447 | ||
448 | now = sched_clock(); | 448 | now = sched_clock(); |
449 | if (time_after64(now, stats->start_group_wait_time)) | 449 | if (time_after64(now, stats->start_group_wait_time)) |
450 | blkg_stat_add(&stats->group_wait_time, | 450 | blkg_stat_add(&stats->group_wait_time, |
451 | now - stats->start_group_wait_time); | 451 | now - stats->start_group_wait_time); |
452 | cfqg_stats_clear_waiting(stats); | 452 | cfqg_stats_clear_waiting(stats); |
453 | } | 453 | } |
454 | 454 | ||
455 | /* This should be called with the queue_lock held. */ | 455 | /* This should be called with the queue_lock held. */ |
456 | static void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg, | 456 | static void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg, |
457 | struct cfq_group *curr_cfqg) | 457 | struct cfq_group *curr_cfqg) |
458 | { | 458 | { |
459 | struct cfqg_stats *stats = &cfqg->stats; | 459 | struct cfqg_stats *stats = &cfqg->stats; |
460 | 460 | ||
461 | if (cfqg_stats_waiting(stats)) | 461 | if (cfqg_stats_waiting(stats)) |
462 | return; | 462 | return; |
463 | if (cfqg == curr_cfqg) | 463 | if (cfqg == curr_cfqg) |
464 | return; | 464 | return; |
465 | stats->start_group_wait_time = sched_clock(); | 465 | stats->start_group_wait_time = sched_clock(); |
466 | cfqg_stats_mark_waiting(stats); | 466 | cfqg_stats_mark_waiting(stats); |
467 | } | 467 | } |
468 | 468 | ||
469 | /* This should be called with the queue_lock held. */ | 469 | /* This should be called with the queue_lock held. */ |
470 | static void cfqg_stats_end_empty_time(struct cfqg_stats *stats) | 470 | static void cfqg_stats_end_empty_time(struct cfqg_stats *stats) |
471 | { | 471 | { |
472 | unsigned long long now; | 472 | unsigned long long now; |
473 | 473 | ||
474 | if (!cfqg_stats_empty(stats)) | 474 | if (!cfqg_stats_empty(stats)) |
475 | return; | 475 | return; |
476 | 476 | ||
477 | now = sched_clock(); | 477 | now = sched_clock(); |
478 | if (time_after64(now, stats->start_empty_time)) | 478 | if (time_after64(now, stats->start_empty_time)) |
479 | blkg_stat_add(&stats->empty_time, | 479 | blkg_stat_add(&stats->empty_time, |
480 | now - stats->start_empty_time); | 480 | now - stats->start_empty_time); |
481 | cfqg_stats_clear_empty(stats); | 481 | cfqg_stats_clear_empty(stats); |
482 | } | 482 | } |
483 | 483 | ||
484 | static void cfqg_stats_update_dequeue(struct cfq_group *cfqg) | 484 | static void cfqg_stats_update_dequeue(struct cfq_group *cfqg) |
485 | { | 485 | { |
486 | blkg_stat_add(&cfqg->stats.dequeue, 1); | 486 | blkg_stat_add(&cfqg->stats.dequeue, 1); |
487 | } | 487 | } |
488 | 488 | ||
489 | static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) | 489 | static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) |
490 | { | 490 | { |
491 | struct cfqg_stats *stats = &cfqg->stats; | 491 | struct cfqg_stats *stats = &cfqg->stats; |
492 | 492 | ||
493 | if (blkg_rwstat_sum(&stats->queued)) | 493 | if (blkg_rwstat_sum(&stats->queued)) |
494 | return; | 494 | return; |
495 | 495 | ||
496 | /* | 496 | /* |
497 | * The group is already marked empty. This can happen if a cfqq got a new | 497 | * The group is already marked empty. This can happen if a cfqq got a new |
498 | * request in the parent group and moved to this group while being added | 498 | * request in the parent group and moved to this group while being added |
499 | * to the service tree. Just ignore the event and move on. | 499 | * to the service tree. Just ignore the event and move on. |
500 | */ | 500 | */ |
501 | if (cfqg_stats_empty(stats)) | 501 | if (cfqg_stats_empty(stats)) |
502 | return; | 502 | return; |
503 | 503 | ||
504 | stats->start_empty_time = sched_clock(); | 504 | stats->start_empty_time = sched_clock(); |
505 | cfqg_stats_mark_empty(stats); | 505 | cfqg_stats_mark_empty(stats); |
506 | } | 506 | } |
507 | 507 | ||
508 | static void cfqg_stats_update_idle_time(struct cfq_group *cfqg) | 508 | static void cfqg_stats_update_idle_time(struct cfq_group *cfqg) |
509 | { | 509 | { |
510 | struct cfqg_stats *stats = &cfqg->stats; | 510 | struct cfqg_stats *stats = &cfqg->stats; |
511 | 511 | ||
512 | if (cfqg_stats_idling(stats)) { | 512 | if (cfqg_stats_idling(stats)) { |
513 | unsigned long long now = sched_clock(); | 513 | unsigned long long now = sched_clock(); |
514 | 514 | ||
515 | if (time_after64(now, stats->start_idle_time)) | 515 | if (time_after64(now, stats->start_idle_time)) |
516 | blkg_stat_add(&stats->idle_time, | 516 | blkg_stat_add(&stats->idle_time, |
517 | now - stats->start_idle_time); | 517 | now - stats->start_idle_time); |
518 | cfqg_stats_clear_idling(stats); | 518 | cfqg_stats_clear_idling(stats); |
519 | } | 519 | } |
520 | } | 520 | } |
521 | 521 | ||
522 | static void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg) | 522 | static void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg) |
523 | { | 523 | { |
524 | struct cfqg_stats *stats = &cfqg->stats; | 524 | struct cfqg_stats *stats = &cfqg->stats; |
525 | 525 | ||
526 | BUG_ON(cfqg_stats_idling(stats)); | 526 | BUG_ON(cfqg_stats_idling(stats)); |
527 | 527 | ||
528 | stats->start_idle_time = sched_clock(); | 528 | stats->start_idle_time = sched_clock(); |
529 | cfqg_stats_mark_idling(stats); | 529 | cfqg_stats_mark_idling(stats); |
530 | } | 530 | } |
531 | 531 | ||
532 | static void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) | 532 | static void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) |
533 | { | 533 | { |
534 | struct cfqg_stats *stats = &cfqg->stats; | 534 | struct cfqg_stats *stats = &cfqg->stats; |
535 | 535 | ||
536 | blkg_stat_add(&stats->avg_queue_size_sum, | 536 | blkg_stat_add(&stats->avg_queue_size_sum, |
537 | blkg_rwstat_sum(&stats->queued)); | 537 | blkg_rwstat_sum(&stats->queued)); |
538 | blkg_stat_add(&stats->avg_queue_size_samples, 1); | 538 | blkg_stat_add(&stats->avg_queue_size_samples, 1); |
539 | cfqg_stats_update_group_wait_time(stats); | 539 | cfqg_stats_update_group_wait_time(stats); |
540 | } | 540 | } |
541 | 541 | ||
542 | #else /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ | 542 | #else /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ |
543 | 543 | ||
544 | static inline void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg, struct cfq_group *curr_cfqg) { } | 544 | static inline void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg, struct cfq_group *curr_cfqg) { } |
545 | static inline void cfqg_stats_end_empty_time(struct cfqg_stats *stats) { } | 545 | static inline void cfqg_stats_end_empty_time(struct cfqg_stats *stats) { } |
546 | static inline void cfqg_stats_update_dequeue(struct cfq_group *cfqg) { } | 546 | static inline void cfqg_stats_update_dequeue(struct cfq_group *cfqg) { } |
547 | static inline void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) { } | 547 | static inline void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) { } |
548 | static inline void cfqg_stats_update_idle_time(struct cfq_group *cfqg) { } | 548 | static inline void cfqg_stats_update_idle_time(struct cfq_group *cfqg) { } |
549 | static inline void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg) { } | 549 | static inline void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg) { } |
550 | static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { } | 550 | static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { } |
551 | 551 | ||
552 | #endif /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ | 552 | #endif /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ |
553 | 553 | ||
554 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 554 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
555 | 555 | ||
556 | static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg) | 556 | static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg) |
557 | { | 557 | { |
558 | return blkg_to_pdata(blkg, &blkio_policy_cfq); | 558 | return blkg_to_pdata(blkg, &blkio_policy_cfq); |
559 | } | 559 | } |
560 | 560 | ||
561 | static inline struct blkio_group *cfqg_to_blkg(struct cfq_group *cfqg) | 561 | static inline struct blkio_group *cfqg_to_blkg(struct cfq_group *cfqg) |
562 | { | 562 | { |
563 | return pdata_to_blkg(cfqg); | 563 | return pdata_to_blkg(cfqg); |
564 | } | 564 | } |
565 | 565 | ||
566 | static inline void cfqg_get(struct cfq_group *cfqg) | 566 | static inline void cfqg_get(struct cfq_group *cfqg) |
567 | { | 567 | { |
568 | return blkg_get(cfqg_to_blkg(cfqg)); | 568 | return blkg_get(cfqg_to_blkg(cfqg)); |
569 | } | 569 | } |
570 | 570 | ||
571 | static inline void cfqg_put(struct cfq_group *cfqg) | 571 | static inline void cfqg_put(struct cfq_group *cfqg) |
572 | { | 572 | { |
573 | return blkg_put(cfqg_to_blkg(cfqg)); | 573 | return blkg_put(cfqg_to_blkg(cfqg)); |
574 | } | 574 | } |
575 | 575 | ||
576 | #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ | 576 | #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ |
577 | blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ | 577 | blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ |
578 | cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ | 578 | cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ |
579 | blkg_path(cfqg_to_blkg((cfqq)->cfqg)), ##args) | 579 | blkg_path(cfqg_to_blkg((cfqq)->cfqg)), ##args) |
580 | 580 | ||
581 | #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) \ | 581 | #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) \ |
582 | blk_add_trace_msg((cfqd)->queue, "%s " fmt, \ | 582 | blk_add_trace_msg((cfqd)->queue, "%s " fmt, \ |
583 | blkg_path(cfqg_to_blkg((cfqg))), ##args) \ | 583 | blkg_path(cfqg_to_blkg((cfqg))), ##args) \ |
584 | 584 | ||
585 | static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, | 585 | static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, |
586 | struct cfq_group *curr_cfqg, int rw) | 586 | struct cfq_group *curr_cfqg, int rw) |
587 | { | 587 | { |
588 | blkg_rwstat_add(&cfqg->stats.queued, rw, 1); | 588 | blkg_rwstat_add(&cfqg->stats.queued, rw, 1); |
589 | cfqg_stats_end_empty_time(&cfqg->stats); | 589 | cfqg_stats_end_empty_time(&cfqg->stats); |
590 | cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg); | 590 | cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg); |
591 | } | 591 | } |
592 | 592 | ||
593 | static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, | 593 | static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, |
594 | unsigned long time, unsigned long unaccounted_time) | 594 | unsigned long time, unsigned long unaccounted_time) |
595 | { | 595 | { |
596 | blkg_stat_add(&cfqg->stats.time, time); | 596 | blkg_stat_add(&cfqg->stats.time, time); |
597 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 597 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
598 | blkg_stat_add(&cfqg->stats.unaccounted_time, unaccounted_time); | 598 | blkg_stat_add(&cfqg->stats.unaccounted_time, unaccounted_time); |
599 | #endif | 599 | #endif |
600 | } | 600 | } |
601 | 601 | ||
602 | static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) | 602 | static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) |
603 | { | 603 | { |
604 | blkg_rwstat_add(&cfqg->stats.queued, rw, -1); | 604 | blkg_rwstat_add(&cfqg->stats.queued, rw, -1); |
605 | } | 605 | } |
606 | 606 | ||
607 | static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) | 607 | static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) |
608 | { | 608 | { |
609 | blkg_rwstat_add(&cfqg->stats.merged, rw, 1); | 609 | blkg_rwstat_add(&cfqg->stats.merged, rw, 1); |
610 | } | 610 | } |
611 | 611 | ||
612 | static inline void cfqg_stats_update_dispatch(struct cfq_group *cfqg, | 612 | static inline void cfqg_stats_update_dispatch(struct cfq_group *cfqg, |
613 | uint64_t bytes, int rw) | 613 | uint64_t bytes, int rw) |
614 | { | 614 | { |
615 | blkg_stat_add(&cfqg->stats.sectors, bytes >> 9); | 615 | blkg_stat_add(&cfqg->stats.sectors, bytes >> 9); |
616 | blkg_rwstat_add(&cfqg->stats.serviced, rw, 1); | 616 | blkg_rwstat_add(&cfqg->stats.serviced, rw, 1); |
617 | blkg_rwstat_add(&cfqg->stats.service_bytes, rw, bytes); | 617 | blkg_rwstat_add(&cfqg->stats.service_bytes, rw, bytes); |
618 | } | 618 | } |
619 | 619 | ||
620 | static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, | 620 | static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, |
621 | uint64_t start_time, uint64_t io_start_time, int rw) | 621 | uint64_t start_time, uint64_t io_start_time, int rw) |
622 | { | 622 | { |
623 | struct cfqg_stats *stats = &cfqg->stats; | 623 | struct cfqg_stats *stats = &cfqg->stats; |
624 | unsigned long long now = sched_clock(); | 624 | unsigned long long now = sched_clock(); |
625 | 625 | ||
626 | if (time_after64(now, io_start_time)) | 626 | if (time_after64(now, io_start_time)) |
627 | blkg_rwstat_add(&stats->service_time, rw, now - io_start_time); | 627 | blkg_rwstat_add(&stats->service_time, rw, now - io_start_time); |
628 | if (time_after64(io_start_time, start_time)) | 628 | if (time_after64(io_start_time, start_time)) |
629 | blkg_rwstat_add(&stats->wait_time, rw, | 629 | blkg_rwstat_add(&stats->wait_time, rw, |
630 | io_start_time - start_time); | 630 | io_start_time - start_time); |
631 | } | 631 | } |
632 | 632 | ||
633 | static void cfqg_stats_reset(struct blkio_group *blkg) | 633 | static void cfqg_stats_reset(struct blkio_group *blkg) |
634 | { | 634 | { |
635 | struct cfq_group *cfqg = blkg_to_cfqg(blkg); | 635 | struct cfq_group *cfqg = blkg_to_cfqg(blkg); |
636 | struct cfqg_stats *stats = &cfqg->stats; | 636 | struct cfqg_stats *stats = &cfqg->stats; |
637 | 637 | ||
638 | /* queued stats shouldn't be cleared */ | 638 | /* queued stats shouldn't be cleared */ |
639 | blkg_rwstat_reset(&stats->service_bytes); | 639 | blkg_rwstat_reset(&stats->service_bytes); |
640 | blkg_rwstat_reset(&stats->serviced); | 640 | blkg_rwstat_reset(&stats->serviced); |
641 | blkg_rwstat_reset(&stats->merged); | 641 | blkg_rwstat_reset(&stats->merged); |
642 | blkg_rwstat_reset(&stats->service_time); | 642 | blkg_rwstat_reset(&stats->service_time); |
643 | blkg_rwstat_reset(&stats->wait_time); | 643 | blkg_rwstat_reset(&stats->wait_time); |
644 | blkg_stat_reset(&stats->time); | 644 | blkg_stat_reset(&stats->time); |
645 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 645 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
646 | blkg_stat_reset(&stats->unaccounted_time); | 646 | blkg_stat_reset(&stats->unaccounted_time); |
647 | blkg_stat_reset(&stats->avg_queue_size_sum); | 647 | blkg_stat_reset(&stats->avg_queue_size_sum); |
648 | blkg_stat_reset(&stats->avg_queue_size_samples); | 648 | blkg_stat_reset(&stats->avg_queue_size_samples); |
649 | blkg_stat_reset(&stats->dequeue); | 649 | blkg_stat_reset(&stats->dequeue); |
650 | blkg_stat_reset(&stats->group_wait_time); | 650 | blkg_stat_reset(&stats->group_wait_time); |
651 | blkg_stat_reset(&stats->idle_time); | 651 | blkg_stat_reset(&stats->idle_time); |
652 | blkg_stat_reset(&stats->empty_time); | 652 | blkg_stat_reset(&stats->empty_time); |
653 | #endif | 653 | #endif |
654 | } | 654 | } |
655 | 655 | ||
656 | #else /* CONFIG_CFQ_GROUP_IOSCHED */ | 656 | #else /* CONFIG_CFQ_GROUP_IOSCHED */ |
657 | 657 | ||
658 | static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg) { return NULL; } | 658 | static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg) { return NULL; } |
659 | static inline struct blkio_group *cfqg_to_blkg(struct cfq_group *cfqg) { return NULL; } | 659 | static inline struct blkio_group *cfqg_to_blkg(struct cfq_group *cfqg) { return NULL; } |
660 | static inline void cfqg_get(struct cfq_group *cfqg) { } | 660 | static inline void cfqg_get(struct cfq_group *cfqg) { } |
661 | static inline void cfqg_put(struct cfq_group *cfqg) { } | 661 | static inline void cfqg_put(struct cfq_group *cfqg) { } |
662 | 662 | ||
663 | #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ | 663 | #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ |
664 | blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args) | 664 | blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args) |
665 | #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0) | 665 | #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0) |
666 | 666 | ||
667 | static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, | 667 | static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, |
668 | struct cfq_group *curr_cfqg, int rw) { } | 668 | struct cfq_group *curr_cfqg, int rw) { } |
669 | static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, | 669 | static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, |
670 | unsigned long time, unsigned long unaccounted_time) { } | 670 | unsigned long time, unsigned long unaccounted_time) { } |
671 | static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) { } | 671 | static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) { } |
672 | static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) { } | 672 | static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) { } |
673 | static inline void cfqg_stats_update_dispatch(struct cfq_group *cfqg, | 673 | static inline void cfqg_stats_update_dispatch(struct cfq_group *cfqg, |
674 | uint64_t bytes, int rw) { } | 674 | uint64_t bytes, int rw) { } |
675 | static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, | 675 | static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, |
676 | uint64_t start_time, uint64_t io_start_time, int rw) { } | 676 | uint64_t start_time, uint64_t io_start_time, int rw) { } |
677 | 677 | ||
678 | #endif /* CONFIG_CFQ_GROUP_IOSCHED */ | 678 | #endif /* CONFIG_CFQ_GROUP_IOSCHED */ |
679 | 679 | ||
680 | #define cfq_log(cfqd, fmt, args...) \ | 680 | #define cfq_log(cfqd, fmt, args...) \ |
681 | blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args) | 681 | blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args) |
682 | 682 | ||
683 | /* Traverses through cfq group service trees */ | 683 | /* Traverses through cfq group service trees */ |
684 | #define for_each_cfqg_st(cfqg, i, j, st) \ | 684 | #define for_each_cfqg_st(cfqg, i, j, st) \ |
685 | for (i = 0; i <= IDLE_WORKLOAD; i++) \ | 685 | for (i = 0; i <= IDLE_WORKLOAD; i++) \ |
686 | for (j = 0, st = i < IDLE_WORKLOAD ? &cfqg->service_trees[i][j]\ | 686 | for (j = 0, st = i < IDLE_WORKLOAD ? &cfqg->service_trees[i][j]\ |
687 | : &cfqg->service_tree_idle; \ | 687 | : &cfqg->service_tree_idle; \ |
688 | (i < IDLE_WORKLOAD && j <= SYNC_WORKLOAD) || \ | 688 | (i < IDLE_WORKLOAD && j <= SYNC_WORKLOAD) || \ |
689 | (i == IDLE_WORKLOAD && j == 0); \ | 689 | (i == IDLE_WORKLOAD && j == 0); \ |
690 | j++, st = i < IDLE_WORKLOAD ? \ | 690 | j++, st = i < IDLE_WORKLOAD ? \ |
691 | &cfqg->service_trees[i][j]: NULL) \ | 691 | &cfqg->service_trees[i][j]: NULL) \ |
692 | 692 | ||
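To make the traversal order explicit, a minimal open-coded equivalent of for_each_cfqg_st() is sketched below; visit() is only a placeholder for the caller's loop body, not a helper in this file:

	/* equivalent open-coded traversal: each non-idle priority class has
	 * three per-type service trees, the idle class has a single tree */
	for (i = 0; i < IDLE_WORKLOAD; i++)
		for (j = 0; j <= SYNC_WORKLOAD; j++)
			visit(&cfqg->service_trees[i][j]);
	visit(&cfqg->service_tree_idle);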
693 | static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd, | 693 | static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd, |
694 | struct cfq_ttime *ttime, bool group_idle) | 694 | struct cfq_ttime *ttime, bool group_idle) |
695 | { | 695 | { |
696 | unsigned long slice; | 696 | unsigned long slice; |
697 | if (!sample_valid(ttime->ttime_samples)) | 697 | if (!sample_valid(ttime->ttime_samples)) |
698 | return false; | 698 | return false; |
699 | if (group_idle) | 699 | if (group_idle) |
700 | slice = cfqd->cfq_group_idle; | 700 | slice = cfqd->cfq_group_idle; |
701 | else | 701 | else |
702 | slice = cfqd->cfq_slice_idle; | 702 | slice = cfqd->cfq_slice_idle; |
703 | return ttime->ttime_mean > slice; | 703 | return ttime->ttime_mean > slice; |
704 | } | 704 | } |
705 | 705 | ||
706 | static inline bool iops_mode(struct cfq_data *cfqd) | 706 | static inline bool iops_mode(struct cfq_data *cfqd) |
707 | { | 707 | { |
708 | /* | 708 | /* |
709 | * If we are not idling on queues and it is an NCQ drive, requests | 709 | * If we are not idling on queues and it is an NCQ drive, requests |
710 | * execute in parallel and measuring time is not possible in most | 710 | * execute in parallel and measuring time is not possible in most |
711 | * cases, unless we drive shallower queue depths, which then becomes | 711 | * cases, unless we drive shallower queue depths, which then becomes |
712 | * a performance bottleneck itself. In such cases switch to providing | 712 | * a performance bottleneck itself. In such cases switch to providing |
713 | * fairness in terms of number of IOs. | 713 | * fairness in terms of number of IOs. |
714 | */ | 714 | */ |
715 | if (!cfqd->cfq_slice_idle && cfqd->hw_tag) | 715 | if (!cfqd->cfq_slice_idle && cfqd->hw_tag) |
716 | return true; | 716 | return true; |
717 | else | 717 | else |
718 | return false; | 718 | return false; |
719 | } | 719 | } |
720 | 720 | ||
721 | static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq) | 721 | static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq) |
722 | { | 722 | { |
723 | if (cfq_class_idle(cfqq)) | 723 | if (cfq_class_idle(cfqq)) |
724 | return IDLE_WORKLOAD; | 724 | return IDLE_WORKLOAD; |
725 | if (cfq_class_rt(cfqq)) | 725 | if (cfq_class_rt(cfqq)) |
726 | return RT_WORKLOAD; | 726 | return RT_WORKLOAD; |
727 | return BE_WORKLOAD; | 727 | return BE_WORKLOAD; |
728 | } | 728 | } |
729 | 729 | ||
730 | 730 | ||
731 | static enum wl_type_t cfqq_type(struct cfq_queue *cfqq) | 731 | static enum wl_type_t cfqq_type(struct cfq_queue *cfqq) |
732 | { | 732 | { |
733 | if (!cfq_cfqq_sync(cfqq)) | 733 | if (!cfq_cfqq_sync(cfqq)) |
734 | return ASYNC_WORKLOAD; | 734 | return ASYNC_WORKLOAD; |
735 | if (!cfq_cfqq_idle_window(cfqq)) | 735 | if (!cfq_cfqq_idle_window(cfqq)) |
736 | return SYNC_NOIDLE_WORKLOAD; | 736 | return SYNC_NOIDLE_WORKLOAD; |
737 | return SYNC_WORKLOAD; | 737 | return SYNC_WORKLOAD; |
738 | } | 738 | } |
739 | 739 | ||
740 | static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl, | 740 | static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl, |
741 | struct cfq_data *cfqd, | 741 | struct cfq_data *cfqd, |
742 | struct cfq_group *cfqg) | 742 | struct cfq_group *cfqg) |
743 | { | 743 | { |
744 | if (wl == IDLE_WORKLOAD) | 744 | if (wl == IDLE_WORKLOAD) |
745 | return cfqg->service_tree_idle.count; | 745 | return cfqg->service_tree_idle.count; |
746 | 746 | ||
747 | return cfqg->service_trees[wl][ASYNC_WORKLOAD].count | 747 | return cfqg->service_trees[wl][ASYNC_WORKLOAD].count |
748 | + cfqg->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count | 748 | + cfqg->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count |
749 | + cfqg->service_trees[wl][SYNC_WORKLOAD].count; | 749 | + cfqg->service_trees[wl][SYNC_WORKLOAD].count; |
750 | } | 750 | } |
751 | 751 | ||
752 | static inline int cfqg_busy_async_queues(struct cfq_data *cfqd, | 752 | static inline int cfqg_busy_async_queues(struct cfq_data *cfqd, |
753 | struct cfq_group *cfqg) | 753 | struct cfq_group *cfqg) |
754 | { | 754 | { |
755 | return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count | 755 | return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count |
756 | + cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count; | 756 | + cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count; |
757 | } | 757 | } |
758 | 758 | ||
759 | static void cfq_dispatch_insert(struct request_queue *, struct request *); | 759 | static void cfq_dispatch_insert(struct request_queue *, struct request *); |
760 | static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, bool is_sync, | 760 | static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, bool is_sync, |
761 | struct cfq_io_cq *cic, struct bio *bio, | 761 | struct cfq_io_cq *cic, struct bio *bio, |
762 | gfp_t gfp_mask); | 762 | gfp_t gfp_mask); |
763 | 763 | ||
764 | static inline struct cfq_io_cq *icq_to_cic(struct io_cq *icq) | 764 | static inline struct cfq_io_cq *icq_to_cic(struct io_cq *icq) |
765 | { | 765 | { |
766 | /* cic->icq is the first member, %NULL will convert to %NULL */ | 766 | /* cic->icq is the first member, %NULL will convert to %NULL */ |
767 | return container_of(icq, struct cfq_io_cq, icq); | 767 | return container_of(icq, struct cfq_io_cq, icq); |
768 | } | 768 | } |
769 | 769 | ||
770 | static inline struct cfq_io_cq *cfq_cic_lookup(struct cfq_data *cfqd, | 770 | static inline struct cfq_io_cq *cfq_cic_lookup(struct cfq_data *cfqd, |
771 | struct io_context *ioc) | 771 | struct io_context *ioc) |
772 | { | 772 | { |
773 | if (ioc) | 773 | if (ioc) |
774 | return icq_to_cic(ioc_lookup_icq(ioc, cfqd->queue)); | 774 | return icq_to_cic(ioc_lookup_icq(ioc, cfqd->queue)); |
775 | return NULL; | 775 | return NULL; |
776 | } | 776 | } |
777 | 777 | ||
778 | static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_cq *cic, bool is_sync) | 778 | static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_cq *cic, bool is_sync) |
779 | { | 779 | { |
780 | return cic->cfqq[is_sync]; | 780 | return cic->cfqq[is_sync]; |
781 | } | 781 | } |
782 | 782 | ||
783 | static inline void cic_set_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq, | 783 | static inline void cic_set_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq, |
784 | bool is_sync) | 784 | bool is_sync) |
785 | { | 785 | { |
786 | cic->cfqq[is_sync] = cfqq; | 786 | cic->cfqq[is_sync] = cfqq; |
787 | } | 787 | } |
788 | 788 | ||
789 | static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic) | 789 | static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic) |
790 | { | 790 | { |
791 | return cic->icq.q->elevator->elevator_data; | 791 | return cic->icq.q->elevator->elevator_data; |
792 | } | 792 | } |
793 | 793 | ||
794 | /* | 794 | /* |
795 | * We regard a request as SYNC if it's either a read or has the SYNC bit | 795 | * We regard a request as SYNC if it's either a read or has the SYNC bit |
796 | * set (in which case it could also be a direct WRITE). | 796 | * set (in which case it could also be a direct WRITE). |
797 | */ | 797 | */ |
798 | static inline bool cfq_bio_sync(struct bio *bio) | 798 | static inline bool cfq_bio_sync(struct bio *bio) |
799 | { | 799 | { |
800 | return bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC); | 800 | return bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC); |
801 | } | 801 | } |
802 | 802 | ||
803 | /* | 803 | /* |
804 | * Schedule a run of the queue if there are requests pending and no one | 804 | * Schedule a run of the queue if there are requests pending and no one |
805 | * in the driver will restart queueing. | 805 | * in the driver will restart queueing. |
806 | */ | 806 | */ |
807 | static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) | 807 | static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) |
808 | { | 808 | { |
809 | if (cfqd->busy_queues) { | 809 | if (cfqd->busy_queues) { |
810 | cfq_log(cfqd, "schedule dispatch"); | 810 | cfq_log(cfqd, "schedule dispatch"); |
811 | kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); | 811 | kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); |
812 | } | 812 | } |
813 | } | 813 | } |
814 | 814 | ||
815 | /* | 815 | /* |
816 | * Scale schedule slice based on io priority. Use the sync time slice only | 816 | * Scale schedule slice based on io priority. Use the sync time slice only |
817 | * if a queue is marked sync and has sync io queued. A sync queue with async | 817 | * if a queue is marked sync and has sync io queued. A sync queue with async |
818 | * io only should not get the full sync slice length. | 818 | * io only should not get the full sync slice length. |
819 | */ | 819 | */ |
820 | static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync, | 820 | static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync, |
821 | unsigned short prio) | 821 | unsigned short prio) |
822 | { | 822 | { |
823 | const int base_slice = cfqd->cfq_slice[sync]; | 823 | const int base_slice = cfqd->cfq_slice[sync]; |
824 | 824 | ||
825 | WARN_ON(prio >= IOPRIO_BE_NR); | 825 | WARN_ON(prio >= IOPRIO_BE_NR); |
826 | 826 | ||
827 | return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio)); | 827 | return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio)); |
828 | } | 828 | } |
829 | 829 | ||
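A worked example of the prio scaling above. The numbers assume the defaults set elsewhere in this file, a 100ms sync base slice and CFQ_SLICE_SCALE of 5; both values are assumptions here, not shown in this hunk:

	/*
	 * With base_slice = 100ms and CFQ_SLICE_SCALE = 5, each ioprio step
	 * is worth base_slice / 5 = 20ms relative to the default prio 4:
	 *
	 *   prio 0 (highest): 100 + 20 * (4 - 0) = 180ms
	 *   prio 4 (default): 100 + 20 * (4 - 4) = 100ms
	 *   prio 7 (lowest):  100 + 20 * (4 - 7) =  40ms
	 */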
830 | static inline int | 830 | static inline int |
831 | cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 831 | cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
832 | { | 832 | { |
833 | return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); | 833 | return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); |
834 | } | 834 | } |
835 | 835 | ||
836 | static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg) | 836 | static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg) |
837 | { | 837 | { |
838 | u64 d = delta << CFQ_SERVICE_SHIFT; | 838 | u64 d = delta << CFQ_SERVICE_SHIFT; |
839 | 839 | ||
840 | d = d * CFQ_WEIGHT_DEFAULT; | 840 | d = d * CFQ_WEIGHT_DEFAULT; |
841 | do_div(d, cfqg->weight); | 841 | do_div(d, cfqg->weight); |
842 | return d; | 842 | return d; |
843 | } | 843 | } |
844 | 844 | ||
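This scaling is what makes group weights take effect on the group service tree; written out as a formula:

	/*
	 * charge = (delta << CFQ_SERVICE_SHIFT) * CFQ_WEIGHT_DEFAULT / weight
	 *
	 * The vdisktime charge is inversely proportional to the group's
	 * weight: a group at twice the default weight is charged half the
	 * vdisktime for the same real slice, so it comes up for service
	 * roughly twice as often as a default-weight group.
	 */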
845 | static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) | 845 | static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) |
846 | { | 846 | { |
847 | s64 delta = (s64)(vdisktime - min_vdisktime); | 847 | s64 delta = (s64)(vdisktime - min_vdisktime); |
848 | if (delta > 0) | 848 | if (delta > 0) |
849 | min_vdisktime = vdisktime; | 849 | min_vdisktime = vdisktime; |
850 | 850 | ||
851 | return min_vdisktime; | 851 | return min_vdisktime; |
852 | } | 852 | } |
853 | 853 | ||
854 | static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime) | 854 | static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime) |
855 | { | 855 | { |
856 | s64 delta = (s64)(vdisktime - min_vdisktime); | 856 | s64 delta = (s64)(vdisktime - min_vdisktime); |
857 | if (delta < 0) | 857 | if (delta < 0) |
858 | min_vdisktime = vdisktime; | 858 | min_vdisktime = vdisktime; |
859 | 859 | ||
860 | return min_vdisktime; | 860 | return min_vdisktime; |
861 | } | 861 | } |
862 | 862 | ||
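The (s64) cast in both helpers is the usual wraparound-safe comparison; a quick sketch of why it works:

	/*
	 * Example: min_vdisktime = ULLONG_MAX - 10 while vdisktime has
	 * wrapped around to 5.  The u64 subtraction yields 16, which is
	 * positive when reinterpreted as s64, so max_vdisktime() correctly
	 * treats the wrapped value as the newer one and advances
	 * min_vdisktime.  This only stays correct while the two values are
	 * within 2^63 of each other, which holds for vdisktimes on one tree.
	 */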
863 | static void update_min_vdisktime(struct cfq_rb_root *st) | 863 | static void update_min_vdisktime(struct cfq_rb_root *st) |
864 | { | 864 | { |
865 | struct cfq_group *cfqg; | 865 | struct cfq_group *cfqg; |
866 | 866 | ||
867 | if (st->left) { | 867 | if (st->left) { |
868 | cfqg = rb_entry_cfqg(st->left); | 868 | cfqg = rb_entry_cfqg(st->left); |
869 | st->min_vdisktime = max_vdisktime(st->min_vdisktime, | 869 | st->min_vdisktime = max_vdisktime(st->min_vdisktime, |
870 | cfqg->vdisktime); | 870 | cfqg->vdisktime); |
871 | } | 871 | } |
872 | } | 872 | } |
873 | 873 | ||
874 | /* | 874 | /* |
875 | * get averaged number of queues of RT/BE priority. | 875 | * get averaged number of queues of RT/BE priority. |
876 | * average is updated, with a formula that gives more weight to higher numbers, | 876 | * average is updated, with a formula that gives more weight to higher numbers, |
877 | * to quickly follows sudden increases and decrease slowly | 877 | * to quickly follows sudden increases and decrease slowly |
878 | */ | 878 | */ |
879 | 879 | ||
880 | static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd, | 880 | static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd, |
881 | struct cfq_group *cfqg, bool rt) | 881 | struct cfq_group *cfqg, bool rt) |
882 | { | 882 | { |
883 | unsigned min_q, max_q; | 883 | unsigned min_q, max_q; |
884 | unsigned mult = cfq_hist_divisor - 1; | 884 | unsigned mult = cfq_hist_divisor - 1; |
885 | unsigned round = cfq_hist_divisor / 2; | 885 | unsigned round = cfq_hist_divisor / 2; |
886 | unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg); | 886 | unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg); |
887 | 887 | ||
888 | min_q = min(cfqg->busy_queues_avg[rt], busy); | 888 | min_q = min(cfqg->busy_queues_avg[rt], busy); |
889 | max_q = max(cfqg->busy_queues_avg[rt], busy); | 889 | max_q = max(cfqg->busy_queues_avg[rt], busy); |
890 | cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) / | 890 | cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) / |
891 | cfq_hist_divisor; | 891 | cfq_hist_divisor; |
892 | return cfqg->busy_queues_avg[rt]; | 892 | return cfqg->busy_queues_avg[rt]; |
893 | } | 893 | } |
894 | 894 | ||
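Worked numbers for the averaging above, assuming cfq_hist_divisor is 4 (its value is defined elsewhere in this file; 4 is an assumption here), which gives mult = 3 and round = 2:

	/*
	 * Rise:  avg = 2, busy jumps to 10:
	 *        (3 * max(2, 10) + min(2, 10) + 2) / 4 = (30 + 2 + 2) / 4 = 8
	 * Decay: avg = 8, busy drops to 0:
	 *        (3 * max(8, 0) + min(8, 0) + 2) / 4 = (24 + 0 + 2) / 4 = 6
	 *
	 * The max() term dominates, so the estimate jumps up quickly on a
	 * burst of queues but bleeds off only gradually once they go away.
	 */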
895 | static inline unsigned | 895 | static inline unsigned |
896 | cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) | 896 | cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) |
897 | { | 897 | { |
898 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 898 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
899 | 899 | ||
900 | return cfq_target_latency * cfqg->weight / st->total_weight; | 900 | return cfq_target_latency * cfqg->weight / st->total_weight; |
901 | } | 901 | } |
902 | 902 | ||
903 | static inline unsigned | 903 | static inline unsigned |
904 | cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 904 | cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
905 | { | 905 | { |
906 | unsigned slice = cfq_prio_to_slice(cfqd, cfqq); | 906 | unsigned slice = cfq_prio_to_slice(cfqd, cfqq); |
907 | if (cfqd->cfq_latency) { | 907 | if (cfqd->cfq_latency) { |
908 | /* | 908 | /* |
909 | * interested queues (we consider only the ones with the same | 909 | * interested queues (we consider only the ones with the same |
910 | * priority class in the cfq group) | 910 | * priority class in the cfq group) |
911 | */ | 911 | */ |
912 | unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg, | 912 | unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg, |
913 | cfq_class_rt(cfqq)); | 913 | cfq_class_rt(cfqq)); |
914 | unsigned sync_slice = cfqd->cfq_slice[1]; | 914 | unsigned sync_slice = cfqd->cfq_slice[1]; |
915 | unsigned expect_latency = sync_slice * iq; | 915 | unsigned expect_latency = sync_slice * iq; |
916 | unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg); | 916 | unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg); |
917 | 917 | ||
918 | if (expect_latency > group_slice) { | 918 | if (expect_latency > group_slice) { |
919 | unsigned base_low_slice = 2 * cfqd->cfq_slice_idle; | 919 | unsigned base_low_slice = 2 * cfqd->cfq_slice_idle; |
920 | /* scale low_slice according to IO priority | 920 | /* scale low_slice according to IO priority |
921 | * and sync vs async */ | 921 | * and sync vs async */ |
922 | unsigned low_slice = | 922 | unsigned low_slice = |
923 | min(slice, base_low_slice * slice / sync_slice); | 923 | min(slice, base_low_slice * slice / sync_slice); |
924 | /* the adapted slice value is scaled to fit all iqs | 924 | /* the adapted slice value is scaled to fit all iqs |
925 | * into the target latency */ | 925 | * into the target latency */ |
926 | slice = max(slice * group_slice / expect_latency, | 926 | slice = max(slice * group_slice / expect_latency, |
927 | low_slice); | 927 | low_slice); |
928 | } | 928 | } |
929 | } | 929 | } |
930 | return slice; | 930 | return slice; |
931 | } | 931 | } |
932 | 932 | ||
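A worked pass through the latency clamp above. All the tunables are assumed (100ms sync slice, 8ms cfq_slice_idle, 300ms cfq_target_latency, a single group owning the whole weight); none of these values appear in this hunk:

	/*
	 * 8 busy sync queues in the group, prio slice = 100ms:
	 *
	 *   iq             = 8, sync_slice = 100ms
	 *   expect_latency = 100 * 8 = 800ms
	 *   group_slice    = 300ms (only group, so the full target latency)
	 *
	 * 800ms > 300ms, so the slice is shrunk:
	 *
	 *   base_low_slice = 2 * 8ms = 16ms
	 *   low_slice      = min(100, 16 * 100 / 100) = 16ms
	 *   slice          = max(100 * 300 / 800, 16) = ~37ms
	 *
	 * Each of the 8 queues now gets ~37ms, keeping one full round close
	 * to the group's 300ms share instead of 800ms.
	 */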
933 | static inline void | 933 | static inline void |
934 | cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 934 | cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
935 | { | 935 | { |
936 | unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq); | 936 | unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq); |
937 | 937 | ||
938 | cfqq->slice_start = jiffies; | 938 | cfqq->slice_start = jiffies; |
939 | cfqq->slice_end = jiffies + slice; | 939 | cfqq->slice_end = jiffies + slice; |
940 | cfqq->allocated_slice = slice; | 940 | cfqq->allocated_slice = slice; |
941 | cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); | 941 | cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); |
942 | } | 942 | } |
943 | 943 | ||
944 | /* | 944 | /* |
945 | * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end | 945 | * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end |
946 | * isn't valid until the first request from the dispatch is activated | 946 | * isn't valid until the first request from the dispatch is activated |
947 | * and the slice time set. | 947 | * and the slice time set. |
948 | */ | 948 | */ |
949 | static inline bool cfq_slice_used(struct cfq_queue *cfqq) | 949 | static inline bool cfq_slice_used(struct cfq_queue *cfqq) |
950 | { | 950 | { |
951 | if (cfq_cfqq_slice_new(cfqq)) | 951 | if (cfq_cfqq_slice_new(cfqq)) |
952 | return false; | 952 | return false; |
953 | if (time_before(jiffies, cfqq->slice_end)) | 953 | if (time_before(jiffies, cfqq->slice_end)) |
954 | return false; | 954 | return false; |
955 | 955 | ||
956 | return true; | 956 | return true; |
957 | } | 957 | } |
958 | 958 | ||
959 | /* | 959 | /* |
960 | * Lifted from AS - choose which of rq1 and rq2 is best served now. | 960 | * Lifted from AS - choose which of rq1 and rq2 is best served now. |
961 | * We choose the request that is closest to the head right now. Distance | 961 | * We choose the request that is closest to the head right now. Distance |
962 | * behind the head is penalized and only allowed to a certain extent. | 962 | * behind the head is penalized and only allowed to a certain extent. |
963 | */ | 963 | */ |
964 | static struct request * | 964 | static struct request * |
965 | cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, sector_t last) | 965 | cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, sector_t last) |
966 | { | 966 | { |
967 | sector_t s1, s2, d1 = 0, d2 = 0; | 967 | sector_t s1, s2, d1 = 0, d2 = 0; |
968 | unsigned long back_max; | 968 | unsigned long back_max; |
969 | #define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */ | 969 | #define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */ |
970 | #define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */ | 970 | #define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */ |
971 | unsigned wrap = 0; /* bit mask: requests behind the disk head? */ | 971 | unsigned wrap = 0; /* bit mask: requests behind the disk head? */ |
972 | 972 | ||
973 | if (rq1 == NULL || rq1 == rq2) | 973 | if (rq1 == NULL || rq1 == rq2) |
974 | return rq2; | 974 | return rq2; |
975 | if (rq2 == NULL) | 975 | if (rq2 == NULL) |
976 | return rq1; | 976 | return rq1; |
977 | 977 | ||
978 | if (rq_is_sync(rq1) != rq_is_sync(rq2)) | 978 | if (rq_is_sync(rq1) != rq_is_sync(rq2)) |
979 | return rq_is_sync(rq1) ? rq1 : rq2; | 979 | return rq_is_sync(rq1) ? rq1 : rq2; |
980 | 980 | ||
981 | if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO) | 981 | if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO) |
982 | return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2; | 982 | return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2; |
983 | 983 | ||
984 | s1 = blk_rq_pos(rq1); | 984 | s1 = blk_rq_pos(rq1); |
985 | s2 = blk_rq_pos(rq2); | 985 | s2 = blk_rq_pos(rq2); |
986 | 986 | ||
987 | /* | 987 | /* |
988 | * by definition, 1KiB is 2 sectors | 988 | * by definition, 1KiB is 2 sectors |
989 | */ | 989 | */ |
990 | back_max = cfqd->cfq_back_max * 2; | 990 | back_max = cfqd->cfq_back_max * 2; |
991 | 991 | ||
992 | /* | 992 | /* |
993 | * Strict one way elevator _except_ in the case where we allow | 993 | * Strict one way elevator _except_ in the case where we allow |
994 | * short backward seeks which are biased as twice the cost of a | 994 | * short backward seeks which are biased as twice the cost of a |
995 | * similar forward seek. | 995 | * similar forward seek. |
996 | */ | 996 | */ |
997 | if (s1 >= last) | 997 | if (s1 >= last) |
998 | d1 = s1 - last; | 998 | d1 = s1 - last; |
999 | else if (s1 + back_max >= last) | 999 | else if (s1 + back_max >= last) |
1000 | d1 = (last - s1) * cfqd->cfq_back_penalty; | 1000 | d1 = (last - s1) * cfqd->cfq_back_penalty; |
1001 | else | 1001 | else |
1002 | wrap |= CFQ_RQ1_WRAP; | 1002 | wrap |= CFQ_RQ1_WRAP; |
1003 | 1003 | ||
1004 | if (s2 >= last) | 1004 | if (s2 >= last) |
1005 | d2 = s2 - last; | 1005 | d2 = s2 - last; |
1006 | else if (s2 + back_max >= last) | 1006 | else if (s2 + back_max >= last) |
1007 | d2 = (last - s2) * cfqd->cfq_back_penalty; | 1007 | d2 = (last - s2) * cfqd->cfq_back_penalty; |
1008 | else | 1008 | else |
1009 | wrap |= CFQ_RQ2_WRAP; | 1009 | wrap |= CFQ_RQ2_WRAP; |
1010 | 1010 | ||
1011 | /* Found required data */ | 1011 | /* Found required data */ |
1012 | 1012 | ||
1013 | /* | 1013 | /* |
1014 | * By doing switch() on the bit mask "wrap" we avoid having to | 1014 | * By doing switch() on the bit mask "wrap" we avoid having to |
1015 | * check two variables for all permutations: --> faster! | 1015 | * check two variables for all permutations: --> faster! |
1016 | */ | 1016 | */ |
1017 | switch (wrap) { | 1017 | switch (wrap) { |
1018 | case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ | 1018 | case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ |
1019 | if (d1 < d2) | 1019 | if (d1 < d2) |
1020 | return rq1; | 1020 | return rq1; |
1021 | else if (d2 < d1) | 1021 | else if (d2 < d1) |
1022 | return rq2; | 1022 | return rq2; |
1023 | else { | 1023 | else { |
1024 | if (s1 >= s2) | 1024 | if (s1 >= s2) |
1025 | return rq1; | 1025 | return rq1; |
1026 | else | 1026 | else |
1027 | return rq2; | 1027 | return rq2; |
1028 | } | 1028 | } |
1029 | 1029 | ||
1030 | case CFQ_RQ2_WRAP: | 1030 | case CFQ_RQ2_WRAP: |
1031 | return rq1; | 1031 | return rq1; |
1032 | case CFQ_RQ1_WRAP: | 1032 | case CFQ_RQ1_WRAP: |
1033 | return rq2; | 1033 | return rq2; |
1034 | case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */ | 1034 | case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */ |
1035 | default: | 1035 | default: |
1036 | /* | 1036 | /* |
1037 | * Since both rqs are wrapped, | 1037 | * Since both rqs are wrapped, |
1038 | * start with the one that's further behind head | 1038 | * start with the one that's further behind head |
1039 | * (--> only *one* back seek required), | 1039 | * (--> only *one* back seek required), |
1040 | * since back seek takes more time than forward. | 1040 | * since back seek takes more time than forward. |
1041 | */ | 1041 | */ |
1042 | if (s1 <= s2) | 1042 | if (s1 <= s2) |
1043 | return rq1; | 1043 | return rq1; |
1044 | else | 1044 | else |
1045 | return rq2; | 1045 | return rq2; |
1046 | } | 1046 | } |
1047 | } | 1047 | } |
1048 | 1048 | ||
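An illustrative case for the distance logic above; the sector numbers are made up and the back-seek penalty of 2 is the usual default but an assumption here:

	/*
	 * last = 1000, back_max comfortably larger than 100 sectors:
	 *
	 *   rq1 at sector 1500: forward seek,           d1 = 1500 - 1000 = 500
	 *   rq2 at sector  900: backward, within range, d2 = (1000 - 900) * 2 = 200
	 *
	 * Neither request wraps, so the case 0 branch runs and rq2 wins
	 * (d2 < d1): a short seek behind the head beats a long one ahead of
	 * it, but only because it is still cheaper after the 2x penalty.
	 */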
1049 | /* | 1049 | /* |
1050 | * The below is the leftmost-cache rbtree addon | 1050 | * The below is the leftmost-cache rbtree addon |
1051 | */ | 1051 | */ |
1052 | static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root) | 1052 | static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root) |
1053 | { | 1053 | { |
1054 | /* Service tree is empty */ | 1054 | /* Service tree is empty */ |
1055 | if (!root->count) | 1055 | if (!root->count) |
1056 | return NULL; | 1056 | return NULL; |
1057 | 1057 | ||
1058 | if (!root->left) | 1058 | if (!root->left) |
1059 | root->left = rb_first(&root->rb); | 1059 | root->left = rb_first(&root->rb); |
1060 | 1060 | ||
1061 | if (root->left) | 1061 | if (root->left) |
1062 | return rb_entry(root->left, struct cfq_queue, rb_node); | 1062 | return rb_entry(root->left, struct cfq_queue, rb_node); |
1063 | 1063 | ||
1064 | return NULL; | 1064 | return NULL; |
1065 | } | 1065 | } |
1066 | 1066 | ||
1067 | static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root) | 1067 | static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root) |
1068 | { | 1068 | { |
1069 | if (!root->left) | 1069 | if (!root->left) |
1070 | root->left = rb_first(&root->rb); | 1070 | root->left = rb_first(&root->rb); |
1071 | 1071 | ||
1072 | if (root->left) | 1072 | if (root->left) |
1073 | return rb_entry_cfqg(root->left); | 1073 | return rb_entry_cfqg(root->left); |
1074 | 1074 | ||
1075 | return NULL; | 1075 | return NULL; |
1076 | } | 1076 | } |
1077 | 1077 | ||
1078 | static void rb_erase_init(struct rb_node *n, struct rb_root *root) | 1078 | static void rb_erase_init(struct rb_node *n, struct rb_root *root) |
1079 | { | 1079 | { |
1080 | rb_erase(n, root); | 1080 | rb_erase(n, root); |
1081 | RB_CLEAR_NODE(n); | 1081 | RB_CLEAR_NODE(n); |
1082 | } | 1082 | } |
1083 | 1083 | ||
1084 | static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) | 1084 | static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) |
1085 | { | 1085 | { |
1086 | if (root->left == n) | 1086 | if (root->left == n) |
1087 | root->left = NULL; | 1087 | root->left = NULL; |
1088 | rb_erase_init(n, &root->rb); | 1088 | rb_erase_init(n, &root->rb); |
1089 | --root->count; | 1089 | --root->count; |
1090 | } | 1090 | } |
1091 | 1091 | ||
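The ->left pointer maintained here is a cached rb_first(): looking up the next entity to serve is O(1), and cfq_rb_erase() drops the cache whenever the cached node itself is removed so the next cfq_rb_first()/cfq_rb_first_group() call recomputes it. A small usage sketch; pick_next() is hypothetical, not a function in this file:

	/* hypothetical caller: pop the leftmost (lowest-key) queue */
	static struct cfq_queue *pick_next(struct cfq_rb_root *root)
	{
		struct cfq_queue *cfqq = cfq_rb_first(root);	/* cached leftmost */

		if (cfqq)
			cfq_rb_erase(&cfqq->rb_node, root);	/* invalidates cache */
		return cfqq;
	}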
1092 | /* | 1092 | /* |
1093 | * would be nice to take fifo expire time into account as well | 1093 | * would be nice to take fifo expire time into account as well |
1094 | */ | 1094 | */ |
1095 | static struct request * | 1095 | static struct request * |
1096 | cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 1096 | cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
1097 | struct request *last) | 1097 | struct request *last) |
1098 | { | 1098 | { |
1099 | struct rb_node *rbnext = rb_next(&last->rb_node); | 1099 | struct rb_node *rbnext = rb_next(&last->rb_node); |
1100 | struct rb_node *rbprev = rb_prev(&last->rb_node); | 1100 | struct rb_node *rbprev = rb_prev(&last->rb_node); |
1101 | struct request *next = NULL, *prev = NULL; | 1101 | struct request *next = NULL, *prev = NULL; |
1102 | 1102 | ||
1103 | BUG_ON(RB_EMPTY_NODE(&last->rb_node)); | 1103 | BUG_ON(RB_EMPTY_NODE(&last->rb_node)); |
1104 | 1104 | ||
1105 | if (rbprev) | 1105 | if (rbprev) |
1106 | prev = rb_entry_rq(rbprev); | 1106 | prev = rb_entry_rq(rbprev); |
1107 | 1107 | ||
1108 | if (rbnext) | 1108 | if (rbnext) |
1109 | next = rb_entry_rq(rbnext); | 1109 | next = rb_entry_rq(rbnext); |
1110 | else { | 1110 | else { |
1111 | rbnext = rb_first(&cfqq->sort_list); | 1111 | rbnext = rb_first(&cfqq->sort_list); |
1112 | if (rbnext && rbnext != &last->rb_node) | 1112 | if (rbnext && rbnext != &last->rb_node) |
1113 | next = rb_entry_rq(rbnext); | 1113 | next = rb_entry_rq(rbnext); |
1114 | } | 1114 | } |
1115 | 1115 | ||
1116 | return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last)); | 1116 | return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last)); |
1117 | } | 1117 | } |
1118 | 1118 | ||
1119 | static unsigned long cfq_slice_offset(struct cfq_data *cfqd, | 1119 | static unsigned long cfq_slice_offset(struct cfq_data *cfqd, |
1120 | struct cfq_queue *cfqq) | 1120 | struct cfq_queue *cfqq) |
1121 | { | 1121 | { |
1122 | /* | 1122 | /* |
1123 | * just an approximation, should be ok. | 1123 | * just an approximation, should be ok. |
1124 | */ | 1124 | */ |
1125 | return (cfqq->cfqg->nr_cfqq - 1) * (cfq_prio_slice(cfqd, 1, 0) - | 1125 | return (cfqq->cfqg->nr_cfqq - 1) * (cfq_prio_slice(cfqd, 1, 0) - |
1126 | cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio)); | 1126 | cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio)); |
1127 | } | 1127 | } |
1128 | 1128 | ||
1129 | static inline s64 | 1129 | static inline s64 |
1130 | cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg) | 1130 | cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg) |
1131 | { | 1131 | { |
1132 | return cfqg->vdisktime - st->min_vdisktime; | 1132 | return cfqg->vdisktime - st->min_vdisktime; |
1133 | } | 1133 | } |
1134 | 1134 | ||
1135 | static void | 1135 | static void |
1136 | __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) | 1136 | __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) |
1137 | { | 1137 | { |
1138 | struct rb_node **node = &st->rb.rb_node; | 1138 | struct rb_node **node = &st->rb.rb_node; |
1139 | struct rb_node *parent = NULL; | 1139 | struct rb_node *parent = NULL; |
1140 | struct cfq_group *__cfqg; | 1140 | struct cfq_group *__cfqg; |
1141 | s64 key = cfqg_key(st, cfqg); | 1141 | s64 key = cfqg_key(st, cfqg); |
1142 | int left = 1; | 1142 | int left = 1; |
1143 | 1143 | ||
1144 | while (*node != NULL) { | 1144 | while (*node != NULL) { |
1145 | parent = *node; | 1145 | parent = *node; |
1146 | __cfqg = rb_entry_cfqg(parent); | 1146 | __cfqg = rb_entry_cfqg(parent); |
1147 | 1147 | ||
1148 | if (key < cfqg_key(st, __cfqg)) | 1148 | if (key < cfqg_key(st, __cfqg)) |
1149 | node = &parent->rb_left; | 1149 | node = &parent->rb_left; |
1150 | else { | 1150 | else { |
1151 | node = &parent->rb_right; | 1151 | node = &parent->rb_right; |
1152 | left = 0; | 1152 | left = 0; |
1153 | } | 1153 | } |
1154 | } | 1154 | } |
1155 | 1155 | ||
1156 | if (left) | 1156 | if (left) |
1157 | st->left = &cfqg->rb_node; | 1157 | st->left = &cfqg->rb_node; |
1158 | 1158 | ||
1159 | rb_link_node(&cfqg->rb_node, parent, node); | 1159 | rb_link_node(&cfqg->rb_node, parent, node); |
1160 | rb_insert_color(&cfqg->rb_node, &st->rb); | 1160 | rb_insert_color(&cfqg->rb_node, &st->rb); |
1161 | } | 1161 | } |
1162 | 1162 | ||
1163 | static void | 1163 | static void |
1164 | cfq_update_group_weight(struct cfq_group *cfqg) | 1164 | cfq_update_group_weight(struct cfq_group *cfqg) |
1165 | { | 1165 | { |
1166 | BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); | 1166 | BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); |
1167 | if (cfqg->new_weight) { | 1167 | if (cfqg->new_weight) { |
1168 | cfqg->weight = cfqg->new_weight; | 1168 | cfqg->weight = cfqg->new_weight; |
1169 | cfqg->new_weight = 0; | 1169 | cfqg->new_weight = 0; |
1170 | } | 1170 | } |
1171 | } | 1171 | } |
1172 | 1172 | ||
1173 | static void | 1173 | static void |
1174 | cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) | 1174 | cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) |
1175 | { | 1175 | { |
1176 | BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); | 1176 | BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); |
1177 | 1177 | ||
1178 | cfq_update_group_weight(cfqg); | 1178 | cfq_update_group_weight(cfqg); |
1179 | __cfq_group_service_tree_add(st, cfqg); | 1179 | __cfq_group_service_tree_add(st, cfqg); |
1180 | st->total_weight += cfqg->weight; | 1180 | st->total_weight += cfqg->weight; |
1181 | } | 1181 | } |
1182 | 1182 | ||
1183 | static void | 1183 | static void |
1184 | cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) | 1184 | cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) |
1185 | { | 1185 | { |
1186 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 1186 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
1187 | struct cfq_group *__cfqg; | 1187 | struct cfq_group *__cfqg; |
1188 | struct rb_node *n; | 1188 | struct rb_node *n; |
1189 | 1189 | ||
1190 | cfqg->nr_cfqq++; | 1190 | cfqg->nr_cfqq++; |
1191 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) | 1191 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) |
1192 | return; | 1192 | return; |
1193 | 1193 | ||
1194 | /* | 1194 | /* |
1195 | * Currently put the group at the end. Later implement something | 1195 | * Currently put the group at the end. Later implement something |
1196 | * so that groups get less vtime based on their weights, so that | 1196 | * so that groups get less vtime based on their weights, so that |
1197 | * a group does not lose everything if it was not continuously backlogged. | 1197 | * a group does not lose everything if it was not continuously backlogged. |
1198 | */ | 1198 | */ |
1199 | n = rb_last(&st->rb); | 1199 | n = rb_last(&st->rb); |
1200 | if (n) { | 1200 | if (n) { |
1201 | __cfqg = rb_entry_cfqg(n); | 1201 | __cfqg = rb_entry_cfqg(n); |
1202 | cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY; | 1202 | cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY; |
1203 | } else | 1203 | } else |
1204 | cfqg->vdisktime = st->min_vdisktime; | 1204 | cfqg->vdisktime = st->min_vdisktime; |
1205 | cfq_group_service_tree_add(st, cfqg); | 1205 | cfq_group_service_tree_add(st, cfqg); |
1206 | } | 1206 | } |
1207 | 1207 | ||
1208 | static void | 1208 | static void |
1209 | cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg) | 1209 | cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg) |
1210 | { | 1210 | { |
1211 | st->total_weight -= cfqg->weight; | 1211 | st->total_weight -= cfqg->weight; |
1212 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) | 1212 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) |
1213 | cfq_rb_erase(&cfqg->rb_node, st); | 1213 | cfq_rb_erase(&cfqg->rb_node, st); |
1214 | } | 1214 | } |
1215 | 1215 | ||
1216 | static void | 1216 | static void |
1217 | cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg) | 1217 | cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg) |
1218 | { | 1218 | { |
1219 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 1219 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
1220 | 1220 | ||
1221 | BUG_ON(cfqg->nr_cfqq < 1); | 1221 | BUG_ON(cfqg->nr_cfqq < 1); |
1222 | cfqg->nr_cfqq--; | 1222 | cfqg->nr_cfqq--; |
1223 | 1223 | ||
1224 | /* If there are other cfq queues under this group, don't delete it */ | 1224 | /* If there are other cfq queues under this group, don't delete it */ |
1225 | if (cfqg->nr_cfqq) | 1225 | if (cfqg->nr_cfqq) |
1226 | return; | 1226 | return; |
1227 | 1227 | ||
1228 | cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); | 1228 | cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); |
1229 | cfq_group_service_tree_del(st, cfqg); | 1229 | cfq_group_service_tree_del(st, cfqg); |
1230 | cfqg->saved_workload_slice = 0; | 1230 | cfqg->saved_workload_slice = 0; |
1231 | cfqg_stats_update_dequeue(cfqg); | 1231 | cfqg_stats_update_dequeue(cfqg); |
1232 | } | 1232 | } |
1233 | 1233 | ||
1234 | static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq, | 1234 | static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq, |
1235 | unsigned int *unaccounted_time) | 1235 | unsigned int *unaccounted_time) |
1236 | { | 1236 | { |
1237 | unsigned int slice_used; | 1237 | unsigned int slice_used; |
1238 | 1238 | ||
1239 | /* | 1239 | /* |
1240 | * The queue expired before even a single request completed, or | 1240 | * The queue expired before even a single request completed, or |
1241 | * expired immediately after the first request completion. | 1241 | * expired immediately after the first request completion. |
1242 | */ | 1242 | */ |
1243 | if (!cfqq->slice_start || cfqq->slice_start == jiffies) { | 1243 | if (!cfqq->slice_start || cfqq->slice_start == jiffies) { |
1244 | /* | 1244 | /* |
1245 | * Also charge the seek time incurred to the group, otherwise | 1245 | * Also charge the seek time incurred to the group, otherwise |
1246 | * if there are multiple queues in the group, each can dispatch | 1246 | * if there are multiple queues in the group, each can dispatch |
1247 | * a single request on seeky media and cause lots of seek time, | 1247 | * a single request on seeky media and cause lots of seek time, |
1248 | * and the group will never know it. | 1248 | * and the group will never know it. |
1249 | */ | 1249 | */ |
1250 | slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start), | 1250 | slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start), |
1251 | 1); | 1251 | 1); |
1252 | } else { | 1252 | } else { |
1253 | slice_used = jiffies - cfqq->slice_start; | 1253 | slice_used = jiffies - cfqq->slice_start; |
1254 | if (slice_used > cfqq->allocated_slice) { | 1254 | if (slice_used > cfqq->allocated_slice) { |
1255 | *unaccounted_time = slice_used - cfqq->allocated_slice; | 1255 | *unaccounted_time = slice_used - cfqq->allocated_slice; |
1256 | slice_used = cfqq->allocated_slice; | 1256 | slice_used = cfqq->allocated_slice; |
1257 | } | 1257 | } |
1258 | if (time_after(cfqq->slice_start, cfqq->dispatch_start)) | 1258 | if (time_after(cfqq->slice_start, cfqq->dispatch_start)) |
1259 | *unaccounted_time += cfqq->slice_start - | 1259 | *unaccounted_time += cfqq->slice_start - |
1260 | cfqq->dispatch_start; | 1260 | cfqq->dispatch_start; |
1261 | } | 1261 | } |
1262 | 1262 | ||
1263 | return slice_used; | 1263 | return slice_used; |
1264 | } | 1264 | } |
1265 | 1265 | ||
1266 | static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, | 1266 | static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, |
1267 | struct cfq_queue *cfqq) | 1267 | struct cfq_queue *cfqq) |
1268 | { | 1268 | { |
1269 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 1269 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
1270 | unsigned int used_sl, charge, unaccounted_sl = 0; | 1270 | unsigned int used_sl, charge, unaccounted_sl = 0; |
1271 | int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) | 1271 | int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) |
1272 | - cfqg->service_tree_idle.count; | 1272 | - cfqg->service_tree_idle.count; |
1273 | 1273 | ||
1274 | BUG_ON(nr_sync < 0); | 1274 | BUG_ON(nr_sync < 0); |
1275 | used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); | 1275 | used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); |
1276 | 1276 | ||
1277 | if (iops_mode(cfqd)) | 1277 | if (iops_mode(cfqd)) |
1278 | charge = cfqq->slice_dispatch; | 1278 | charge = cfqq->slice_dispatch; |
1279 | else if (!cfq_cfqq_sync(cfqq) && !nr_sync) | 1279 | else if (!cfq_cfqq_sync(cfqq) && !nr_sync) |
1280 | charge = cfqq->allocated_slice; | 1280 | charge = cfqq->allocated_slice; |
1281 | 1281 | ||
1282 | /* Can't update vdisktime while group is on service tree */ | 1282 | /* Can't update vdisktime while group is on service tree */ |
1283 | cfq_group_service_tree_del(st, cfqg); | 1283 | cfq_group_service_tree_del(st, cfqg); |
1284 | cfqg->vdisktime += cfq_scale_slice(charge, cfqg); | 1284 | cfqg->vdisktime += cfq_scale_slice(charge, cfqg); |
1285 | /* If a new weight was requested, update now, off tree */ | 1285 | /* If a new weight was requested, update now, off tree */ |
1286 | cfq_group_service_tree_add(st, cfqg); | 1286 | cfq_group_service_tree_add(st, cfqg); |
1287 | 1287 | ||
1288 | /* This group is being expired. Save the context */ | 1288 | /* This group is being expired. Save the context */ |
1289 | if (time_after(cfqd->workload_expires, jiffies)) { | 1289 | if (time_after(cfqd->workload_expires, jiffies)) { |
1290 | cfqg->saved_workload_slice = cfqd->workload_expires | 1290 | cfqg->saved_workload_slice = cfqd->workload_expires |
1291 | - jiffies; | 1291 | - jiffies; |
1292 | cfqg->saved_workload = cfqd->serving_type; | 1292 | cfqg->saved_workload = cfqd->serving_type; |
1293 | cfqg->saved_serving_prio = cfqd->serving_prio; | 1293 | cfqg->saved_serving_prio = cfqd->serving_prio; |
1294 | } else | 1294 | } else |
1295 | cfqg->saved_workload_slice = 0; | 1295 | cfqg->saved_workload_slice = 0; |
1296 | 1296 | ||
1297 | cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, | 1297 | cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, |
1298 | st->min_vdisktime); | 1298 | st->min_vdisktime); |
1299 | cfq_log_cfqq(cfqq->cfqd, cfqq, | 1299 | cfq_log_cfqq(cfqq->cfqd, cfqq, |
1300 | "sl_used=%u disp=%u charge=%u iops=%u sect=%lu", | 1300 | "sl_used=%u disp=%u charge=%u iops=%u sect=%lu", |
1301 | used_sl, cfqq->slice_dispatch, charge, | 1301 | used_sl, cfqq->slice_dispatch, charge, |
1302 | iops_mode(cfqd), cfqq->nr_sectors); | 1302 | iops_mode(cfqd), cfqq->nr_sectors); |
1303 | cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl); | 1303 | cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl); |
1304 | cfqg_stats_set_start_empty_time(cfqg); | 1304 | cfqg_stats_set_start_empty_time(cfqg); |
1305 | } | 1305 | } |
1306 | 1306 | ||
1307 | /** | 1307 | /** |
1308 | * cfq_init_cfqg_base - initialize base part of a cfq_group | 1308 | * cfq_init_cfqg_base - initialize base part of a cfq_group |
1309 | * @cfqg: cfq_group to initialize | 1309 | * @cfqg: cfq_group to initialize |
1310 | * | 1310 | * |
1311 | * Initialize the base part which is used whether %CONFIG_CFQ_GROUP_IOSCHED | 1311 | * Initialize the base part which is used whether %CONFIG_CFQ_GROUP_IOSCHED |
1312 | * is enabled or not. | 1312 | * is enabled or not. |
1313 | */ | 1313 | */ |
1314 | static void cfq_init_cfqg_base(struct cfq_group *cfqg) | 1314 | static void cfq_init_cfqg_base(struct cfq_group *cfqg) |
1315 | { | 1315 | { |
1316 | struct cfq_rb_root *st; | 1316 | struct cfq_rb_root *st; |
1317 | int i, j; | 1317 | int i, j; |
1318 | 1318 | ||
1319 | for_each_cfqg_st(cfqg, i, j, st) | 1319 | for_each_cfqg_st(cfqg, i, j, st) |
1320 | *st = CFQ_RB_ROOT; | 1320 | *st = CFQ_RB_ROOT; |
1321 | RB_CLEAR_NODE(&cfqg->rb_node); | 1321 | RB_CLEAR_NODE(&cfqg->rb_node); |
1322 | 1322 | ||
1323 | cfqg->ttime.last_end_request = jiffies; | 1323 | cfqg->ttime.last_end_request = jiffies; |
1324 | } | 1324 | } |
1325 | 1325 | ||
1326 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 1326 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
1327 | static void cfq_init_blkio_group(struct blkio_group *blkg) | 1327 | static void cfq_init_blkio_group(struct blkio_group *blkg) |
1328 | { | 1328 | { |
1329 | struct cfq_group *cfqg = blkg_to_cfqg(blkg); | 1329 | struct cfq_group *cfqg = blkg_to_cfqg(blkg); |
1330 | 1330 | ||
1331 | cfq_init_cfqg_base(cfqg); | 1331 | cfq_init_cfqg_base(cfqg); |
1332 | cfqg->weight = blkg->blkcg->cfq_weight; | 1332 | cfqg->weight = blkg->blkcg->cfq_weight; |
1333 | } | 1333 | } |
1334 | 1334 | ||
1335 | /* | 1335 | /* |
1336 | * Search for the cfq group current task belongs to. request_queue lock must | 1336 | * Search for the cfq group current task belongs to. request_queue lock must |
1337 | * be held. | 1337 | * be held. |
1338 | */ | 1338 | */ |
1339 | static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, | 1339 | static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, |
1340 | struct blkio_cgroup *blkcg) | 1340 | struct blkio_cgroup *blkcg) |
1341 | { | 1341 | { |
1342 | struct request_queue *q = cfqd->queue; | 1342 | struct request_queue *q = cfqd->queue; |
1343 | struct cfq_group *cfqg = NULL; | 1343 | struct cfq_group *cfqg = NULL; |
1344 | 1344 | ||
1345 | /* avoid lookup for the common case where there's no blkio cgroup */ | 1345 | /* avoid lookup for the common case where there's no blkio cgroup */ |
1346 | if (blkcg == &blkio_root_cgroup) { | 1346 | if (blkcg == &blkio_root_cgroup) { |
1347 | cfqg = cfqd->root_group; | 1347 | cfqg = cfqd->root_group; |
1348 | } else { | 1348 | } else { |
1349 | struct blkio_group *blkg; | 1349 | struct blkio_group *blkg; |
1350 | 1350 | ||
1351 | blkg = blkg_lookup_create(blkcg, q, false); | 1351 | blkg = blkg_lookup_create(blkcg, q, false); |
1352 | if (!IS_ERR(blkg)) | 1352 | if (!IS_ERR(blkg)) |
1353 | cfqg = blkg_to_cfqg(blkg); | 1353 | cfqg = blkg_to_cfqg(blkg); |
1354 | } | 1354 | } |
1355 | 1355 | ||
1356 | return cfqg; | 1356 | return cfqg; |
1357 | } | 1357 | } |
1358 | 1358 | ||
1359 | static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) | 1359 | static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) |
1360 | { | 1360 | { |
1361 | /* Currently, all async queues are mapped to root group */ | 1361 | /* Currently, all async queues are mapped to root group */ |
1362 | if (!cfq_cfqq_sync(cfqq)) | 1362 | if (!cfq_cfqq_sync(cfqq)) |
1363 | cfqg = cfqq->cfqd->root_group; | 1363 | cfqg = cfqq->cfqd->root_group; |
1364 | 1364 | ||
1365 | cfqq->cfqg = cfqg; | 1365 | cfqq->cfqg = cfqg; |
1366 | /* cfqq reference on cfqg */ | 1366 | /* cfqq reference on cfqg */ |
1367 | cfqg_get(cfqg); | 1367 | cfqg_get(cfqg); |
1368 | } | 1368 | } |
1369 | 1369 | ||
1370 | static u64 cfqg_prfill_weight_device(struct seq_file *sf, void *pdata, int off) | 1370 | static u64 cfqg_prfill_weight_device(struct seq_file *sf, void *pdata, int off) |
1371 | { | 1371 | { |
1372 | struct cfq_group *cfqg = pdata; | 1372 | struct cfq_group *cfqg = pdata; |
1373 | 1373 | ||
1374 | if (!cfqg->dev_weight) | 1374 | if (!cfqg->dev_weight) |
1375 | return 0; | 1375 | return 0; |
1376 | return __blkg_prfill_u64(sf, pdata, cfqg->dev_weight); | 1376 | return __blkg_prfill_u64(sf, pdata, cfqg->dev_weight); |
1377 | } | 1377 | } |
1378 | 1378 | ||
1379 | static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, | 1379 | static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, |
1380 | struct seq_file *sf) | 1380 | struct seq_file *sf) |
1381 | { | 1381 | { |
1382 | blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), | 1382 | blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), |
1383 | cfqg_prfill_weight_device, &blkio_policy_cfq, 0, | 1383 | cfqg_prfill_weight_device, &blkio_policy_cfq, 0, |
1384 | false); | 1384 | false); |
1385 | return 0; | 1385 | return 0; |
1386 | } | 1386 | } |
1387 | 1387 | ||
1388 | static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft, | 1388 | static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft, |
1389 | struct seq_file *sf) | 1389 | struct seq_file *sf) |
1390 | { | 1390 | { |
1391 | seq_printf(sf, "%u\n", cgroup_to_blkio_cgroup(cgrp)->cfq_weight); | 1391 | seq_printf(sf, "%u\n", cgroup_to_blkio_cgroup(cgrp)->cfq_weight); |
1392 | return 0; | 1392 | return 0; |
1393 | } | 1393 | } |
1394 | 1394 | ||
1395 | static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, | 1395 | static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, |
1396 | const char *buf) | 1396 | const char *buf) |
1397 | { | 1397 | { |
1398 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); | 1398 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); |
1399 | struct blkg_conf_ctx ctx; | 1399 | struct blkg_conf_ctx ctx; |
1400 | struct cfq_group *cfqg; | 1400 | struct cfq_group *cfqg; |
1401 | int ret; | 1401 | int ret; |
1402 | 1402 | ||
1403 | ret = blkg_conf_prep(blkcg, &blkio_policy_cfq, buf, &ctx); | 1403 | ret = blkg_conf_prep(blkcg, &blkio_policy_cfq, buf, &ctx); |
1404 | if (ret) | 1404 | if (ret) |
1405 | return ret; | 1405 | return ret; |
1406 | 1406 | ||
1407 | ret = -EINVAL; | 1407 | ret = -EINVAL; |
1408 | cfqg = blkg_to_cfqg(ctx.blkg); | 1408 | cfqg = blkg_to_cfqg(ctx.blkg); |
1409 | if (cfqg && (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && | 1409 | if (cfqg && (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && |
1410 | ctx.v <= CFQ_WEIGHT_MAX))) { | 1410 | ctx.v <= CFQ_WEIGHT_MAX))) { |
1411 | cfqg->dev_weight = ctx.v; | 1411 | cfqg->dev_weight = ctx.v; |
1412 | cfqg->new_weight = cfqg->dev_weight ?: blkcg->cfq_weight; | 1412 | cfqg->new_weight = cfqg->dev_weight ?: blkcg->cfq_weight; |
1413 | ret = 0; | 1413 | ret = 0; |
1414 | } | 1414 | } |
1415 | 1415 | ||
1416 | blkg_conf_finish(&ctx); | 1416 | blkg_conf_finish(&ctx); |
1417 | return ret; | 1417 | return ret; |
1418 | } | 1418 | } |
1419 | 1419 | ||
1420 | static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) | 1420 | static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) |
1421 | { | 1421 | { |
1422 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); | 1422 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); |
1423 | struct blkio_group *blkg; | 1423 | struct blkio_group *blkg; |
1424 | struct hlist_node *n; | 1424 | struct hlist_node *n; |
1425 | 1425 | ||
1426 | if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX) | 1426 | if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX) |
1427 | return -EINVAL; | 1427 | return -EINVAL; |
1428 | 1428 | ||
1429 | spin_lock_irq(&blkcg->lock); | 1429 | spin_lock_irq(&blkcg->lock); |
1430 | blkcg->cfq_weight = (unsigned int)val; | 1430 | blkcg->cfq_weight = (unsigned int)val; |
1431 | 1431 | ||
1432 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { | 1432 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { |
1433 | struct cfq_group *cfqg = blkg_to_cfqg(blkg); | 1433 | struct cfq_group *cfqg = blkg_to_cfqg(blkg); |
1434 | 1434 | ||
1435 | if (cfqg && !cfqg->dev_weight) | 1435 | if (cfqg && !cfqg->dev_weight) |
1436 | cfqg->new_weight = blkcg->cfq_weight; | 1436 | cfqg->new_weight = blkcg->cfq_weight; |
1437 | } | 1437 | } |
1438 | 1438 | ||
1439 | spin_unlock_irq(&blkcg->lock); | 1439 | spin_unlock_irq(&blkcg->lock); |
1440 | return 0; | 1440 | return 0; |
1441 | } | 1441 | } |
1442 | 1442 | ||
1443 | static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft, | 1443 | static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft, |
1444 | struct seq_file *sf) | 1444 | struct seq_file *sf) |
1445 | { | 1445 | { |
1446 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); | 1446 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); |
1447 | 1447 | ||
1448 | blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkio_policy_cfq, | 1448 | blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkio_policy_cfq, |
1449 | cft->private, false); | 1449 | cft->private, false); |
1450 | return 0; | 1450 | return 0; |
1451 | } | 1451 | } |
1452 | 1452 | ||
1453 | static int cfqg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, | 1453 | static int cfqg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, |
1454 | struct seq_file *sf) | 1454 | struct seq_file *sf) |
1455 | { | 1455 | { |
1456 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); | 1456 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); |
1457 | 1457 | ||
1458 | blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkio_policy_cfq, | 1458 | blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkio_policy_cfq, |
1459 | cft->private, true); | 1459 | cft->private, true); |
1460 | return 0; | 1460 | return 0; |
1461 | } | 1461 | } |
1462 | 1462 | ||
1463 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 1463 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
1464 | static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, void *pdata, int off) | 1464 | static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, void *pdata, int off) |
1465 | { | 1465 | { |
1466 | struct cfq_group *cfqg = pdata; | 1466 | struct cfq_group *cfqg = pdata; |
1467 | u64 samples = blkg_stat_read(&cfqg->stats.avg_queue_size_samples); | 1467 | u64 samples = blkg_stat_read(&cfqg->stats.avg_queue_size_samples); |
1468 | u64 v = 0; | 1468 | u64 v = 0; |
1469 | 1469 | ||
1470 | if (samples) { | 1470 | if (samples) { |
1471 | v = blkg_stat_read(&cfqg->stats.avg_queue_size_sum); | 1471 | v = blkg_stat_read(&cfqg->stats.avg_queue_size_sum); |
1472 | do_div(v, samples); | 1472 | do_div(v, samples); |
1473 | } | 1473 | } |
1474 | __blkg_prfill_u64(sf, pdata, v); | 1474 | __blkg_prfill_u64(sf, pdata, v); |
1475 | return 0; | 1475 | return 0; |
1476 | } | 1476 | } |
1477 | 1477 | ||
1478 | /* print avg_queue_size */ | 1478 | /* print avg_queue_size */ |
1479 | static int cfqg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft, | 1479 | static int cfqg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft, |
1480 | struct seq_file *sf) | 1480 | struct seq_file *sf) |
1481 | { | 1481 | { |
1482 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); | 1482 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); |
1483 | 1483 | ||
1484 | blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size, | 1484 | blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size, |
1485 | &blkio_policy_cfq, 0, false); | 1485 | &blkio_policy_cfq, 0, false); |
1486 | return 0; | 1486 | return 0; |
1487 | } | 1487 | } |
1488 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ | 1488 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ |
1489 | 1489 | ||
1490 | static struct cftype cfq_blkcg_files[] = { | 1490 | static struct cftype cfq_blkcg_files[] = { |
1491 | { | 1491 | { |
1492 | .name = "weight_device", | 1492 | .name = "weight_device", |
1493 | .read_seq_string = cfqg_print_weight_device, | 1493 | .read_seq_string = cfqg_print_weight_device, |
1494 | .write_string = cfqg_set_weight_device, | 1494 | .write_string = cfqg_set_weight_device, |
1495 | .max_write_len = 256, | 1495 | .max_write_len = 256, |
1496 | }, | 1496 | }, |
1497 | { | 1497 | { |
1498 | .name = "weight", | 1498 | .name = "weight", |
1499 | .read_seq_string = cfq_print_weight, | 1499 | .read_seq_string = cfq_print_weight, |
1500 | .write_u64 = cfq_set_weight, | 1500 | .write_u64 = cfq_set_weight, |
1501 | }, | 1501 | }, |
1502 | { | 1502 | { |
1503 | .name = "time", | 1503 | .name = "time", |
1504 | .private = offsetof(struct cfq_group, stats.time), | 1504 | .private = offsetof(struct cfq_group, stats.time), |
1505 | .read_seq_string = cfqg_print_stat, | 1505 | .read_seq_string = cfqg_print_stat, |
1506 | }, | 1506 | }, |
1507 | { | 1507 | { |
1508 | .name = "sectors", | 1508 | .name = "sectors", |
1509 | .private = offsetof(struct cfq_group, stats.sectors), | 1509 | .private = offsetof(struct cfq_group, stats.sectors), |
1510 | .read_seq_string = cfqg_print_stat, | 1510 | .read_seq_string = cfqg_print_stat, |
1511 | }, | 1511 | }, |
1512 | { | 1512 | { |
1513 | .name = "io_service_bytes", | 1513 | .name = "io_service_bytes", |
1514 | .private = offsetof(struct cfq_group, stats.service_bytes), | 1514 | .private = offsetof(struct cfq_group, stats.service_bytes), |
1515 | .read_seq_string = cfqg_print_rwstat, | 1515 | .read_seq_string = cfqg_print_rwstat, |
1516 | }, | 1516 | }, |
1517 | { | 1517 | { |
1518 | .name = "io_serviced", | 1518 | .name = "io_serviced", |
1519 | .private = offsetof(struct cfq_group, stats.serviced), | 1519 | .private = offsetof(struct cfq_group, stats.serviced), |
1520 | .read_seq_string = cfqg_print_rwstat, | 1520 | .read_seq_string = cfqg_print_rwstat, |
1521 | }, | 1521 | }, |
1522 | { | 1522 | { |
1523 | .name = "io_service_time", | 1523 | .name = "io_service_time", |
1524 | .private = offsetof(struct cfq_group, stats.service_time), | 1524 | .private = offsetof(struct cfq_group, stats.service_time), |
1525 | .read_seq_string = cfqg_print_rwstat, | 1525 | .read_seq_string = cfqg_print_rwstat, |
1526 | }, | 1526 | }, |
1527 | { | 1527 | { |
1528 | .name = "io_wait_time", | 1528 | .name = "io_wait_time", |
1529 | .private = offsetof(struct cfq_group, stats.wait_time), | 1529 | .private = offsetof(struct cfq_group, stats.wait_time), |
1530 | .read_seq_string = cfqg_print_rwstat, | 1530 | .read_seq_string = cfqg_print_rwstat, |
1531 | }, | 1531 | }, |
1532 | { | 1532 | { |
1533 | .name = "io_merged", | 1533 | .name = "io_merged", |
1534 | .private = offsetof(struct cfq_group, stats.merged), | 1534 | .private = offsetof(struct cfq_group, stats.merged), |
1535 | .read_seq_string = cfqg_print_rwstat, | 1535 | .read_seq_string = cfqg_print_rwstat, |
1536 | }, | 1536 | }, |
1537 | { | 1537 | { |
1538 | .name = "io_queued", | 1538 | .name = "io_queued", |
1539 | .private = offsetof(struct cfq_group, stats.queued), | 1539 | .private = offsetof(struct cfq_group, stats.queued), |
1540 | .read_seq_string = cfqg_print_rwstat, | 1540 | .read_seq_string = cfqg_print_rwstat, |
1541 | }, | 1541 | }, |
1542 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 1542 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
1543 | { | 1543 | { |
1544 | .name = "avg_queue_size", | 1544 | .name = "avg_queue_size", |
1545 | .read_seq_string = cfqg_print_avg_queue_size, | 1545 | .read_seq_string = cfqg_print_avg_queue_size, |
1546 | }, | 1546 | }, |
1547 | { | 1547 | { |
1548 | .name = "group_wait_time", | 1548 | .name = "group_wait_time", |
1549 | .private = offsetof(struct cfq_group, stats.group_wait_time), | 1549 | .private = offsetof(struct cfq_group, stats.group_wait_time), |
1550 | .read_seq_string = cfqg_print_stat, | 1550 | .read_seq_string = cfqg_print_stat, |
1551 | }, | 1551 | }, |
1552 | { | 1552 | { |
1553 | .name = "idle_time", | 1553 | .name = "idle_time", |
1554 | .private = offsetof(struct cfq_group, stats.idle_time), | 1554 | .private = offsetof(struct cfq_group, stats.idle_time), |
1555 | .read_seq_string = cfqg_print_stat, | 1555 | .read_seq_string = cfqg_print_stat, |
1556 | }, | 1556 | }, |
1557 | { | 1557 | { |
1558 | .name = "empty_time", | 1558 | .name = "empty_time", |
1559 | .private = offsetof(struct cfq_group, stats.empty_time), | 1559 | .private = offsetof(struct cfq_group, stats.empty_time), |
1560 | .read_seq_string = cfqg_print_stat, | 1560 | .read_seq_string = cfqg_print_stat, |
1561 | }, | 1561 | }, |
1562 | { | 1562 | { |
1563 | .name = "dequeue", | 1563 | .name = "dequeue", |
1564 | .private = offsetof(struct cfq_group, stats.dequeue), | 1564 | .private = offsetof(struct cfq_group, stats.dequeue), |
1565 | .read_seq_string = cfqg_print_stat, | 1565 | .read_seq_string = cfqg_print_stat, |
1566 | }, | 1566 | }, |
1567 | { | 1567 | { |
1568 | .name = "unaccounted_time", | 1568 | .name = "unaccounted_time", |
1569 | .private = offsetof(struct cfq_group, stats.unaccounted_time), | 1569 | .private = offsetof(struct cfq_group, stats.unaccounted_time), |
1570 | .read_seq_string = cfqg_print_stat, | 1570 | .read_seq_string = cfqg_print_stat, |
1571 | }, | 1571 | }, |
1572 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ | 1572 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ |
1573 | { } /* terminate */ | 1573 | { } /* terminate */ |
1574 | }; | 1574 | }; |
1575 | #else /* GROUP_IOSCHED */ | 1575 | #else /* GROUP_IOSCHED */ |
1576 | static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, | 1576 | static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, |
1577 | struct blkio_cgroup *blkcg) | 1577 | struct blkio_cgroup *blkcg) |
1578 | { | 1578 | { |
1579 | return cfqd->root_group; | 1579 | return cfqd->root_group; |
1580 | } | 1580 | } |
1581 | 1581 | ||
1582 | static inline void | 1582 | static inline void |
1583 | cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { | 1583 | cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { |
1584 | cfqq->cfqg = cfqg; | 1584 | cfqq->cfqg = cfqg; |
1585 | } | 1585 | } |
1586 | 1586 | ||
1587 | #endif /* GROUP_IOSCHED */ | 1587 | #endif /* GROUP_IOSCHED */ |
1588 | 1588 | ||
1589 | /* | 1589 | /* |
1590 | * The cfqd->service_trees holds all pending cfq_queue's that have | 1590 | * The cfqd->service_trees holds all pending cfq_queue's that have |
1591 | * requests waiting to be processed. It is sorted in the order that | 1591 | * requests waiting to be processed. It is sorted in the order that |
1592 | * we will service the queues. | 1592 | * we will service the queues. |
1593 | */ | 1593 | */ |
1594 | static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 1594 | static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
1595 | bool add_front) | 1595 | bool add_front) |
1596 | { | 1596 | { |
1597 | struct rb_node **p, *parent; | 1597 | struct rb_node **p, *parent; |
1598 | struct cfq_queue *__cfqq; | 1598 | struct cfq_queue *__cfqq; |
1599 | unsigned long rb_key; | 1599 | unsigned long rb_key; |
1600 | struct cfq_rb_root *service_tree; | 1600 | struct cfq_rb_root *service_tree; |
1601 | int left; | 1601 | int left; |
1602 | int new_cfqq = 1; | 1602 | int new_cfqq = 1; |
1603 | 1603 | ||
1604 | service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq), | 1604 | service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq), |
1605 | cfqq_type(cfqq)); | 1605 | cfqq_type(cfqq)); |
1606 | if (cfq_class_idle(cfqq)) { | 1606 | if (cfq_class_idle(cfqq)) { |
1607 | rb_key = CFQ_IDLE_DELAY; | 1607 | rb_key = CFQ_IDLE_DELAY; |
1608 | parent = rb_last(&service_tree->rb); | 1608 | parent = rb_last(&service_tree->rb); |
1609 | if (parent && parent != &cfqq->rb_node) { | 1609 | if (parent && parent != &cfqq->rb_node) { |
1610 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); | 1610 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); |
1611 | rb_key += __cfqq->rb_key; | 1611 | rb_key += __cfqq->rb_key; |
1612 | } else | 1612 | } else |
1613 | rb_key += jiffies; | 1613 | rb_key += jiffies; |
1614 | } else if (!add_front) { | 1614 | } else if (!add_front) { |
1615 | /* | 1615 | /* |
1616 | * Get our rb key offset. Subtract any residual slice | 1616 | * Get our rb key offset. Subtract any residual slice |
1617 | * value carried from last service. A negative resid | 1617 | * value carried from last service. A negative resid |
1618 | * count indicates slice overrun, and this should position | 1618 | * count indicates slice overrun, and this should position |
1619 | * the next service time further away in the tree. | 1619 | * the next service time further away in the tree. |
1620 | */ | 1620 | */ |
1621 | rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; | 1621 | rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; |
1622 | rb_key -= cfqq->slice_resid; | 1622 | rb_key -= cfqq->slice_resid; |
1623 | cfqq->slice_resid = 0; | 1623 | cfqq->slice_resid = 0; |
1624 | } else { | 1624 | } else { |
1625 | rb_key = -HZ; | 1625 | rb_key = -HZ; |
1626 | __cfqq = cfq_rb_first(service_tree); | 1626 | __cfqq = cfq_rb_first(service_tree); |
1627 | rb_key += __cfqq ? __cfqq->rb_key : jiffies; | 1627 | rb_key += __cfqq ? __cfqq->rb_key : jiffies; |
1628 | } | 1628 | } |
1629 | 1629 | ||
1630 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) { | 1630 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) { |
1631 | new_cfqq = 0; | 1631 | new_cfqq = 0; |
1632 | /* | 1632 | /* |
1633 | * same position, nothing more to do | 1633 | * same position, nothing more to do |
1634 | */ | 1634 | */ |
1635 | if (rb_key == cfqq->rb_key && | 1635 | if (rb_key == cfqq->rb_key && |
1636 | cfqq->service_tree == service_tree) | 1636 | cfqq->service_tree == service_tree) |
1637 | return; | 1637 | return; |
1638 | 1638 | ||
1639 | cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); | 1639 | cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); |
1640 | cfqq->service_tree = NULL; | 1640 | cfqq->service_tree = NULL; |
1641 | } | 1641 | } |
1642 | 1642 | ||
1643 | left = 1; | 1643 | left = 1; |
1644 | parent = NULL; | 1644 | parent = NULL; |
1645 | cfqq->service_tree = service_tree; | 1645 | cfqq->service_tree = service_tree; |
1646 | p = &service_tree->rb.rb_node; | 1646 | p = &service_tree->rb.rb_node; |
1647 | while (*p) { | 1647 | while (*p) { |
1648 | struct rb_node **n; | 1648 | struct rb_node **n; |
1649 | 1649 | ||
1650 | parent = *p; | 1650 | parent = *p; |
1651 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); | 1651 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); |
1652 | 1652 | ||
1653 | /* | 1653 | /* |
1654 | * sort by key, which represents service time. | 1654 | * sort by key, which represents service time. |
1655 | */ | 1655 | */ |
1656 | if (time_before(rb_key, __cfqq->rb_key)) | 1656 | if (time_before(rb_key, __cfqq->rb_key)) |
1657 | n = &(*p)->rb_left; | 1657 | n = &(*p)->rb_left; |
1658 | else { | 1658 | else { |
1659 | n = &(*p)->rb_right; | 1659 | n = &(*p)->rb_right; |
1660 | left = 0; | 1660 | left = 0; |
1661 | } | 1661 | } |
1662 | 1662 | ||
1663 | p = n; | 1663 | p = n; |
1664 | } | 1664 | } |
1665 | 1665 | ||
1666 | if (left) | 1666 | if (left) |
1667 | service_tree->left = &cfqq->rb_node; | 1667 | service_tree->left = &cfqq->rb_node; |
1668 | 1668 | ||
1669 | cfqq->rb_key = rb_key; | 1669 | cfqq->rb_key = rb_key; |
1670 | rb_link_node(&cfqq->rb_node, parent, p); | 1670 | rb_link_node(&cfqq->rb_node, parent, p); |
1671 | rb_insert_color(&cfqq->rb_node, &service_tree->rb); | 1671 | rb_insert_color(&cfqq->rb_node, &service_tree->rb); |
1672 | service_tree->count++; | 1672 | service_tree->count++; |
1673 | if (add_front || !new_cfqq) | 1673 | if (add_front || !new_cfqq) |
1674 | return; | 1674 | return; |
1675 | cfq_group_notify_queue_add(cfqd, cfqq->cfqg); | 1675 | cfq_group_notify_queue_add(cfqd, cfqq->cfqg); |
1676 | } | 1676 | } |
1677 | 1677 | ||
1678 | static struct cfq_queue * | 1678 | static struct cfq_queue * |
1679 | cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root, | 1679 | cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root, |
1680 | sector_t sector, struct rb_node **ret_parent, | 1680 | sector_t sector, struct rb_node **ret_parent, |
1681 | struct rb_node ***rb_link) | 1681 | struct rb_node ***rb_link) |
1682 | { | 1682 | { |
1683 | struct rb_node **p, *parent; | 1683 | struct rb_node **p, *parent; |
1684 | struct cfq_queue *cfqq = NULL; | 1684 | struct cfq_queue *cfqq = NULL; |
1685 | 1685 | ||
1686 | parent = NULL; | 1686 | parent = NULL; |
1687 | p = &root->rb_node; | 1687 | p = &root->rb_node; |
1688 | while (*p) { | 1688 | while (*p) { |
1689 | struct rb_node **n; | 1689 | struct rb_node **n; |
1690 | 1690 | ||
1691 | parent = *p; | 1691 | parent = *p; |
1692 | cfqq = rb_entry(parent, struct cfq_queue, p_node); | 1692 | cfqq = rb_entry(parent, struct cfq_queue, p_node); |
1693 | 1693 | ||
1694 | /* | 1694 | /* |
1695 | * Sort strictly based on sector. Smallest to the left, | 1695 | * Sort strictly based on sector. Smallest to the left, |
1696 | * largest to the right. | 1696 | * largest to the right. |
1697 | */ | 1697 | */ |
1698 | if (sector > blk_rq_pos(cfqq->next_rq)) | 1698 | if (sector > blk_rq_pos(cfqq->next_rq)) |
1699 | n = &(*p)->rb_right; | 1699 | n = &(*p)->rb_right; |
1700 | else if (sector < blk_rq_pos(cfqq->next_rq)) | 1700 | else if (sector < blk_rq_pos(cfqq->next_rq)) |
1701 | n = &(*p)->rb_left; | 1701 | n = &(*p)->rb_left; |
1702 | else | 1702 | else |
1703 | break; | 1703 | break; |
1704 | p = n; | 1704 | p = n; |
1705 | cfqq = NULL; | 1705 | cfqq = NULL; |
1706 | } | 1706 | } |
1707 | 1707 | ||
1708 | *ret_parent = parent; | 1708 | *ret_parent = parent; |
1709 | if (rb_link) | 1709 | if (rb_link) |
1710 | *rb_link = p; | 1710 | *rb_link = p; |
1711 | return cfqq; | 1711 | return cfqq; |
1712 | } | 1712 | } |
1713 | 1713 | ||
1714 | static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 1714 | static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
1715 | { | 1715 | { |
1716 | struct rb_node **p, *parent; | 1716 | struct rb_node **p, *parent; |
1717 | struct cfq_queue *__cfqq; | 1717 | struct cfq_queue *__cfqq; |
1718 | 1718 | ||
1719 | if (cfqq->p_root) { | 1719 | if (cfqq->p_root) { |
1720 | rb_erase(&cfqq->p_node, cfqq->p_root); | 1720 | rb_erase(&cfqq->p_node, cfqq->p_root); |
1721 | cfqq->p_root = NULL; | 1721 | cfqq->p_root = NULL; |
1722 | } | 1722 | } |
1723 | 1723 | ||
1724 | if (cfq_class_idle(cfqq)) | 1724 | if (cfq_class_idle(cfqq)) |
1725 | return; | 1725 | return; |
1726 | if (!cfqq->next_rq) | 1726 | if (!cfqq->next_rq) |
1727 | return; | 1727 | return; |
1728 | 1728 | ||
1729 | cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio]; | 1729 | cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio]; |
1730 | __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, | 1730 | __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, |
1731 | blk_rq_pos(cfqq->next_rq), &parent, &p); | 1731 | blk_rq_pos(cfqq->next_rq), &parent, &p); |
1732 | if (!__cfqq) { | 1732 | if (!__cfqq) { |
1733 | rb_link_node(&cfqq->p_node, parent, p); | 1733 | rb_link_node(&cfqq->p_node, parent, p); |
1734 | rb_insert_color(&cfqq->p_node, cfqq->p_root); | 1734 | rb_insert_color(&cfqq->p_node, cfqq->p_root); |
1735 | } else | 1735 | } else |
1736 | cfqq->p_root = NULL; | 1736 | cfqq->p_root = NULL; |
1737 | } | 1737 | } |
1738 | 1738 | ||
1739 | /* | 1739 | /* |
1740 | * Update cfqq's position in the service tree. | 1740 | * Update cfqq's position in the service tree. |
1741 | */ | 1741 | */ |
1742 | static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 1742 | static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
1743 | { | 1743 | { |
1744 | /* | 1744 | /* |
1745 | * Resorting requires the cfqq to be on the RR list already. | 1745 | * Resorting requires the cfqq to be on the RR list already. |
1746 | */ | 1746 | */ |
1747 | if (cfq_cfqq_on_rr(cfqq)) { | 1747 | if (cfq_cfqq_on_rr(cfqq)) { |
1748 | cfq_service_tree_add(cfqd, cfqq, 0); | 1748 | cfq_service_tree_add(cfqd, cfqq, 0); |
1749 | cfq_prio_tree_add(cfqd, cfqq); | 1749 | cfq_prio_tree_add(cfqd, cfqq); |
1750 | } | 1750 | } |
1751 | } | 1751 | } |
1752 | 1752 | ||
1753 | /* | 1753 | /* |
1754 | * add to busy list of queues for service, trying to be fair in ordering | 1754 | * add to busy list of queues for service, trying to be fair in ordering |
1755 | * the pending list according to last request service | 1755 | * the pending list according to last request service |
1756 | */ | 1756 | */ |
1757 | static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 1757 | static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
1758 | { | 1758 | { |
1759 | cfq_log_cfqq(cfqd, cfqq, "add_to_rr"); | 1759 | cfq_log_cfqq(cfqd, cfqq, "add_to_rr"); |
1760 | BUG_ON(cfq_cfqq_on_rr(cfqq)); | 1760 | BUG_ON(cfq_cfqq_on_rr(cfqq)); |
1761 | cfq_mark_cfqq_on_rr(cfqq); | 1761 | cfq_mark_cfqq_on_rr(cfqq); |
1762 | cfqd->busy_queues++; | 1762 | cfqd->busy_queues++; |
1763 | if (cfq_cfqq_sync(cfqq)) | 1763 | if (cfq_cfqq_sync(cfqq)) |
1764 | cfqd->busy_sync_queues++; | 1764 | cfqd->busy_sync_queues++; |
1765 | 1765 | ||
1766 | cfq_resort_rr_list(cfqd, cfqq); | 1766 | cfq_resort_rr_list(cfqd, cfqq); |
1767 | } | 1767 | } |
1768 | 1768 | ||
1769 | /* | 1769 | /* |
1770 | * Called when the cfqq no longer has requests pending, remove it from | 1770 | * Called when the cfqq no longer has requests pending, remove it from |
1771 | * the service tree. | 1771 | * the service tree. |
1772 | */ | 1772 | */ |
1773 | static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 1773 | static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
1774 | { | 1774 | { |
1775 | cfq_log_cfqq(cfqd, cfqq, "del_from_rr"); | 1775 | cfq_log_cfqq(cfqd, cfqq, "del_from_rr"); |
1776 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); | 1776 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); |
1777 | cfq_clear_cfqq_on_rr(cfqq); | 1777 | cfq_clear_cfqq_on_rr(cfqq); |
1778 | 1778 | ||
1779 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) { | 1779 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) { |
1780 | cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); | 1780 | cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); |
1781 | cfqq->service_tree = NULL; | 1781 | cfqq->service_tree = NULL; |
1782 | } | 1782 | } |
1783 | if (cfqq->p_root) { | 1783 | if (cfqq->p_root) { |
1784 | rb_erase(&cfqq->p_node, cfqq->p_root); | 1784 | rb_erase(&cfqq->p_node, cfqq->p_root); |
1785 | cfqq->p_root = NULL; | 1785 | cfqq->p_root = NULL; |
1786 | } | 1786 | } |
1787 | 1787 | ||
1788 | cfq_group_notify_queue_del(cfqd, cfqq->cfqg); | 1788 | cfq_group_notify_queue_del(cfqd, cfqq->cfqg); |
1789 | BUG_ON(!cfqd->busy_queues); | 1789 | BUG_ON(!cfqd->busy_queues); |
1790 | cfqd->busy_queues--; | 1790 | cfqd->busy_queues--; |
1791 | if (cfq_cfqq_sync(cfqq)) | 1791 | if (cfq_cfqq_sync(cfqq)) |
1792 | cfqd->busy_sync_queues--; | 1792 | cfqd->busy_sync_queues--; |
1793 | } | 1793 | } |
1794 | 1794 | ||
1795 | /* | 1795 | /* |
1796 | * rb tree support functions | 1796 | * rb tree support functions |
1797 | */ | 1797 | */ |
1798 | static void cfq_del_rq_rb(struct request *rq) | 1798 | static void cfq_del_rq_rb(struct request *rq) |
1799 | { | 1799 | { |
1800 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 1800 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
1801 | const int sync = rq_is_sync(rq); | 1801 | const int sync = rq_is_sync(rq); |
1802 | 1802 | ||
1803 | BUG_ON(!cfqq->queued[sync]); | 1803 | BUG_ON(!cfqq->queued[sync]); |
1804 | cfqq->queued[sync]--; | 1804 | cfqq->queued[sync]--; |
1805 | 1805 | ||
1806 | elv_rb_del(&cfqq->sort_list, rq); | 1806 | elv_rb_del(&cfqq->sort_list, rq); |
1807 | 1807 | ||
1808 | if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) { | 1808 | if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) { |
1809 | /* | 1809 | /* |
1810 | * Queue will be deleted from service tree when we actually | 1810 | * Queue will be deleted from service tree when we actually |
1811 | * expire it later. Right now just remove it from prio tree | 1811 | * expire it later. Right now just remove it from prio tree |
1812 | * as it is empty. | 1812 | * as it is empty. |
1813 | */ | 1813 | */ |
1814 | if (cfqq->p_root) { | 1814 | if (cfqq->p_root) { |
1815 | rb_erase(&cfqq->p_node, cfqq->p_root); | 1815 | rb_erase(&cfqq->p_node, cfqq->p_root); |
1816 | cfqq->p_root = NULL; | 1816 | cfqq->p_root = NULL; |
1817 | } | 1817 | } |
1818 | } | 1818 | } |
1819 | } | 1819 | } |
1820 | 1820 | ||
1821 | static void cfq_add_rq_rb(struct request *rq) | 1821 | static void cfq_add_rq_rb(struct request *rq) |
1822 | { | 1822 | { |
1823 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 1823 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
1824 | struct cfq_data *cfqd = cfqq->cfqd; | 1824 | struct cfq_data *cfqd = cfqq->cfqd; |
1825 | struct request *prev; | 1825 | struct request *prev; |
1826 | 1826 | ||
1827 | cfqq->queued[rq_is_sync(rq)]++; | 1827 | cfqq->queued[rq_is_sync(rq)]++; |
1828 | 1828 | ||
1829 | elv_rb_add(&cfqq->sort_list, rq); | 1829 | elv_rb_add(&cfqq->sort_list, rq); |
1830 | 1830 | ||
1831 | if (!cfq_cfqq_on_rr(cfqq)) | 1831 | if (!cfq_cfqq_on_rr(cfqq)) |
1832 | cfq_add_cfqq_rr(cfqd, cfqq); | 1832 | cfq_add_cfqq_rr(cfqd, cfqq); |
1833 | 1833 | ||
1834 | /* | 1834 | /* |
1835 | * check if this request is a better next-serve candidate | 1835 | * check if this request is a better next-serve candidate |
1836 | */ | 1836 | */ |
1837 | prev = cfqq->next_rq; | 1837 | prev = cfqq->next_rq; |
1838 | cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq, cfqd->last_position); | 1838 | cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq, cfqd->last_position); |
1839 | 1839 | ||
1840 | /* | 1840 | /* |
1841 | * adjust priority tree position, if ->next_rq changes | 1841 | * adjust priority tree position, if ->next_rq changes |
1842 | */ | 1842 | */ |
1843 | if (prev != cfqq->next_rq) | 1843 | if (prev != cfqq->next_rq) |
1844 | cfq_prio_tree_add(cfqd, cfqq); | 1844 | cfq_prio_tree_add(cfqd, cfqq); |
1845 | 1845 | ||
1846 | BUG_ON(!cfqq->next_rq); | 1846 | BUG_ON(!cfqq->next_rq); |
1847 | } | 1847 | } |
1848 | 1848 | ||
1849 | static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) | 1849 | static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) |
1850 | { | 1850 | { |
1851 | elv_rb_del(&cfqq->sort_list, rq); | 1851 | elv_rb_del(&cfqq->sort_list, rq); |
1852 | cfqq->queued[rq_is_sync(rq)]--; | 1852 | cfqq->queued[rq_is_sync(rq)]--; |
1853 | cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); | 1853 | cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); |
1854 | cfq_add_rq_rb(rq); | 1854 | cfq_add_rq_rb(rq); |
1855 | cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group, | 1855 | cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group, |
1856 | rq->cmd_flags); | 1856 | rq->cmd_flags); |
1857 | } | 1857 | } |
1858 | 1858 | ||
1859 | static struct request * | 1859 | static struct request * |
1860 | cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) | 1860 | cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) |
1861 | { | 1861 | { |
1862 | struct task_struct *tsk = current; | 1862 | struct task_struct *tsk = current; |
1863 | struct cfq_io_cq *cic; | 1863 | struct cfq_io_cq *cic; |
1864 | struct cfq_queue *cfqq; | 1864 | struct cfq_queue *cfqq; |
1865 | 1865 | ||
1866 | cic = cfq_cic_lookup(cfqd, tsk->io_context); | 1866 | cic = cfq_cic_lookup(cfqd, tsk->io_context); |
1867 | if (!cic) | 1867 | if (!cic) |
1868 | return NULL; | 1868 | return NULL; |
1869 | 1869 | ||
1870 | cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); | 1870 | cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); |
1871 | if (cfqq) { | 1871 | if (cfqq) { |
1872 | sector_t sector = bio->bi_sector + bio_sectors(bio); | 1872 | sector_t sector = bio->bi_sector + bio_sectors(bio); |
1873 | 1873 | ||
1874 | return elv_rb_find(&cfqq->sort_list, sector); | 1874 | return elv_rb_find(&cfqq->sort_list, sector); |
1875 | } | 1875 | } |
1876 | 1876 | ||
1877 | return NULL; | 1877 | return NULL; |
1878 | } | 1878 | } |
1879 | 1879 | ||
1880 | static void cfq_activate_request(struct request_queue *q, struct request *rq) | 1880 | static void cfq_activate_request(struct request_queue *q, struct request *rq) |
1881 | { | 1881 | { |
1882 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1882 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1883 | 1883 | ||
1884 | cfqd->rq_in_driver++; | 1884 | cfqd->rq_in_driver++; |
1885 | cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", | 1885 | cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", |
1886 | cfqd->rq_in_driver); | 1886 | cfqd->rq_in_driver); |
1887 | 1887 | ||
1888 | cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); | 1888 | cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); |
1889 | } | 1889 | } |
1890 | 1890 | ||
1891 | static void cfq_deactivate_request(struct request_queue *q, struct request *rq) | 1891 | static void cfq_deactivate_request(struct request_queue *q, struct request *rq) |
1892 | { | 1892 | { |
1893 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1893 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1894 | 1894 | ||
1895 | WARN_ON(!cfqd->rq_in_driver); | 1895 | WARN_ON(!cfqd->rq_in_driver); |
1896 | cfqd->rq_in_driver--; | 1896 | cfqd->rq_in_driver--; |
1897 | cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d", | 1897 | cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d", |
1898 | cfqd->rq_in_driver); | 1898 | cfqd->rq_in_driver); |
1899 | } | 1899 | } |
1900 | 1900 | ||
1901 | static void cfq_remove_request(struct request *rq) | 1901 | static void cfq_remove_request(struct request *rq) |
1902 | { | 1902 | { |
1903 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 1903 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
1904 | 1904 | ||
1905 | if (cfqq->next_rq == rq) | 1905 | if (cfqq->next_rq == rq) |
1906 | cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq); | 1906 | cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq); |
1907 | 1907 | ||
1908 | list_del_init(&rq->queuelist); | 1908 | list_del_init(&rq->queuelist); |
1909 | cfq_del_rq_rb(rq); | 1909 | cfq_del_rq_rb(rq); |
1910 | 1910 | ||
1911 | cfqq->cfqd->rq_queued--; | 1911 | cfqq->cfqd->rq_queued--; |
1912 | cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); | 1912 | cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); |
1913 | if (rq->cmd_flags & REQ_PRIO) { | 1913 | if (rq->cmd_flags & REQ_PRIO) { |
1914 | WARN_ON(!cfqq->prio_pending); | 1914 | WARN_ON(!cfqq->prio_pending); |
1915 | cfqq->prio_pending--; | 1915 | cfqq->prio_pending--; |
1916 | } | 1916 | } |
1917 | } | 1917 | } |
1918 | 1918 | ||
1919 | static int cfq_merge(struct request_queue *q, struct request **req, | 1919 | static int cfq_merge(struct request_queue *q, struct request **req, |
1920 | struct bio *bio) | 1920 | struct bio *bio) |
1921 | { | 1921 | { |
1922 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1922 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1923 | struct request *__rq; | 1923 | struct request *__rq; |
1924 | 1924 | ||
1925 | __rq = cfq_find_rq_fmerge(cfqd, bio); | 1925 | __rq = cfq_find_rq_fmerge(cfqd, bio); |
1926 | if (__rq && elv_rq_merge_ok(__rq, bio)) { | 1926 | if (__rq && elv_rq_merge_ok(__rq, bio)) { |
1927 | *req = __rq; | 1927 | *req = __rq; |
1928 | return ELEVATOR_FRONT_MERGE; | 1928 | return ELEVATOR_FRONT_MERGE; |
1929 | } | 1929 | } |
1930 | 1930 | ||
1931 | return ELEVATOR_NO_MERGE; | 1931 | return ELEVATOR_NO_MERGE; |
1932 | } | 1932 | } |
1933 | 1933 | ||
1934 | static void cfq_merged_request(struct request_queue *q, struct request *req, | 1934 | static void cfq_merged_request(struct request_queue *q, struct request *req, |
1935 | int type) | 1935 | int type) |
1936 | { | 1936 | { |
1937 | if (type == ELEVATOR_FRONT_MERGE) { | 1937 | if (type == ELEVATOR_FRONT_MERGE) { |
1938 | struct cfq_queue *cfqq = RQ_CFQQ(req); | 1938 | struct cfq_queue *cfqq = RQ_CFQQ(req); |
1939 | 1939 | ||
1940 | cfq_reposition_rq_rb(cfqq, req); | 1940 | cfq_reposition_rq_rb(cfqq, req); |
1941 | } | 1941 | } |
1942 | } | 1942 | } |
1943 | 1943 | ||
1944 | static void cfq_bio_merged(struct request_queue *q, struct request *req, | 1944 | static void cfq_bio_merged(struct request_queue *q, struct request *req, |
1945 | struct bio *bio) | 1945 | struct bio *bio) |
1946 | { | 1946 | { |
1947 | cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_rw); | 1947 | cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_rw); |
1948 | } | 1948 | } |
1949 | 1949 | ||
1950 | static void | 1950 | static void |
1951 | cfq_merged_requests(struct request_queue *q, struct request *rq, | 1951 | cfq_merged_requests(struct request_queue *q, struct request *rq, |
1952 | struct request *next) | 1952 | struct request *next) |
1953 | { | 1953 | { |
1954 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 1954 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
1955 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1955 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1956 | 1956 | ||
1957 | /* | 1957 | /* |
1958 | * reposition in fifo if next is older than rq | 1958 | * reposition in fifo if next is older than rq |
1959 | */ | 1959 | */ |
1960 | if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && | 1960 | if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && |
1961 | time_before(rq_fifo_time(next), rq_fifo_time(rq))) { | 1961 | time_before(rq_fifo_time(next), rq_fifo_time(rq))) { |
1962 | list_move(&rq->queuelist, &next->queuelist); | 1962 | list_move(&rq->queuelist, &next->queuelist); |
1963 | rq_set_fifo_time(rq, rq_fifo_time(next)); | 1963 | rq_set_fifo_time(rq, rq_fifo_time(next)); |
1964 | } | 1964 | } |
1965 | 1965 | ||
1966 | if (cfqq->next_rq == next) | 1966 | if (cfqq->next_rq == next) |
1967 | cfqq->next_rq = rq; | 1967 | cfqq->next_rq = rq; |
1968 | cfq_remove_request(next); | 1968 | cfq_remove_request(next); |
1969 | cfqg_stats_update_io_merged(RQ_CFQG(rq), next->cmd_flags); | 1969 | cfqg_stats_update_io_merged(RQ_CFQG(rq), next->cmd_flags); |
1970 | 1970 | ||
1971 | cfqq = RQ_CFQQ(next); | 1971 | cfqq = RQ_CFQQ(next); |
1972 | /* | 1972 | /* |
1973 | * all requests of this queue have been merged into other queues; delete it | 1973 | * all requests of this queue have been merged into other queues; delete it |
1974 | * from the service tree. If it's the active_queue, | 1974 | * from the service tree. If it's the active_queue, |
1975 | * cfq_dispatch_requests() will choose to expire it or do idle | 1975 | * cfq_dispatch_requests() will choose to expire it or do idle |
1976 | */ | 1976 | */ |
1977 | if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list) && | 1977 | if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list) && |
1978 | cfqq != cfqd->active_queue) | 1978 | cfqq != cfqd->active_queue) |
1979 | cfq_del_cfqq_rr(cfqd, cfqq); | 1979 | cfq_del_cfqq_rr(cfqd, cfqq); |
1980 | } | 1980 | } |
1981 | 1981 | ||
1982 | static int cfq_allow_merge(struct request_queue *q, struct request *rq, | 1982 | static int cfq_allow_merge(struct request_queue *q, struct request *rq, |
1983 | struct bio *bio) | 1983 | struct bio *bio) |
1984 | { | 1984 | { |
1985 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1985 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1986 | struct cfq_io_cq *cic; | 1986 | struct cfq_io_cq *cic; |
1987 | struct cfq_queue *cfqq; | 1987 | struct cfq_queue *cfqq; |
1988 | 1988 | ||
1989 | /* | 1989 | /* |
1990 | * Disallow merge of a sync bio into an async request. | 1990 | * Disallow merge of a sync bio into an async request. |
1991 | */ | 1991 | */ |
1992 | if (cfq_bio_sync(bio) && !rq_is_sync(rq)) | 1992 | if (cfq_bio_sync(bio) && !rq_is_sync(rq)) |
1993 | return false; | 1993 | return false; |
1994 | 1994 | ||
1995 | /* | 1995 | /* |
1996 | * Lookup the cfqq that this bio will be queued with and allow | 1996 | * Lookup the cfqq that this bio will be queued with and allow |
1997 | * merge only if rq is queued there. | 1997 | * merge only if rq is queued there. |
1998 | */ | 1998 | */ |
1999 | cic = cfq_cic_lookup(cfqd, current->io_context); | 1999 | cic = cfq_cic_lookup(cfqd, current->io_context); |
2000 | if (!cic) | 2000 | if (!cic) |
2001 | return false; | 2001 | return false; |
2002 | 2002 | ||
2003 | cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); | 2003 | cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); |
2004 | return cfqq == RQ_CFQQ(rq); | 2004 | return cfqq == RQ_CFQQ(rq); |
2005 | } | 2005 | } |
2006 | 2006 | ||
2007 | static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 2007 | static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
2008 | { | 2008 | { |
2009 | del_timer(&cfqd->idle_slice_timer); | 2009 | del_timer(&cfqd->idle_slice_timer); |
2010 | cfqg_stats_update_idle_time(cfqq->cfqg); | 2010 | cfqg_stats_update_idle_time(cfqq->cfqg); |
2011 | } | 2011 | } |
2012 | 2012 | ||
2013 | static void __cfq_set_active_queue(struct cfq_data *cfqd, | 2013 | static void __cfq_set_active_queue(struct cfq_data *cfqd, |
2014 | struct cfq_queue *cfqq) | 2014 | struct cfq_queue *cfqq) |
2015 | { | 2015 | { |
2016 | if (cfqq) { | 2016 | if (cfqq) { |
2017 | cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", | 2017 | cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", |
2018 | cfqd->serving_prio, cfqd->serving_type); | 2018 | cfqd->serving_prio, cfqd->serving_type); |
2019 | cfqg_stats_update_avg_queue_size(cfqq->cfqg); | 2019 | cfqg_stats_update_avg_queue_size(cfqq->cfqg); |
2020 | cfqq->slice_start = 0; | 2020 | cfqq->slice_start = 0; |
2021 | cfqq->dispatch_start = jiffies; | 2021 | cfqq->dispatch_start = jiffies; |
2022 | cfqq->allocated_slice = 0; | 2022 | cfqq->allocated_slice = 0; |
2023 | cfqq->slice_end = 0; | 2023 | cfqq->slice_end = 0; |
2024 | cfqq->slice_dispatch = 0; | 2024 | cfqq->slice_dispatch = 0; |
2025 | cfqq->nr_sectors = 0; | 2025 | cfqq->nr_sectors = 0; |
2026 | 2026 | ||
2027 | cfq_clear_cfqq_wait_request(cfqq); | 2027 | cfq_clear_cfqq_wait_request(cfqq); |
2028 | cfq_clear_cfqq_must_dispatch(cfqq); | 2028 | cfq_clear_cfqq_must_dispatch(cfqq); |
2029 | cfq_clear_cfqq_must_alloc_slice(cfqq); | 2029 | cfq_clear_cfqq_must_alloc_slice(cfqq); |
2030 | cfq_clear_cfqq_fifo_expire(cfqq); | 2030 | cfq_clear_cfqq_fifo_expire(cfqq); |
2031 | cfq_mark_cfqq_slice_new(cfqq); | 2031 | cfq_mark_cfqq_slice_new(cfqq); |
2032 | 2032 | ||
2033 | cfq_del_timer(cfqd, cfqq); | 2033 | cfq_del_timer(cfqd, cfqq); |
2034 | } | 2034 | } |
2035 | 2035 | ||
2036 | cfqd->active_queue = cfqq; | 2036 | cfqd->active_queue = cfqq; |
2037 | } | 2037 | } |
2038 | 2038 | ||
2039 | /* | 2039 | /* |
2040 | * current cfqq expired its slice (or was too idle), select new one | 2040 | * current cfqq expired its slice (or was too idle), select new one |
2041 | */ | 2041 | */ |
2042 | static void | 2042 | static void |
2043 | __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 2043 | __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
2044 | bool timed_out) | 2044 | bool timed_out) |
2045 | { | 2045 | { |
2046 | cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); | 2046 | cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); |
2047 | 2047 | ||
2048 | if (cfq_cfqq_wait_request(cfqq)) | 2048 | if (cfq_cfqq_wait_request(cfqq)) |
2049 | cfq_del_timer(cfqd, cfqq); | 2049 | cfq_del_timer(cfqd, cfqq); |
2050 | 2050 | ||
2051 | cfq_clear_cfqq_wait_request(cfqq); | 2051 | cfq_clear_cfqq_wait_request(cfqq); |
2052 | cfq_clear_cfqq_wait_busy(cfqq); | 2052 | cfq_clear_cfqq_wait_busy(cfqq); |
2053 | 2053 | ||
2054 | /* | 2054 | /* |
2055 | * If this cfqq is shared between multiple processes, check to | 2055 | * If this cfqq is shared between multiple processes, check to |
2056 | * make sure that those processes are still issuing I/Os within | 2056 | * make sure that those processes are still issuing I/Os within |
2057 | * the mean seek distance. If not, it may be time to break the | 2057 | * the mean seek distance. If not, it may be time to break the |
2058 | * queues apart again. | 2058 | * queues apart again. |
2059 | */ | 2059 | */ |
2060 | if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq)) | 2060 | if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq)) |
2061 | cfq_mark_cfqq_split_coop(cfqq); | 2061 | cfq_mark_cfqq_split_coop(cfqq); |
2062 | 2062 | ||
2063 | /* | 2063 | /* |
2064 | * store what was left of this slice, if the queue idled/timed out | 2064 | * store what was left of this slice, if the queue idled/timed out |
2065 | */ | 2065 | */ |
2066 | if (timed_out) { | 2066 | if (timed_out) { |
2067 | if (cfq_cfqq_slice_new(cfqq)) | 2067 | if (cfq_cfqq_slice_new(cfqq)) |
2068 | cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq); | 2068 | cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq); |
2069 | else | 2069 | else |
2070 | cfqq->slice_resid = cfqq->slice_end - jiffies; | 2070 | cfqq->slice_resid = cfqq->slice_end - jiffies; |
2071 | cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); | 2071 | cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); |
2072 | } | 2072 | } |
2073 | 2073 | ||
2074 | cfq_group_served(cfqd, cfqq->cfqg, cfqq); | 2074 | cfq_group_served(cfqd, cfqq->cfqg, cfqq); |
2075 | 2075 | ||
2076 | if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) | 2076 | if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) |
2077 | cfq_del_cfqq_rr(cfqd, cfqq); | 2077 | cfq_del_cfqq_rr(cfqd, cfqq); |
2078 | 2078 | ||
2079 | cfq_resort_rr_list(cfqd, cfqq); | 2079 | cfq_resort_rr_list(cfqd, cfqq); |
2080 | 2080 | ||
2081 | if (cfqq == cfqd->active_queue) | 2081 | if (cfqq == cfqd->active_queue) |
2082 | cfqd->active_queue = NULL; | 2082 | cfqd->active_queue = NULL; |
2083 | 2083 | ||
2084 | if (cfqd->active_cic) { | 2084 | if (cfqd->active_cic) { |
2085 | put_io_context(cfqd->active_cic->icq.ioc); | 2085 | put_io_context(cfqd->active_cic->icq.ioc); |
2086 | cfqd->active_cic = NULL; | 2086 | cfqd->active_cic = NULL; |
2087 | } | 2087 | } |
2088 | } | 2088 | } |
2089 | 2089 | ||
2090 | static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) | 2090 | static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) |
2091 | { | 2091 | { |
2092 | struct cfq_queue *cfqq = cfqd->active_queue; | 2092 | struct cfq_queue *cfqq = cfqd->active_queue; |
2093 | 2093 | ||
2094 | if (cfqq) | 2094 | if (cfqq) |
2095 | __cfq_slice_expired(cfqd, cfqq, timed_out); | 2095 | __cfq_slice_expired(cfqd, cfqq, timed_out); |
2096 | } | 2096 | } |
2097 | 2097 | ||
2098 | /* | 2098 | /* |
2099 | * Get next queue for service. Unless we have a queue preemption, | 2099 | * Get next queue for service. Unless we have a queue preemption, |
2100 | * we'll simply select the first cfqq in the service tree. | 2100 | * we'll simply select the first cfqq in the service tree. |
2101 | */ | 2101 | */ |
2102 | static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) | 2102 | static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) |
2103 | { | 2103 | { |
2104 | struct cfq_rb_root *service_tree = | 2104 | struct cfq_rb_root *service_tree = |
2105 | service_tree_for(cfqd->serving_group, cfqd->serving_prio, | 2105 | service_tree_for(cfqd->serving_group, cfqd->serving_prio, |
2106 | cfqd->serving_type); | 2106 | cfqd->serving_type); |
2107 | 2107 | ||
2108 | if (!cfqd->rq_queued) | 2108 | if (!cfqd->rq_queued) |
2109 | return NULL; | 2109 | return NULL; |
2110 | 2110 | ||
2111 | /* There is nothing to dispatch */ | 2111 | /* There is nothing to dispatch */ |
2112 | if (!service_tree) | 2112 | if (!service_tree) |
2113 | return NULL; | 2113 | return NULL; |
2114 | if (RB_EMPTY_ROOT(&service_tree->rb)) | 2114 | if (RB_EMPTY_ROOT(&service_tree->rb)) |
2115 | return NULL; | 2115 | return NULL; |
2116 | return cfq_rb_first(service_tree); | 2116 | return cfq_rb_first(service_tree); |
2117 | } | 2117 | } |
2118 | 2118 | ||
2119 | static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd) | 2119 | static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd) |
2120 | { | 2120 | { |
2121 | struct cfq_group *cfqg; | 2121 | struct cfq_group *cfqg; |
2122 | struct cfq_queue *cfqq; | 2122 | struct cfq_queue *cfqq; |
2123 | int i, j; | 2123 | int i, j; |
2124 | struct cfq_rb_root *st; | 2124 | struct cfq_rb_root *st; |
2125 | 2125 | ||
2126 | if (!cfqd->rq_queued) | 2126 | if (!cfqd->rq_queued) |
2127 | return NULL; | 2127 | return NULL; |
2128 | 2128 | ||
2129 | cfqg = cfq_get_next_cfqg(cfqd); | 2129 | cfqg = cfq_get_next_cfqg(cfqd); |
2130 | if (!cfqg) | 2130 | if (!cfqg) |
2131 | return NULL; | 2131 | return NULL; |
2132 | 2132 | ||
2133 | for_each_cfqg_st(cfqg, i, j, st) | 2133 | for_each_cfqg_st(cfqg, i, j, st) |
2134 | if ((cfqq = cfq_rb_first(st)) != NULL) | 2134 | if ((cfqq = cfq_rb_first(st)) != NULL) |
2135 | return cfqq; | 2135 | return cfqq; |
2136 | return NULL; | 2136 | return NULL; |
2137 | } | 2137 | } |
2138 | 2138 | ||
2139 | /* | 2139 | /* |
2140 | * Get and set a new active queue for service. | 2140 | * Get and set a new active queue for service. |
2141 | */ | 2141 | */ |
2142 | static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd, | 2142 | static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd, |
2143 | struct cfq_queue *cfqq) | 2143 | struct cfq_queue *cfqq) |
2144 | { | 2144 | { |
2145 | if (!cfqq) | 2145 | if (!cfqq) |
2146 | cfqq = cfq_get_next_queue(cfqd); | 2146 | cfqq = cfq_get_next_queue(cfqd); |
2147 | 2147 | ||
2148 | __cfq_set_active_queue(cfqd, cfqq); | 2148 | __cfq_set_active_queue(cfqd, cfqq); |
2149 | return cfqq; | 2149 | return cfqq; |
2150 | } | 2150 | } |
2151 | 2151 | ||
2152 | static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, | 2152 | static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, |
2153 | struct request *rq) | 2153 | struct request *rq) |
2154 | { | 2154 | { |
2155 | if (blk_rq_pos(rq) >= cfqd->last_position) | 2155 | if (blk_rq_pos(rq) >= cfqd->last_position) |
2156 | return blk_rq_pos(rq) - cfqd->last_position; | 2156 | return blk_rq_pos(rq) - cfqd->last_position; |
2157 | else | 2157 | else |
2158 | return cfqd->last_position - blk_rq_pos(rq); | 2158 | return cfqd->last_position - blk_rq_pos(rq); |
2159 | } | 2159 | } |
2160 | 2160 | ||
2161 | static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 2161 | static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
2162 | struct request *rq) | 2162 | struct request *rq) |
2163 | { | 2163 | { |
2164 | return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR; | 2164 | return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR; |
2165 | } | 2165 | } |
2166 | 2166 | ||
2167 | static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, | 2167 | static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, |
2168 | struct cfq_queue *cur_cfqq) | 2168 | struct cfq_queue *cur_cfqq) |
2169 | { | 2169 | { |
2170 | struct rb_root *root = &cfqd->prio_trees[cur_cfqq->org_ioprio]; | 2170 | struct rb_root *root = &cfqd->prio_trees[cur_cfqq->org_ioprio]; |
2171 | struct rb_node *parent, *node; | 2171 | struct rb_node *parent, *node; |
2172 | struct cfq_queue *__cfqq; | 2172 | struct cfq_queue *__cfqq; |
2173 | sector_t sector = cfqd->last_position; | 2173 | sector_t sector = cfqd->last_position; |
2174 | 2174 | ||
2175 | if (RB_EMPTY_ROOT(root)) | 2175 | if (RB_EMPTY_ROOT(root)) |
2176 | return NULL; | 2176 | return NULL; |
2177 | 2177 | ||
2178 | /* | 2178 | /* |
2179 | * First, if we find a request starting at the end of the last | 2179 | * First, if we find a request starting at the end of the last |
2180 | * request, choose it. | 2180 | * request, choose it. |
2181 | */ | 2181 | */ |
2182 | __cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL); | 2182 | __cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL); |
2183 | if (__cfqq) | 2183 | if (__cfqq) |
2184 | return __cfqq; | 2184 | return __cfqq; |
2185 | 2185 | ||
2186 | /* | 2186 | /* |
2187 | * If the exact sector wasn't found, the parent of the NULL leaf | 2187 | * If the exact sector wasn't found, the parent of the NULL leaf |
2188 | * will contain the closest sector. | 2188 | * will contain the closest sector. |
2189 | */ | 2189 | */ |
2190 | __cfqq = rb_entry(parent, struct cfq_queue, p_node); | 2190 | __cfqq = rb_entry(parent, struct cfq_queue, p_node); |
2191 | if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) | 2191 | if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) |
2192 | return __cfqq; | 2192 | return __cfqq; |
2193 | 2193 | ||
2194 | if (blk_rq_pos(__cfqq->next_rq) < sector) | 2194 | if (blk_rq_pos(__cfqq->next_rq) < sector) |
2195 | node = rb_next(&__cfqq->p_node); | 2195 | node = rb_next(&__cfqq->p_node); |
2196 | else | 2196 | else |
2197 | node = rb_prev(&__cfqq->p_node); | 2197 | node = rb_prev(&__cfqq->p_node); |
2198 | if (!node) | 2198 | if (!node) |
2199 | return NULL; | 2199 | return NULL; |
2200 | 2200 | ||
2201 | __cfqq = rb_entry(node, struct cfq_queue, p_node); | 2201 | __cfqq = rb_entry(node, struct cfq_queue, p_node); |
2202 | if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) | 2202 | if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) |
2203 | return __cfqq; | 2203 | return __cfqq; |
2204 | 2204 | ||
2205 | return NULL; | 2205 | return NULL; |
2206 | } | 2206 | } |
2207 | 2207 | ||
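
A hedged sketch of what cfqq_close() is doing, with a sorted array standing in for the per-priority rbtree keyed by each queue's next request position: find the entry nearest the last dispatched sector and accept it only if it is within the closeness threshold, otherwise try the bracketing neighbour. The kernel walks only one neighbour, chosen by direction; checking both sides here is a simplification, and the threshold is again an assumed value.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t sector_t;

#define CLOSE_THR ((sector_t)(8 * 1024))	/* same assumed threshold as above */

static sector_t dist(sector_t a, sector_t b)
{
	return a > b ? a - b : b - a;
}

/*
 * 'pos' is a sorted array of each candidate queue's next request sector,
 * standing in for the prio tree.  Return the index of a queue whose next
 * request is close to 'sector', or -1 if neither the nearest entry nor its
 * neighbour qualifies.
 */
static int find_close(const sector_t *pos, int n, sector_t sector)
{
	int lo = 0, hi = n;		/* binary search: first pos[i] >= sector */

	while (lo < hi) {
		int mid = (lo + hi) / 2;

		if (pos[mid] < sector)
			lo = mid + 1;
		else
			hi = mid;
	}

	/* candidate at or after 'sector', then the one just before it */
	if (lo < n && dist(pos[lo], sector) <= CLOSE_THR)
		return lo;
	if (lo > 0 && dist(pos[lo - 1], sector) <= CLOSE_THR)
		return lo - 1;
	return -1;
}

int main(void)
{
	sector_t pos[] = { 1000, 50000, 104000, 900000 };

	/* prints 2: the queue whose next request sits at 104000 is close enough */
	printf("%d\n", find_close(pos, 4, 100000));
	return 0;
}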
2208 | /* | 2208 | /* |
2209 | * cfqd - obvious | 2209 | * cfqd - obvious |
2210 | * cur_cfqq - passed in so that we don't decide that the current queue is | 2210 | * cur_cfqq - passed in so that we don't decide that the current queue is |
2211 | * closely cooperating with itself. | 2211 | * closely cooperating with itself. |
2212 | * | 2212 | * |
2213 | * So, basically we're assuming that cur_cfqq has dispatched at least | 2213 | * So, basically we're assuming that cur_cfqq has dispatched at least |
2214 | * one request, and that cfqd->last_position reflects a position on the disk | 2214 | * one request, and that cfqd->last_position reflects a position on the disk |
2215 | * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid | 2215 | * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid |
2216 | * assumption. | 2216 | * assumption. |
2217 | */ | 2217 | */ |
2218 | static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, | 2218 | static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, |
2219 | struct cfq_queue *cur_cfqq) | 2219 | struct cfq_queue *cur_cfqq) |
2220 | { | 2220 | { |
2221 | struct cfq_queue *cfqq; | 2221 | struct cfq_queue *cfqq; |
2222 | 2222 | ||
2223 | if (cfq_class_idle(cur_cfqq)) | 2223 | if (cfq_class_idle(cur_cfqq)) |
2224 | return NULL; | 2224 | return NULL; |
2225 | if (!cfq_cfqq_sync(cur_cfqq)) | 2225 | if (!cfq_cfqq_sync(cur_cfqq)) |
2226 | return NULL; | 2226 | return NULL; |
2227 | if (CFQQ_SEEKY(cur_cfqq)) | 2227 | if (CFQQ_SEEKY(cur_cfqq)) |
2228 | return NULL; | 2228 | return NULL; |
2229 | 2229 | ||
2230 | /* | 2230 | /* |
2231 | * Don't search priority tree if it's the only queue in the group. | 2231 | * Don't search priority tree if it's the only queue in the group. |
2232 | */ | 2232 | */ |
2233 | if (cur_cfqq->cfqg->nr_cfqq == 1) | 2233 | if (cur_cfqq->cfqg->nr_cfqq == 1) |
2234 | return NULL; | 2234 | return NULL; |
2235 | 2235 | ||
2236 | /* | 2236 | /* |
2237 | * We should notice if some of the queues are cooperating, eg | 2237 | * We should notice if some of the queues are cooperating, eg |
2238 | * working closely on the same area of the disk. In that case, | 2238 | * working closely on the same area of the disk. In that case, |
2239 | * we can group them together and don't waste time idling. | 2239 | * we can group them together and don't waste time idling. |
2240 | */ | 2240 | */ |
2241 | cfqq = cfqq_close(cfqd, cur_cfqq); | 2241 | cfqq = cfqq_close(cfqd, cur_cfqq); |
2242 | if (!cfqq) | 2242 | if (!cfqq) |
2243 | return NULL; | 2243 | return NULL; |
2244 | 2244 | ||
2245 | /* If new queue belongs to different cfq_group, don't choose it */ | 2245 | /* If new queue belongs to different cfq_group, don't choose it */ |
2246 | if (cur_cfqq->cfqg != cfqq->cfqg) | 2246 | if (cur_cfqq->cfqg != cfqq->cfqg) |
2247 | return NULL; | 2247 | return NULL; |
2248 | 2248 | ||
2249 | /* | 2249 | /* |
2250 | * It only makes sense to merge sync queues. | 2250 | * It only makes sense to merge sync queues. |
2251 | */ | 2251 | */ |
2252 | if (!cfq_cfqq_sync(cfqq)) | 2252 | if (!cfq_cfqq_sync(cfqq)) |
2253 | return NULL; | 2253 | return NULL; |
2254 | if (CFQQ_SEEKY(cfqq)) | 2254 | if (CFQQ_SEEKY(cfqq)) |
2255 | return NULL; | 2255 | return NULL; |
2256 | 2256 | ||
2257 | /* | 2257 | /* |
2258 | * Do not merge queues of different priority classes | 2258 | * Do not merge queues of different priority classes |
2259 | */ | 2259 | */ |
2260 | if (cfq_class_rt(cfqq) != cfq_class_rt(cur_cfqq)) | 2260 | if (cfq_class_rt(cfqq) != cfq_class_rt(cur_cfqq)) |
2261 | return NULL; | 2261 | return NULL; |
2262 | 2262 | ||
2263 | return cfqq; | 2263 | return cfqq; |
2264 | } | 2264 | } |
2265 | 2265 | ||
2266 | /* | 2266 | /* |
2267 | * Determine whether we should enforce idle window for this queue. | 2267 | * Determine whether we should enforce idle window for this queue. |
2268 | */ | 2268 | */ |
2269 | 2269 | ||
2270 | static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 2270 | static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
2271 | { | 2271 | { |
2272 | enum wl_prio_t prio = cfqq_prio(cfqq); | 2272 | enum wl_prio_t prio = cfqq_prio(cfqq); |
2273 | struct cfq_rb_root *service_tree = cfqq->service_tree; | 2273 | struct cfq_rb_root *service_tree = cfqq->service_tree; |
2274 | 2274 | ||
2275 | BUG_ON(!service_tree); | 2275 | BUG_ON(!service_tree); |
2276 | BUG_ON(!service_tree->count); | 2276 | BUG_ON(!service_tree->count); |
2277 | 2277 | ||
2278 | if (!cfqd->cfq_slice_idle) | 2278 | if (!cfqd->cfq_slice_idle) |
2279 | return false; | 2279 | return false; |
2280 | 2280 | ||
2281 | /* We never do for idle class queues. */ | 2281 | /* We never do for idle class queues. */ |
2282 | if (prio == IDLE_WORKLOAD) | 2282 | if (prio == IDLE_WORKLOAD) |
2283 | return false; | 2283 | return false; |
2284 | 2284 | ||
2285 | /* We do for queues that were marked with idle window flag. */ | 2285 | /* We do for queues that were marked with idle window flag. */ |
2286 | if (cfq_cfqq_idle_window(cfqq) && | 2286 | if (cfq_cfqq_idle_window(cfqq) && |
2287 | !(blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)) | 2287 | !(blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)) |
2288 | return true; | 2288 | return true; |
2289 | 2289 | ||
2290 | /* | 2290 | /* |
2291 | * Otherwise, we do only if they are the last ones | 2291 | * Otherwise, we do only if they are the last ones |
2292 | * in their service tree. | 2292 | * in their service tree. |
2293 | */ | 2293 | */ |
2294 | if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) && | 2294 | if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) && |
2295 | !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false)) | 2295 | !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false)) |
2296 | return true; | 2296 | return true; |
2297 | cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", | 2297 | cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", |
2298 | service_tree->count); | 2298 | service_tree->count); |
2299 | return false; | 2299 | return false; |
2300 | } | 2300 | } |
2301 | 2301 | ||
2302 | static void cfq_arm_slice_timer(struct cfq_data *cfqd) | 2302 | static void cfq_arm_slice_timer(struct cfq_data *cfqd) |
2303 | { | 2303 | { |
2304 | struct cfq_queue *cfqq = cfqd->active_queue; | 2304 | struct cfq_queue *cfqq = cfqd->active_queue; |
2305 | struct cfq_io_cq *cic; | 2305 | struct cfq_io_cq *cic; |
2306 | unsigned long sl, group_idle = 0; | 2306 | unsigned long sl, group_idle = 0; |
2307 | 2307 | ||
2308 | /* | 2308 | /* |
2309 | * SSD device without seek penalty, disable idling. But only do so | 2309 | * SSD device without seek penalty, disable idling. But only do so |
2310 | * for devices that support queuing, otherwise we still have a problem | 2310 | * for devices that support queuing, otherwise we still have a problem |
2311 | * with sync vs async workloads. | 2311 | * with sync vs async workloads. |
2312 | */ | 2312 | */ |
2313 | if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag) | 2313 | if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag) |
2314 | return; | 2314 | return; |
2315 | 2315 | ||
2316 | WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); | 2316 | WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); |
2317 | WARN_ON(cfq_cfqq_slice_new(cfqq)); | 2317 | WARN_ON(cfq_cfqq_slice_new(cfqq)); |
2318 | 2318 | ||
2319 | /* | 2319 | /* |
2320 | * idle is disabled, either manually or by past process history | 2320 | * idle is disabled, either manually or by past process history |
2321 | */ | 2321 | */ |
2322 | if (!cfq_should_idle(cfqd, cfqq)) { | 2322 | if (!cfq_should_idle(cfqd, cfqq)) { |
2323 | /* no queue idling. Check for group idling */ | 2323 | /* no queue idling. Check for group idling */ |
2324 | if (cfqd->cfq_group_idle) | 2324 | if (cfqd->cfq_group_idle) |
2325 | group_idle = cfqd->cfq_group_idle; | 2325 | group_idle = cfqd->cfq_group_idle; |
2326 | else | 2326 | else |
2327 | return; | 2327 | return; |
2328 | } | 2328 | } |
2329 | 2329 | ||
2330 | /* | 2330 | /* |
2331 | * still active requests from this queue, don't idle | 2331 | * still active requests from this queue, don't idle |
2332 | */ | 2332 | */ |
2333 | if (cfqq->dispatched) | 2333 | if (cfqq->dispatched) |
2334 | return; | 2334 | return; |
2335 | 2335 | ||
2336 | /* | 2336 | /* |
2337 | * task has exited, don't wait | 2337 | * task has exited, don't wait |
2338 | */ | 2338 | */ |
2339 | cic = cfqd->active_cic; | 2339 | cic = cfqd->active_cic; |
2340 | if (!cic || !atomic_read(&cic->icq.ioc->active_ref)) | 2340 | if (!cic || !atomic_read(&cic->icq.ioc->active_ref)) |
2341 | return; | 2341 | return; |
2342 | 2342 | ||
2343 | /* | 2343 | /* |
2344 | * If our average think time is larger than the remaining time | 2344 | * If our average think time is larger than the remaining time |
2345 | * slice, then don't idle. This avoids overrunning the allotted | 2345 | * slice, then don't idle. This avoids overrunning the allotted |
2346 | * time slice. | 2346 | * time slice. |
2347 | */ | 2347 | */ |
2348 | if (sample_valid(cic->ttime.ttime_samples) && | 2348 | if (sample_valid(cic->ttime.ttime_samples) && |
2349 | (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) { | 2349 | (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) { |
2350 | cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu", | 2350 | cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu", |
2351 | cic->ttime.ttime_mean); | 2351 | cic->ttime.ttime_mean); |
2352 | return; | 2352 | return; |
2353 | } | 2353 | } |
2354 | 2354 | ||
2355 | /* There are other queues in the group, don't do group idle */ | 2355 | /* There are other queues in the group, don't do group idle */ |
2356 | if (group_idle && cfqq->cfqg->nr_cfqq > 1) | 2356 | if (group_idle && cfqq->cfqg->nr_cfqq > 1) |
2357 | return; | 2357 | return; |
2358 | 2358 | ||
2359 | cfq_mark_cfqq_wait_request(cfqq); | 2359 | cfq_mark_cfqq_wait_request(cfqq); |
2360 | 2360 | ||
2361 | if (group_idle) | 2361 | if (group_idle) |
2362 | sl = cfqd->cfq_group_idle; | 2362 | sl = cfqd->cfq_group_idle; |
2363 | else | 2363 | else |
2364 | sl = cfqd->cfq_slice_idle; | 2364 | sl = cfqd->cfq_slice_idle; |
2365 | 2365 | ||
2366 | mod_timer(&cfqd->idle_slice_timer, jiffies + sl); | 2366 | mod_timer(&cfqd->idle_slice_timer, jiffies + sl); |
2367 | cfqg_stats_set_start_idle_time(cfqq->cfqg); | 2367 | cfqg_stats_set_start_idle_time(cfqq->cfqg); |
2368 | cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl, | 2368 | cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl, |
2369 | group_idle ? 1 : 0); | 2369 | group_idle ? 1 : 0); |
2370 | } | 2370 | } |
2371 | 2371 | ||
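
The think-time test in cfq_arm_slice_timer() boils down to: do not arm the idle timer if the task's mean think time would outlast what is left of the slice. A minimal sketch of just that comparison, with the sample-validity, group-idle and dispatch checks deliberately left out; the numbers in main() are invented for the example.

#include <stdio.h>

/*
 * Decide whether idling for this queue's next request is worth it, with all
 * values in jiffies: if the mean think time would outlast the remaining
 * slice, arming the idle timer only overruns the allotted time.
 */
static int worth_idling(unsigned long now, unsigned long slice_end,
			unsigned long ttime_mean)
{
	return ttime_mean <= slice_end - now;
}

int main(void)
{
	/* 20 jiffies left, mean think time 8: idle (prints 1) */
	printf("%d\n", worth_idling(1000, 1020, 8));
	/* 5 jiffies left, mean think time 8: don't bother (prints 0) */
	printf("%d\n", worth_idling(1015, 1020, 8));
	return 0;
}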
2372 | /* | 2372 | /* |
2373 | * Move request from internal lists to the request queue dispatch list. | 2373 | * Move request from internal lists to the request queue dispatch list. |
2374 | */ | 2374 | */ |
2375 | static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) | 2375 | static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) |
2376 | { | 2376 | { |
2377 | struct cfq_data *cfqd = q->elevator->elevator_data; | 2377 | struct cfq_data *cfqd = q->elevator->elevator_data; |
2378 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 2378 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
2379 | 2379 | ||
2380 | cfq_log_cfqq(cfqd, cfqq, "dispatch_insert"); | 2380 | cfq_log_cfqq(cfqd, cfqq, "dispatch_insert"); |
2381 | 2381 | ||
2382 | cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq); | 2382 | cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq); |
2383 | cfq_remove_request(rq); | 2383 | cfq_remove_request(rq); |
2384 | cfqq->dispatched++; | 2384 | cfqq->dispatched++; |
2385 | (RQ_CFQG(rq))->dispatched++; | 2385 | (RQ_CFQG(rq))->dispatched++; |
2386 | elv_dispatch_sort(q, rq); | 2386 | elv_dispatch_sort(q, rq); |
2387 | 2387 | ||
2388 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; | 2388 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; |
2389 | cfqq->nr_sectors += blk_rq_sectors(rq); | 2389 | cfqq->nr_sectors += blk_rq_sectors(rq); |
2390 | cfqg_stats_update_dispatch(cfqq->cfqg, blk_rq_bytes(rq), rq->cmd_flags); | 2390 | cfqg_stats_update_dispatch(cfqq->cfqg, blk_rq_bytes(rq), rq->cmd_flags); |
2391 | } | 2391 | } |
2392 | 2392 | ||
2393 | /* | 2393 | /* |
2394 | * return expired entry, or NULL to just start from scratch in rbtree | 2394 | * return expired entry, or NULL to just start from scratch in rbtree |
2395 | */ | 2395 | */ |
2396 | static struct request *cfq_check_fifo(struct cfq_queue *cfqq) | 2396 | static struct request *cfq_check_fifo(struct cfq_queue *cfqq) |
2397 | { | 2397 | { |
2398 | struct request *rq = NULL; | 2398 | struct request *rq = NULL; |
2399 | 2399 | ||
2400 | if (cfq_cfqq_fifo_expire(cfqq)) | 2400 | if (cfq_cfqq_fifo_expire(cfqq)) |
2401 | return NULL; | 2401 | return NULL; |
2402 | 2402 | ||
2403 | cfq_mark_cfqq_fifo_expire(cfqq); | 2403 | cfq_mark_cfqq_fifo_expire(cfqq); |
2404 | 2404 | ||
2405 | if (list_empty(&cfqq->fifo)) | 2405 | if (list_empty(&cfqq->fifo)) |
2406 | return NULL; | 2406 | return NULL; |
2407 | 2407 | ||
2408 | rq = rq_entry_fifo(cfqq->fifo.next); | 2408 | rq = rq_entry_fifo(cfqq->fifo.next); |
2409 | if (time_before(jiffies, rq_fifo_time(rq))) | 2409 | if (time_before(jiffies, rq_fifo_time(rq))) |
2410 | rq = NULL; | 2410 | rq = NULL; |
2411 | 2411 | ||
2412 | cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); | 2412 | cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); |
2413 | return rq; | 2413 | return rq; |
2414 | } | 2414 | } |
2415 | 2415 | ||
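
cfq_check_fifo() only hands back the head of the FIFO once its deadline has passed; otherwise the caller falls back to the sector-sorted rbtree. A small userspace rendering of that check, with time_before() rewritten as a plain wraparound-safe macro (no kernel type checking) purely for the example:

#include <stdio.h>

/* true if a is earlier than b, written to survive jiffies wraparound */
#define time_before(a, b)	((long)((a) - (b)) < 0)

/*
 * A request pulled off the FIFO only counts as expired once its deadline
 * (rq_fifo_time) has passed; otherwise the caller dispatches from the rbtree.
 */
static int fifo_expired(unsigned long now, unsigned long rq_fifo_time)
{
	return !time_before(now, rq_fifo_time);
}

int main(void)
{
	/* prints "1 0": the first deadline has passed, the second has not */
	printf("%d %d\n", fifo_expired(500, 400), fifo_expired(300, 400));
	return 0;
}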
2416 | static inline int | 2416 | static inline int |
2417 | cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 2417 | cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
2418 | { | 2418 | { |
2419 | const int base_rq = cfqd->cfq_slice_async_rq; | 2419 | const int base_rq = cfqd->cfq_slice_async_rq; |
2420 | 2420 | ||
2421 | WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); | 2421 | WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); |
2422 | 2422 | ||
2423 | return 2 * base_rq * (IOPRIO_BE_NR - cfqq->ioprio); | 2423 | return 2 * base_rq * (IOPRIO_BE_NR - cfqq->ioprio); |
2424 | } | 2424 | } |
2425 | 2425 | ||
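
The budget returned by cfq_prio_to_maxrq() is simply 2 * base_rq * (IOPRIO_BE_NR - ioprio), so every step down in priority costs 2 * base_rq requests. A worked example, assuming the usual eight best-effort levels and a cfq_slice_async_rq of 2; both values are assumptions made for illustration, not read from this diff.

#include <stdio.h>

#define IOPRIO_BE_NR 8		/* eight best-effort levels (assumed for the example) */

/* higher priority (lower ioprio number) gets a larger dispatch budget */
static int prio_to_maxrq(int base_rq, int ioprio)
{
	return 2 * base_rq * (IOPRIO_BE_NR - ioprio);
}

int main(void)
{
	int prio;

	/* with base_rq = 2 this prints 32, 28, 24, 20, 16, 12, 8, 4 */
	for (prio = 0; prio < IOPRIO_BE_NR; prio++)
		printf("ioprio %d -> maxrq %d\n", prio, prio_to_maxrq(2, prio));
	return 0;
}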
2426 | /* | 2426 | /* |
2427 | * Must be called with the queue_lock held. | 2427 | * Must be called with the queue_lock held. |
2428 | */ | 2428 | */ |
2429 | static int cfqq_process_refs(struct cfq_queue *cfqq) | 2429 | static int cfqq_process_refs(struct cfq_queue *cfqq) |
2430 | { | 2430 | { |
2431 | int process_refs, io_refs; | 2431 | int process_refs, io_refs; |
2432 | 2432 | ||
2433 | io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE]; | 2433 | io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE]; |
2434 | process_refs = cfqq->ref - io_refs; | 2434 | process_refs = cfqq->ref - io_refs; |
2435 | BUG_ON(process_refs < 0); | 2435 | BUG_ON(process_refs < 0); |
2436 | return process_refs; | 2436 | return process_refs; |
2437 | } | 2437 | } |
2438 | 2438 | ||
2439 | static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq) | 2439 | static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq) |
2440 | { | 2440 | { |
2441 | int process_refs, new_process_refs; | 2441 | int process_refs, new_process_refs; |
2442 | struct cfq_queue *__cfqq; | 2442 | struct cfq_queue *__cfqq; |
2443 | 2443 | ||
2444 | /* | 2444 | /* |
2445 | * If there are no process references on the new_cfqq, then it is | 2445 | * If there are no process references on the new_cfqq, then it is |
2446 | * unsafe to follow the ->new_cfqq chain as other cfqq's in the | 2446 | * unsafe to follow the ->new_cfqq chain as other cfqq's in the |
2447 | * chain may have dropped their last reference (not just their | 2447 | * chain may have dropped their last reference (not just their |
2448 | * last process reference). | 2448 | * last process reference). |
2449 | */ | 2449 | */ |
2450 | if (!cfqq_process_refs(new_cfqq)) | 2450 | if (!cfqq_process_refs(new_cfqq)) |
2451 | return; | 2451 | return; |
2452 | 2452 | ||
2453 | /* Avoid a circular list and skip interim queue merges */ | 2453 | /* Avoid a circular list and skip interim queue merges */ |
2454 | while ((__cfqq = new_cfqq->new_cfqq)) { | 2454 | while ((__cfqq = new_cfqq->new_cfqq)) { |
2455 | if (__cfqq == cfqq) | 2455 | if (__cfqq == cfqq) |
2456 | return; | 2456 | return; |
2457 | new_cfqq = __cfqq; | 2457 | new_cfqq = __cfqq; |
2458 | } | 2458 | } |
2459 | 2459 | ||
2460 | process_refs = cfqq_process_refs(cfqq); | 2460 | process_refs = cfqq_process_refs(cfqq); |
2461 | new_process_refs = cfqq_process_refs(new_cfqq); | 2461 | new_process_refs = cfqq_process_refs(new_cfqq); |
2462 | /* | 2462 | /* |
2463 | * If the process for the cfqq has gone away, there is no | 2463 | * If the process for the cfqq has gone away, there is no |
2464 | * sense in merging the queues. | 2464 | * sense in merging the queues. |
2465 | */ | 2465 | */ |
2466 | if (process_refs == 0 || new_process_refs == 0) | 2466 | if (process_refs == 0 || new_process_refs == 0) |
2467 | return; | 2467 | return; |
2468 | 2468 | ||
2469 | /* | 2469 | /* |
2470 | * Merge in the direction of the lesser amount of work. | 2470 | * Merge in the direction of the lesser amount of work. |
2471 | */ | 2471 | */ |
2472 | if (new_process_refs >= process_refs) { | 2472 | if (new_process_refs >= process_refs) { |
2473 | cfqq->new_cfqq = new_cfqq; | 2473 | cfqq->new_cfqq = new_cfqq; |
2474 | new_cfqq->ref += process_refs; | 2474 | new_cfqq->ref += process_refs; |
2475 | } else { | 2475 | } else { |
2476 | new_cfqq->new_cfqq = cfqq; | 2476 | new_cfqq->new_cfqq = cfqq; |
2477 | cfqq->ref += new_process_refs; | 2477 | cfqq->ref += new_process_refs; |
2478 | } | 2478 | } |
2479 | } | 2479 | } |
2480 | 2480 | ||
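
cfq_setup_merge() always points the queue with fewer process references at the busier one and transfers those references so the merge target cannot disappear underneath it. The sketch below reproduces just that decision on a stripped-down queue type invented for the example; the real cfq_queue carries far more state.

#include <stdio.h>

/* a toy queue with only what the merge decision needs */
struct queue {
	const char *name;
	int ref;			/* total references */
	int io_refs;			/* references held by in-flight requests */
	struct queue *new_queue;	/* merge target, analogous to cfqq->new_cfqq */
};

static int process_refs(struct queue *q)
{
	return q->ref - q->io_refs;
}

/* merge in the direction of the lesser amount of work */
static void setup_merge(struct queue *q, struct queue *new_q)
{
	int refs = process_refs(q), new_refs = process_refs(new_q);

	if (refs == 0 || new_refs == 0)
		return;			/* owning process gone, nothing to merge */

	if (new_refs >= refs) {
		q->new_queue = new_q;
		new_q->ref += refs;	/* the redirected queue pins its target */
	} else {
		new_q->new_queue = q;
		q->ref += new_refs;
	}
}

int main(void)
{
	struct queue a = { "a", 5, 2, NULL };	/* 3 process refs */
	struct queue b = { "b", 9, 2, NULL };	/* 7 process refs */

	setup_merge(&a, &b);
	/* prints "a -> b, b.ref = 12": a is redirected, b absorbs a's 3 refs */
	printf("a -> %s, b.ref = %d\n", a.new_queue ? a.new_queue->name : "none", b.ref);
	return 0;
}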
2481 | static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, | 2481 | static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, |
2482 | struct cfq_group *cfqg, enum wl_prio_t prio) | 2482 | struct cfq_group *cfqg, enum wl_prio_t prio) |
2483 | { | 2483 | { |
2484 | struct cfq_queue *queue; | 2484 | struct cfq_queue *queue; |
2485 | int i; | 2485 | int i; |
2486 | bool key_valid = false; | 2486 | bool key_valid = false; |
2487 | unsigned long lowest_key = 0; | 2487 | unsigned long lowest_key = 0; |
2488 | enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD; | 2488 | enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD; |
2489 | 2489 | ||
2490 | for (i = 0; i <= SYNC_WORKLOAD; ++i) { | 2490 | for (i = 0; i <= SYNC_WORKLOAD; ++i) { |
2491 | /* select the one with lowest rb_key */ | 2491 | /* select the one with lowest rb_key */ |
2492 | queue = cfq_rb_first(service_tree_for(cfqg, prio, i)); | 2492 | queue = cfq_rb_first(service_tree_for(cfqg, prio, i)); |
2493 | if (queue && | 2493 | if (queue && |
2494 | (!key_valid || time_before(queue->rb_key, lowest_key))) { | 2494 | (!key_valid || time_before(queue->rb_key, lowest_key))) { |
2495 | lowest_key = queue->rb_key; | 2495 | lowest_key = queue->rb_key; |
2496 | cur_best = i; | 2496 | cur_best = i; |
2497 | key_valid = true; | 2497 | key_valid = true; |
2498 | } | 2498 | } |
2499 | } | 2499 | } |
2500 | 2500 | ||
2501 | return cur_best; | 2501 | return cur_best; |
2502 | } | 2502 | } |
2503 | 2503 | ||
2504 | static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) | 2504 | static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) |
2505 | { | 2505 | { |
2506 | unsigned slice; | 2506 | unsigned slice; |
2507 | unsigned count; | 2507 | unsigned count; |
2508 | struct cfq_rb_root *st; | 2508 | struct cfq_rb_root *st; |
2509 | unsigned group_slice; | 2509 | unsigned group_slice; |
2510 | enum wl_prio_t original_prio = cfqd->serving_prio; | 2510 | enum wl_prio_t original_prio = cfqd->serving_prio; |
2511 | 2511 | ||
2512 | /* Choose next priority. RT > BE > IDLE */ | 2512 | /* Choose next priority. RT > BE > IDLE */ |
2513 | if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) | 2513 | if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) |
2514 | cfqd->serving_prio = RT_WORKLOAD; | 2514 | cfqd->serving_prio = RT_WORKLOAD; |
2515 | else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg)) | 2515 | else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg)) |
2516 | cfqd->serving_prio = BE_WORKLOAD; | 2516 | cfqd->serving_prio = BE_WORKLOAD; |
2517 | else { | 2517 | else { |
2518 | cfqd->serving_prio = IDLE_WORKLOAD; | 2518 | cfqd->serving_prio = IDLE_WORKLOAD; |
2519 | cfqd->workload_expires = jiffies + 1; | 2519 | cfqd->workload_expires = jiffies + 1; |
2520 | return; | 2520 | return; |
2521 | } | 2521 | } |
2522 | 2522 | ||
2523 | if (original_prio != cfqd->serving_prio) | 2523 | if (original_prio != cfqd->serving_prio) |
2524 | goto new_workload; | 2524 | goto new_workload; |
2525 | 2525 | ||
2526 | /* | 2526 | /* |
2527 | * For RT and BE, we have to choose also the type | 2527 | * For RT and BE, we have to choose also the type |
2528 | * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload | 2528 | * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload |
2529 | * expiration time | 2529 | * expiration time |
2530 | */ | 2530 | */ |
2531 | st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); | 2531 | st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); |
2532 | count = st->count; | 2532 | count = st->count; |
2533 | 2533 | ||
2534 | /* | 2534 | /* |
2535 | * check workload expiration, and that we still have other queues ready | 2535 | * check workload expiration, and that we still have other queues ready |
2536 | */ | 2536 | */ |
2537 | if (count && !time_after(jiffies, cfqd->workload_expires)) | 2537 | if (count && !time_after(jiffies, cfqd->workload_expires)) |
2538 | return; | 2538 | return; |
2539 | 2539 | ||
2540 | new_workload: | 2540 | new_workload: |
2541 | /* otherwise select new workload type */ | 2541 | /* otherwise select new workload type */ |
2542 | cfqd->serving_type = | 2542 | cfqd->serving_type = |
2543 | cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); | 2543 | cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); |
2544 | st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); | 2544 | st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); |
2545 | count = st->count; | 2545 | count = st->count; |
2546 | 2546 | ||
2547 | /* | 2547 | /* |
2548 | * the workload slice is computed as a fraction of target latency | 2548 | * the workload slice is computed as a fraction of target latency |
2549 | * proportional to the number of queues in that workload, over | 2549 | * proportional to the number of queues in that workload, over |
2550 | * all the queues in the same priority class | 2550 | * all the queues in the same priority class |
2551 | */ | 2551 | */ |
2552 | group_slice = cfq_group_slice(cfqd, cfqg); | 2552 | group_slice = cfq_group_slice(cfqd, cfqg); |
2553 | 2553 | ||
2554 | slice = group_slice * count / | 2554 | slice = group_slice * count / |
2555 | max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio], | 2555 | max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio], |
2556 | cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg)); | 2556 | cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg)); |
2557 | 2557 | ||
2558 | if (cfqd->serving_type == ASYNC_WORKLOAD) { | 2558 | if (cfqd->serving_type == ASYNC_WORKLOAD) { |
2559 | unsigned int tmp; | 2559 | unsigned int tmp; |
2560 | 2560 | ||
2561 | /* | 2561 | /* |
2562 | * Async queues are currently system wide. Just taking | 2562 | * Async queues are currently system wide. Just taking |
2563 | * proportion of queues within the same group will lead to higher | 2563 | * proportion of queues within the same group will lead to higher |
2564 | * async ratio system wide as generally root group is going | 2564 | * async ratio system wide as generally root group is going |
2565 | * to have higher weight. A more accurate thing would be to | 2565 | * to have higher weight. A more accurate thing would be to |
2566 | * calculate the system wide async/sync ratio. | 2566 | * calculate the system wide async/sync ratio. |
2567 | */ | 2567 | */ |
2568 | tmp = cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg); | 2568 | tmp = cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg); |
2569 | tmp = tmp/cfqd->busy_queues; | 2569 | tmp = tmp/cfqd->busy_queues; |
2570 | slice = min_t(unsigned, slice, tmp); | 2570 | slice = min_t(unsigned, slice, tmp); |
2571 | 2571 | ||
2572 | /* async workload slice is scaled down according to | 2572 | /* async workload slice is scaled down according to |
2573 | * the sync/async slice ratio. */ | 2573 | * the sync/async slice ratio. */ |
2574 | slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1]; | 2574 | slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1]; |
2575 | } else | 2575 | } else |
2576 | /* sync workload slice is at least 2 * cfq_slice_idle */ | 2576 | /* sync workload slice is at least 2 * cfq_slice_idle */ |
2577 | slice = max(slice, 2 * cfqd->cfq_slice_idle); | 2577 | slice = max(slice, 2 * cfqd->cfq_slice_idle); |
2578 | 2578 | ||
2579 | slice = max_t(unsigned, slice, CFQ_MIN_TT); | 2579 | slice = max_t(unsigned, slice, CFQ_MIN_TT); |
2580 | cfq_log(cfqd, "workload slice:%d", slice); | 2580 | cfq_log(cfqd, "workload slice:%d", slice); |
2581 | cfqd->workload_expires = jiffies + slice; | 2581 | cfqd->workload_expires = jiffies + slice; |
2582 | } | 2582 | } |
2583 | 2583 | ||
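
choose_service_tree() sizes the workload slice as the group's slice scaled by the share of busy queues that sit in the chosen workload, then clamps it from below. A hedged arithmetic sketch of the sync path only: the async scaling branch and the averaged busy-queue bookkeeping are omitted, the averaged/instantaneous pair is collapsed into one count, and all constants in main() are example values expressed in milliseconds rather than jiffies.

#include <stdio.h>

static unsigned int max_u(unsigned int a, unsigned int b)
{
	return a > b ? a : b;
}

static unsigned int sync_workload_slice(unsigned int group_slice,
					unsigned int count,		/* queues in the chosen workload */
					unsigned int busy_in_prio,	/* busy queues in the priority class */
					unsigned int slice_idle,
					unsigned int min_tt)
{
	unsigned int slice = group_slice * count / max_u(1, busy_in_prio);

	slice = max_u(slice, 2 * slice_idle);	/* sync slice is at least 2 * slice_idle */
	return max_u(slice, min_tt);		/* and never below the scheduler minimum */
}

int main(void)
{
	/* a 300 ms group slice split over 3 of 5 busy BE queues in the sync
	 * tree, with an 8 ms slice_idle and a 2 ms floor: 300 * 3 / 5 = 180 */
	printf("%u\n", sync_workload_slice(300, 3, 5, 8, 2));
	return 0;
}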
2584 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) | 2584 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) |
2585 | { | 2585 | { |
2586 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 2586 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
2587 | struct cfq_group *cfqg; | 2587 | struct cfq_group *cfqg; |
2588 | 2588 | ||
2589 | if (RB_EMPTY_ROOT(&st->rb)) | 2589 | if (RB_EMPTY_ROOT(&st->rb)) |
2590 | return NULL; | 2590 | return NULL; |
2591 | cfqg = cfq_rb_first_group(st); | 2591 | cfqg = cfq_rb_first_group(st); |
2592 | update_min_vdisktime(st); | 2592 | update_min_vdisktime(st); |
2593 | return cfqg; | 2593 | return cfqg; |
2594 | } | 2594 | } |
2595 | 2595 | ||
2596 | static void cfq_choose_cfqg(struct cfq_data *cfqd) | 2596 | static void cfq_choose_cfqg(struct cfq_data *cfqd) |
2597 | { | 2597 | { |
2598 | struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd); | 2598 | struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd); |
2599 | 2599 | ||
2600 | cfqd->serving_group = cfqg; | 2600 | cfqd->serving_group = cfqg; |
2601 | 2601 | ||
2602 | /* Restore the workload type data */ | 2602 | /* Restore the workload type data */ |
2603 | if (cfqg->saved_workload_slice) { | 2603 | if (cfqg->saved_workload_slice) { |
2604 | cfqd->workload_expires = jiffies + cfqg->saved_workload_slice; | 2604 | cfqd->workload_expires = jiffies + cfqg->saved_workload_slice; |
2605 | cfqd->serving_type = cfqg->saved_workload; | 2605 | cfqd->serving_type = cfqg->saved_workload; |
2606 | cfqd->serving_prio = cfqg->saved_serving_prio; | 2606 | cfqd->serving_prio = cfqg->saved_serving_prio; |
2607 | } else | 2607 | } else |
2608 | cfqd->workload_expires = jiffies - 1; | 2608 | cfqd->workload_expires = jiffies - 1; |
2609 | 2609 | ||
2610 | choose_service_tree(cfqd, cfqg); | 2610 | choose_service_tree(cfqd, cfqg); |
2611 | } | 2611 | } |
2612 | 2612 | ||
2613 | /* | 2613 | /* |
2614 | * Select a queue for service. If we have a current active queue, | 2614 | * Select a queue for service. If we have a current active queue, |
2615 | * check whether to continue servicing it, or retrieve and set a new one. | 2615 | * check whether to continue servicing it, or retrieve and set a new one. |
2616 | */ | 2616 | */ |
2617 | static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) | 2617 | static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) |
2618 | { | 2618 | { |
2619 | struct cfq_queue *cfqq, *new_cfqq = NULL; | 2619 | struct cfq_queue *cfqq, *new_cfqq = NULL; |
2620 | 2620 | ||
2621 | cfqq = cfqd->active_queue; | 2621 | cfqq = cfqd->active_queue; |
2622 | if (!cfqq) | 2622 | if (!cfqq) |
2623 | goto new_queue; | 2623 | goto new_queue; |
2624 | 2624 | ||
2625 | if (!cfqd->rq_queued) | 2625 | if (!cfqd->rq_queued) |
2626 | return NULL; | 2626 | return NULL; |
2627 | 2627 | ||
2628 | /* | 2628 | /* |
2629 | * We were waiting for group to get backlogged. Expire the queue | 2629 | * We were waiting for group to get backlogged. Expire the queue |
2630 | */ | 2630 | */ |
2631 | if (cfq_cfqq_wait_busy(cfqq) && !RB_EMPTY_ROOT(&cfqq->sort_list)) | 2631 | if (cfq_cfqq_wait_busy(cfqq) && !RB_EMPTY_ROOT(&cfqq->sort_list)) |
2632 | goto expire; | 2632 | goto expire; |
2633 | 2633 | ||
2634 | /* | 2634 | /* |
2635 | * The active queue has run out of time, expire it and select new. | 2635 | * The active queue has run out of time, expire it and select new. |
2636 | */ | 2636 | */ |
2637 | if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq)) { | 2637 | if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq)) { |
2638 | /* | 2638 | /* |
2639 | * If slice had not expired at the completion of last request | 2639 | * If slice had not expired at the completion of last request |
2640 | * we might not have turned on wait_busy flag. Don't expire | 2640 | * we might not have turned on wait_busy flag. Don't expire |
2641 | * the queue yet. Allow the group to get backlogged. | 2641 | * the queue yet. Allow the group to get backlogged. |
2642 | * | 2642 | * |
2643 | * The very fact that we have used the slice means we | 2643 | * The very fact that we have used the slice means we |
2644 | * have been idling all along on this queue and it should be | 2644 | * have been idling all along on this queue and it should be |
2645 | * ok to wait for this request to complete. | 2645 | * ok to wait for this request to complete. |
2646 | */ | 2646 | */ |
2647 | if (cfqq->cfqg->nr_cfqq == 1 && RB_EMPTY_ROOT(&cfqq->sort_list) | 2647 | if (cfqq->cfqg->nr_cfqq == 1 && RB_EMPTY_ROOT(&cfqq->sort_list) |
2648 | && cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { | 2648 | && cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { |
2649 | cfqq = NULL; | 2649 | cfqq = NULL; |
2650 | goto keep_queue; | 2650 | goto keep_queue; |
2651 | } else | 2651 | } else |
2652 | goto check_group_idle; | 2652 | goto check_group_idle; |
2653 | } | 2653 | } |
2654 | 2654 | ||
2655 | /* | 2655 | /* |
2656 | * The active queue has requests and isn't expired, allow it to | 2656 | * The active queue has requests and isn't expired, allow it to |
2657 | * dispatch. | 2657 | * dispatch. |
2658 | */ | 2658 | */ |
2659 | if (!RB_EMPTY_ROOT(&cfqq->sort_list)) | 2659 | if (!RB_EMPTY_ROOT(&cfqq->sort_list)) |
2660 | goto keep_queue; | 2660 | goto keep_queue; |
2661 | 2661 | ||
2662 | /* | 2662 | /* |
2663 | * If another queue has a request waiting within our mean seek | 2663 | * If another queue has a request waiting within our mean seek |
2664 | * distance, let it run. The expire code will check for close | 2664 | * distance, let it run. The expire code will check for close |
2665 | * cooperators and put the close queue at the front of the service | 2665 | * cooperators and put the close queue at the front of the service |
2666 | * tree. If possible, merge the expiring queue with the new cfqq. | 2666 | * tree. If possible, merge the expiring queue with the new cfqq. |
2667 | */ | 2667 | */ |
2668 | new_cfqq = cfq_close_cooperator(cfqd, cfqq); | 2668 | new_cfqq = cfq_close_cooperator(cfqd, cfqq); |
2669 | if (new_cfqq) { | 2669 | if (new_cfqq) { |
2670 | if (!cfqq->new_cfqq) | 2670 | if (!cfqq->new_cfqq) |
2671 | cfq_setup_merge(cfqq, new_cfqq); | 2671 | cfq_setup_merge(cfqq, new_cfqq); |
2672 | goto expire; | 2672 | goto expire; |
2673 | } | 2673 | } |
2674 | 2674 | ||
2675 | /* | 2675 | /* |
2676 | * No requests pending. If the active queue still has requests in | 2676 | * No requests pending. If the active queue still has requests in |
2677 | * flight or is idling for a new request, allow either of these | 2677 | * flight or is idling for a new request, allow either of these |
2678 | * conditions to happen (or time out) before selecting a new queue. | 2678 | * conditions to happen (or time out) before selecting a new queue. |
2679 | */ | 2679 | */ |
2680 | if (timer_pending(&cfqd->idle_slice_timer)) { | 2680 | if (timer_pending(&cfqd->idle_slice_timer)) { |
2681 | cfqq = NULL; | 2681 | cfqq = NULL; |
2682 | goto keep_queue; | 2682 | goto keep_queue; |
2683 | } | 2683 | } |
2684 | 2684 | ||
2685 | /* | 2685 | /* |
2686 | * This is a deep seek queue, but the device is much faster than | 2686 | * This is a deep seek queue, but the device is much faster than |
2687 | * the queue can deliver, don't idle | 2687 | * the queue can deliver, don't idle |
2688 | **/ | 2688 | **/ |
2689 | if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) && | 2689 | if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) && |
2690 | (cfq_cfqq_slice_new(cfqq) || | 2690 | (cfq_cfqq_slice_new(cfqq) || |
2691 | (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) { | 2691 | (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) { |
2692 | cfq_clear_cfqq_deep(cfqq); | 2692 | cfq_clear_cfqq_deep(cfqq); |
2693 | cfq_clear_cfqq_idle_window(cfqq); | 2693 | cfq_clear_cfqq_idle_window(cfqq); |
2694 | } | 2694 | } |
2695 | 2695 | ||
2696 | if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { | 2696 | if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { |
2697 | cfqq = NULL; | 2697 | cfqq = NULL; |
2698 | goto keep_queue; | 2698 | goto keep_queue; |
2699 | } | 2699 | } |
2700 | 2700 | ||
2701 | /* | 2701 | /* |
2702 | * If group idle is enabled and there are requests dispatched from | 2702 | * If group idle is enabled and there are requests dispatched from |
2703 | * this group, wait for requests to complete. | 2703 | * this group, wait for requests to complete. |
2704 | */ | 2704 | */ |
2705 | check_group_idle: | 2705 | check_group_idle: |
2706 | if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 && | 2706 | if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 && |
2707 | cfqq->cfqg->dispatched && | 2707 | cfqq->cfqg->dispatched && |
2708 | !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) { | 2708 | !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) { |
2709 | cfqq = NULL; | 2709 | cfqq = NULL; |
2710 | goto keep_queue; | 2710 | goto keep_queue; |
2711 | } | 2711 | } |
2712 | 2712 | ||
2713 | expire: | 2713 | expire: |
2714 | cfq_slice_expired(cfqd, 0); | 2714 | cfq_slice_expired(cfqd, 0); |
2715 | new_queue: | 2715 | new_queue: |
2716 | /* | 2716 | /* |
2717 | * Current queue expired. Check if we have to switch to a new | 2717 | * Current queue expired. Check if we have to switch to a new |
2718 | * service tree | 2718 | * service tree |
2719 | */ | 2719 | */ |
2720 | if (!new_cfqq) | 2720 | if (!new_cfqq) |
2721 | cfq_choose_cfqg(cfqd); | 2721 | cfq_choose_cfqg(cfqd); |
2722 | 2722 | ||
2723 | cfqq = cfq_set_active_queue(cfqd, new_cfqq); | 2723 | cfqq = cfq_set_active_queue(cfqd, new_cfqq); |
2724 | keep_queue: | 2724 | keep_queue: |
2725 | return cfqq; | 2725 | return cfqq; |
2726 | } | 2726 | } |
2727 | 2727 | ||
2728 | static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) | 2728 | static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) |
2729 | { | 2729 | { |
2730 | int dispatched = 0; | 2730 | int dispatched = 0; |
2731 | 2731 | ||
2732 | while (cfqq->next_rq) { | 2732 | while (cfqq->next_rq) { |
2733 | cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq); | 2733 | cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq); |
2734 | dispatched++; | 2734 | dispatched++; |
2735 | } | 2735 | } |
2736 | 2736 | ||
2737 | BUG_ON(!list_empty(&cfqq->fifo)); | 2737 | BUG_ON(!list_empty(&cfqq->fifo)); |
2738 | 2738 | ||
2739 | /* By default cfqq is not expired if it is empty. Do it explicitly */ | 2739 | /* By default cfqq is not expired if it is empty. Do it explicitly */ |
2740 | __cfq_slice_expired(cfqq->cfqd, cfqq, 0); | 2740 | __cfq_slice_expired(cfqq->cfqd, cfqq, 0); |
2741 | return dispatched; | 2741 | return dispatched; |
2742 | } | 2742 | } |
2743 | 2743 | ||
2744 | /* | 2744 | /* |
2745 | * Drain our current requests. Used for barriers and when switching | 2745 | * Drain our current requests. Used for barriers and when switching |
2746 | * io schedulers on-the-fly. | 2746 | * io schedulers on-the-fly. |
2747 | */ | 2747 | */ |
2748 | static int cfq_forced_dispatch(struct cfq_data *cfqd) | 2748 | static int cfq_forced_dispatch(struct cfq_data *cfqd) |
2749 | { | 2749 | { |
2750 | struct cfq_queue *cfqq; | 2750 | struct cfq_queue *cfqq; |
2751 | int dispatched = 0; | 2751 | int dispatched = 0; |
2752 | 2752 | ||
2753 | /* Expire the timeslice of the current active queue first */ | 2753 | /* Expire the timeslice of the current active queue first */ |
2754 | cfq_slice_expired(cfqd, 0); | 2754 | cfq_slice_expired(cfqd, 0); |
2755 | while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) { | 2755 | while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) { |
2756 | __cfq_set_active_queue(cfqd, cfqq); | 2756 | __cfq_set_active_queue(cfqd, cfqq); |
2757 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); | 2757 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); |
2758 | } | 2758 | } |
2759 | 2759 | ||
2760 | BUG_ON(cfqd->busy_queues); | 2760 | BUG_ON(cfqd->busy_queues); |
2761 | 2761 | ||
2762 | cfq_log(cfqd, "forced_dispatch=%d", dispatched); | 2762 | cfq_log(cfqd, "forced_dispatch=%d", dispatched); |
2763 | return dispatched; | 2763 | return dispatched; |
2764 | } | 2764 | } |
2765 | 2765 | ||
2766 | static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, | 2766 | static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, |
2767 | struct cfq_queue *cfqq) | 2767 | struct cfq_queue *cfqq) |
2768 | { | 2768 | { |
2769 | /* the queue hasn't finished any request, can't estimate */ | 2769 | /* the queue hasn't finished any request, can't estimate */ |
2770 | if (cfq_cfqq_slice_new(cfqq)) | 2770 | if (cfq_cfqq_slice_new(cfqq)) |
2771 | return true; | 2771 | return true; |
2772 | if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, | 2772 | if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, |
2773 | cfqq->slice_end)) | 2773 | cfqq->slice_end)) |
2774 | return true; | 2774 | return true; |
2775 | 2775 | ||
2776 | return false; | 2776 | return false; |
2777 | } | 2777 | } |
2778 | 2778 | ||
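
cfq_slice_used_soon() estimates whether the requests already dispatched, at roughly one cfq_slice_idle each, would on their own carry the queue past its slice end. The same estimate as a standalone check, with time_after() rewritten as a plain macro for the example and the slice_new short-circuit left out:

#include <stdio.h>

/* true if a is later than b, wraparound-safe */
#define time_after(a, b)	((long)((b) - (a)) < 0)

/* values are in jiffies */
static int slice_used_soon(unsigned long now, unsigned long slice_end,
			   unsigned long slice_idle, unsigned int dispatched)
{
	return time_after(now + slice_idle * dispatched, slice_end);
}

int main(void)
{
	/* 40 jiffies left, 8 jiffies per request: 4 in flight is fine, 6 is not;
	 * prints "0 1" */
	printf("%d %d\n",
	       slice_used_soon(1000, 1040, 8, 4),
	       slice_used_soon(1000, 1040, 8, 6));
	return 0;
}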
2779 | static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 2779 | static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
2780 | { | 2780 | { |
2781 | unsigned int max_dispatch; | 2781 | unsigned int max_dispatch; |
2782 | 2782 | ||
2783 | /* | 2783 | /* |
2784 | * Drain async requests before we start sync IO | 2784 | * Drain async requests before we start sync IO |
2785 | */ | 2785 | */ |
2786 | if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_flight[BLK_RW_ASYNC]) | 2786 | if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_flight[BLK_RW_ASYNC]) |
2787 | return false; | 2787 | return false; |
2788 | 2788 | ||
2789 | /* | 2789 | /* |
2790 | * If this is an async queue and we have sync IO in flight, let it wait | 2790 | * If this is an async queue and we have sync IO in flight, let it wait |
2791 | */ | 2791 | */ |
2792 | if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq)) | 2792 | if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq)) |
2793 | return false; | 2793 | return false; |
2794 | 2794 | ||
2795 | max_dispatch = max_t(unsigned int, cfqd->cfq_quantum / 2, 1); | 2795 | max_dispatch = max_t(unsigned int, cfqd->cfq_quantum / 2, 1); |
2796 | if (cfq_class_idle(cfqq)) | 2796 | if (cfq_class_idle(cfqq)) |
2797 | max_dispatch = 1; | 2797 | max_dispatch = 1; |
2798 | 2798 | ||
2799 | /* | 2799 | /* |
2800 | * Does this cfqq already have too much IO in flight? | 2800 | * Does this cfqq already have too much IO in flight? |
2801 | */ | 2801 | */ |
2802 | if (cfqq->dispatched >= max_dispatch) { | 2802 | if (cfqq->dispatched >= max_dispatch) { |
2803 | bool promote_sync = false; | 2803 | bool promote_sync = false; |
2804 | /* | 2804 | /* |
2805 | * idle queue must always only have a single IO in flight | 2805 | * idle queue must always only have a single IO in flight |
2806 | */ | 2806 | */ |
2807 | if (cfq_class_idle(cfqq)) | 2807 | if (cfq_class_idle(cfqq)) |
2808 | return false; | 2808 | return false; |
2809 | 2809 | ||
2810 | /* | 2810 | /* |
2811 | * If there is only one sync queue | 2811 | * If there is only one sync queue |
2812 | * we can ignore async queue here and give the sync | 2812 | * we can ignore async queue here and give the sync |
2813 | * queue no dispatch limit. The reason is a sync queue can | 2813 | * queue no dispatch limit. The reason is a sync queue can |
2814 | * preempt async queue, limiting the sync queue doesn't make | 2814 | * preempt async queue, limiting the sync queue doesn't make |
2815 | * sense. This is useful for aiostress test. | 2815 | * sense. This is useful for aiostress test. |
2816 | */ | 2816 | */ |
2817 | if (cfq_cfqq_sync(cfqq) && cfqd->busy_sync_queues == 1) | 2817 | if (cfq_cfqq_sync(cfqq) && cfqd->busy_sync_queues == 1) |
2818 | promote_sync = true; | 2818 | promote_sync = true; |
2819 | 2819 | ||
2820 | /* | 2820 | /* |
2821 | * We have other queues, don't allow more IO from this one | 2821 | * We have other queues, don't allow more IO from this one |
2822 | */ | 2822 | */ |
2823 | if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq) && | 2823 | if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq) && |
2824 | !promote_sync) | 2824 | !promote_sync) |
2825 | return false; | 2825 | return false; |
2826 | 2826 | ||
2827 | /* | 2827 | /* |
2828 | * Sole queue user, no limit | 2828 | * Sole queue user, no limit |
2829 | */ | 2829 | */ |
2830 | if (cfqd->busy_queues == 1 || promote_sync) | 2830 | if (cfqd->busy_queues == 1 || promote_sync) |
2831 | max_dispatch = -1; | 2831 | max_dispatch = -1; |
2832 | else | 2832 | else |
2833 | /* | 2833 | /* |
2834 | * Normally we start throttling cfqq when cfq_quantum/2 | 2834 | * Normally we start throttling cfqq when cfq_quantum/2 |
2835 | * requests have been dispatched. But we can drive | 2835 | * requests have been dispatched. But we can drive |
2836 | * deeper queue depths at the beginning of slice | 2836 | * deeper queue depths at the beginning of slice |
2837 | * subject to the upper limit of cfq_quantum. | 2837 | * subject to the upper limit of cfq_quantum. |
2838 | * */ | 2838 | * */ |
2839 | max_dispatch = cfqd->cfq_quantum; | 2839 | max_dispatch = cfqd->cfq_quantum; |
2840 | } | 2840 | } |
2841 | 2841 | ||
2842 | /* | 2842 | /* |
2843 | * Async queues must wait a bit before being allowed dispatch. | 2843 | * Async queues must wait a bit before being allowed dispatch. |
2844 | * We also ramp up the dispatch depth gradually for async IO, | 2844 | * We also ramp up the dispatch depth gradually for async IO, |
2845 | * based on the last sync IO we serviced | 2845 | * based on the last sync IO we serviced |
2846 | */ | 2846 | */ |
2847 | if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { | 2847 | if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { |
2848 | unsigned long last_sync = jiffies - cfqd->last_delayed_sync; | 2848 | unsigned long last_sync = jiffies - cfqd->last_delayed_sync; |
2849 | unsigned int depth; | 2849 | unsigned int depth; |
2850 | 2850 | ||
2851 | depth = last_sync / cfqd->cfq_slice[1]; | 2851 | depth = last_sync / cfqd->cfq_slice[1]; |
2852 | if (!depth && !cfqq->dispatched) | 2852 | if (!depth && !cfqq->dispatched) |
2853 | depth = 1; | 2853 | depth = 1; |
2854 | if (depth < max_dispatch) | 2854 | if (depth < max_dispatch) |
2855 | max_dispatch = depth; | 2855 | max_dispatch = depth; |
2856 | } | 2856 | } |
2857 | 2857 | ||
2858 | /* | 2858 | /* |
2859 | * If we're below the current max, allow a dispatch | 2859 | * If we're below the current max, allow a dispatch |
2860 | */ | 2860 | */ |
2861 | return cfqq->dispatched < max_dispatch; | 2861 | return cfqq->dispatched < max_dispatch; |
2862 | } | 2862 | } |
2863 | 2863 | ||
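
The tail of cfq_may_dispatch() ramps the async dispatch depth up with the time elapsed since the last delayed sync completion: one request immediately after sync I/O, one more per sync-slice worth of quiet time, never above max_dispatch. A sketch of that ramp; the 100-jiffy sync slice in main() is an assumed example value, not a default read from this diff.

#include <stdio.h>

static unsigned int async_depth(unsigned long now,
				unsigned long last_delayed_sync,
				unsigned long slice_sync,	/* cfqd->cfq_slice[1] */
				unsigned int dispatched,
				unsigned int max_dispatch)
{
	unsigned long since_sync = now - last_delayed_sync;
	unsigned int depth = since_sync / slice_sync;

	if (!depth && !dispatched)
		depth = 1;		/* always let one request trickle out */
	if (depth < max_dispatch)
		max_dispatch = depth;
	return max_dispatch;
}

int main(void)
{
	/* with a 100-jiffy sync slice: right after sync I/O the depth is 1,
	 * 350 jiffies later it has ramped up to 3, still under the cap of 4;
	 * prints "1 3" */
	printf("%u %u\n",
	       async_depth(1000, 1000, 100, 0, 4),
	       async_depth(1350, 1000, 100, 2, 4));
	return 0;
}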
2864 | /* | 2864 | /* |
2865 | * Dispatch a request from cfqq, moving them to the request queue | 2865 | * Dispatch a request from cfqq, moving them to the request queue |
2866 | * dispatch list. | 2866 | * dispatch list. |
2867 | */ | 2867 | */ |
2868 | static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 2868 | static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
2869 | { | 2869 | { |
2870 | struct request *rq; | 2870 | struct request *rq; |
2871 | 2871 | ||
2872 | BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); | 2872 | BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); |
2873 | 2873 | ||
2874 | if (!cfq_may_dispatch(cfqd, cfqq)) | 2874 | if (!cfq_may_dispatch(cfqd, cfqq)) |
2875 | return false; | 2875 | return false; |
2876 | 2876 | ||
2877 | /* | 2877 | /* |
2878 | * follow expired path, else get first next available | 2878 | * follow expired path, else get first next available |
2879 | */ | 2879 | */ |
2880 | rq = cfq_check_fifo(cfqq); | 2880 | rq = cfq_check_fifo(cfqq); |
2881 | if (!rq) | 2881 | if (!rq) |
2882 | rq = cfqq->next_rq; | 2882 | rq = cfqq->next_rq; |
2883 | 2883 | ||
2884 | /* | 2884 | /* |
2885 | * insert request into driver dispatch list | 2885 | * insert request into driver dispatch list |
2886 | */ | 2886 | */ |
2887 | cfq_dispatch_insert(cfqd->queue, rq); | 2887 | cfq_dispatch_insert(cfqd->queue, rq); |
2888 | 2888 | ||
2889 | if (!cfqd->active_cic) { | 2889 | if (!cfqd->active_cic) { |
2890 | struct cfq_io_cq *cic = RQ_CIC(rq); | 2890 | struct cfq_io_cq *cic = RQ_CIC(rq); |
2891 | 2891 | ||
2892 | atomic_long_inc(&cic->icq.ioc->refcount); | 2892 | atomic_long_inc(&cic->icq.ioc->refcount); |
2893 | cfqd->active_cic = cic; | 2893 | cfqd->active_cic = cic; |
2894 | } | 2894 | } |
2895 | 2895 | ||
2896 | return true; | 2896 | return true; |
2897 | } | 2897 | } |
2898 | 2898 | ||
2899 | /* | 2899 | /* |
2900 | * Find the cfqq that we need to service and move a request from that to the | 2900 | * Find the cfqq that we need to service and move a request from that to the |
2901 | * dispatch list | 2901 | * dispatch list |
2902 | */ | 2902 | */ |
2903 | static int cfq_dispatch_requests(struct request_queue *q, int force) | 2903 | static int cfq_dispatch_requests(struct request_queue *q, int force) |
2904 | { | 2904 | { |
2905 | struct cfq_data *cfqd = q->elevator->elevator_data; | 2905 | struct cfq_data *cfqd = q->elevator->elevator_data; |
2906 | struct cfq_queue *cfqq; | 2906 | struct cfq_queue *cfqq; |
2907 | 2907 | ||
2908 | if (!cfqd->busy_queues) | 2908 | if (!cfqd->busy_queues) |
2909 | return 0; | 2909 | return 0; |
2910 | 2910 | ||
2911 | if (unlikely(force)) | 2911 | if (unlikely(force)) |
2912 | return cfq_forced_dispatch(cfqd); | 2912 | return cfq_forced_dispatch(cfqd); |
2913 | 2913 | ||
2914 | cfqq = cfq_select_queue(cfqd); | 2914 | cfqq = cfq_select_queue(cfqd); |
2915 | if (!cfqq) | 2915 | if (!cfqq) |
2916 | return 0; | 2916 | return 0; |
2917 | 2917 | ||
2918 | /* | 2918 | /* |
2919 | * Dispatch a request from this cfqq, if it is allowed | 2919 | * Dispatch a request from this cfqq, if it is allowed |
2920 | */ | 2920 | */ |
2921 | if (!cfq_dispatch_request(cfqd, cfqq)) | 2921 | if (!cfq_dispatch_request(cfqd, cfqq)) |
2922 | return 0; | 2922 | return 0; |
2923 | 2923 | ||
2924 | cfqq->slice_dispatch++; | 2924 | cfqq->slice_dispatch++; |
2925 | cfq_clear_cfqq_must_dispatch(cfqq); | 2925 | cfq_clear_cfqq_must_dispatch(cfqq); |
2926 | 2926 | ||
2927 | /* | 2927 | /* |
2928 | * expire an async queue immediately if it has used up its slice. idle | 2928 | * expire an async queue immediately if it has used up its slice. idle |
2929 | * queues always expire after 1 dispatch round. | 2929 | * queues always expire after 1 dispatch round. |
2930 | */ | 2930 | */ |
2931 | if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && | 2931 | if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && |
2932 | cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || | 2932 | cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || |
2933 | cfq_class_idle(cfqq))) { | 2933 | cfq_class_idle(cfqq))) { |
2934 | cfqq->slice_end = jiffies + 1; | 2934 | cfqq->slice_end = jiffies + 1; |
2935 | cfq_slice_expired(cfqd, 0); | 2935 | cfq_slice_expired(cfqd, 0); |
2936 | } | 2936 | } |
2937 | 2937 | ||
2938 | cfq_log_cfqq(cfqd, cfqq, "dispatched a request"); | 2938 | cfq_log_cfqq(cfqd, cfqq, "dispatched a request"); |
2939 | return 1; | 2939 | return 1; |
2940 | } | 2940 | } |
2941 | 2941 | ||
2942 | /* | 2942 | /* |
2943 | * task holds one reference to the queue, dropped when task exits. each rq | 2943 | * task holds one reference to the queue, dropped when task exits. each rq |
2944 | * in-flight on this queue also holds a reference, dropped when rq is freed. | 2944 | * in-flight on this queue also holds a reference, dropped when rq is freed. |
2945 | * | 2945 | * |
2946 | * Each cfq queue took a reference on the parent group. Drop it now. | 2946 | * Each cfq queue took a reference on the parent group. Drop it now. |
2947 | * queue lock must be held here. | 2947 | * queue lock must be held here. |
2948 | */ | 2948 | */ |
2949 | static void cfq_put_queue(struct cfq_queue *cfqq) | 2949 | static void cfq_put_queue(struct cfq_queue *cfqq) |
2950 | { | 2950 | { |
2951 | struct cfq_data *cfqd = cfqq->cfqd; | 2951 | struct cfq_data *cfqd = cfqq->cfqd; |
2952 | struct cfq_group *cfqg; | 2952 | struct cfq_group *cfqg; |
2953 | 2953 | ||
2954 | BUG_ON(cfqq->ref <= 0); | 2954 | BUG_ON(cfqq->ref <= 0); |
2955 | 2955 | ||
2956 | cfqq->ref--; | 2956 | cfqq->ref--; |
2957 | if (cfqq->ref) | 2957 | if (cfqq->ref) |
2958 | return; | 2958 | return; |
2959 | 2959 | ||
2960 | cfq_log_cfqq(cfqd, cfqq, "put_queue"); | 2960 | cfq_log_cfqq(cfqd, cfqq, "put_queue"); |
2961 | BUG_ON(rb_first(&cfqq->sort_list)); | 2961 | BUG_ON(rb_first(&cfqq->sort_list)); |
2962 | BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); | 2962 | BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); |
2963 | cfqg = cfqq->cfqg; | 2963 | cfqg = cfqq->cfqg; |
2964 | 2964 | ||
2965 | if (unlikely(cfqd->active_queue == cfqq)) { | 2965 | if (unlikely(cfqd->active_queue == cfqq)) { |
2966 | __cfq_slice_expired(cfqd, cfqq, 0); | 2966 | __cfq_slice_expired(cfqd, cfqq, 0); |
2967 | cfq_schedule_dispatch(cfqd); | 2967 | cfq_schedule_dispatch(cfqd); |
2968 | } | 2968 | } |
2969 | 2969 | ||
2970 | BUG_ON(cfq_cfqq_on_rr(cfqq)); | 2970 | BUG_ON(cfq_cfqq_on_rr(cfqq)); |
2971 | kmem_cache_free(cfq_pool, cfqq); | 2971 | kmem_cache_free(cfq_pool, cfqq); |
2972 | cfqg_put(cfqg); | 2972 | cfqg_put(cfqg); |
2973 | } | 2973 | } |
2974 | 2974 | ||
2975 | static void cfq_put_cooperator(struct cfq_queue *cfqq) | 2975 | static void cfq_put_cooperator(struct cfq_queue *cfqq) |
2976 | { | 2976 | { |
2977 | struct cfq_queue *__cfqq, *next; | 2977 | struct cfq_queue *__cfqq, *next; |
2978 | 2978 | ||
2979 | /* | 2979 | /* |
2980 | * If this queue was scheduled to merge with another queue, be | 2980 | * If this queue was scheduled to merge with another queue, be |
2981 | * sure to drop the reference taken on that queue (and others in | 2981 | * sure to drop the reference taken on that queue (and others in |
2982 | * the merge chain). See cfq_setup_merge and cfq_merge_cfqqs. | 2982 | * the merge chain). See cfq_setup_merge and cfq_merge_cfqqs. |
2983 | */ | 2983 | */ |
2984 | __cfqq = cfqq->new_cfqq; | 2984 | __cfqq = cfqq->new_cfqq; |
2985 | while (__cfqq) { | 2985 | while (__cfqq) { |
2986 | if (__cfqq == cfqq) { | 2986 | if (__cfqq == cfqq) { |
2987 | WARN(1, "cfqq->new_cfqq loop detected\n"); | 2987 | WARN(1, "cfqq->new_cfqq loop detected\n"); |
2988 | break; | 2988 | break; |
2989 | } | 2989 | } |
2990 | next = __cfqq->new_cfqq; | 2990 | next = __cfqq->new_cfqq; |
2991 | cfq_put_queue(__cfqq); | 2991 | cfq_put_queue(__cfqq); |
2992 | __cfqq = next; | 2992 | __cfqq = next; |
2993 | } | 2993 | } |
2994 | } | 2994 | } |
2995 | 2995 | ||
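
cfq_put_cooperator() walks the new_cfqq merge chain, dropping one reference per queue, and bails out with a warning if the chain ever loops back to where it started. A toy version of that walk on a hypothetical queue type, with the reference drop reduced to a printf:

#include <stdio.h>

struct queue {
	const char *name;
	struct queue *new_queue;	/* merge chain, as in cfqq->new_cfqq */
};

/* walk the chain from q, "dropping" each queue once, with a loop guard */
static void put_cooperators(struct queue *q)
{
	struct queue *cur = q->new_queue;

	while (cur) {
		struct queue *next;

		if (cur == q) {
			fprintf(stderr, "new_queue loop detected\n");
			break;
		}
		next = cur->new_queue;
		printf("dropping reference on %s\n", cur->name);
		cur = next;
	}
}

int main(void)
{
	struct queue c = { "c", NULL };
	struct queue b = { "b", &c };
	struct queue a = { "a", &b };

	put_cooperators(&a);	/* drops b, then c */
	return 0;
}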
2996 | static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 2996 | static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
2997 | { | 2997 | { |
2998 | if (unlikely(cfqq == cfqd->active_queue)) { | 2998 | if (unlikely(cfqq == cfqd->active_queue)) { |
2999 | __cfq_slice_expired(cfqd, cfqq, 0); | 2999 | __cfq_slice_expired(cfqd, cfqq, 0); |
3000 | cfq_schedule_dispatch(cfqd); | 3000 | cfq_schedule_dispatch(cfqd); |
3001 | } | 3001 | } |
3002 | 3002 | ||
3003 | cfq_put_cooperator(cfqq); | 3003 | cfq_put_cooperator(cfqq); |
3004 | 3004 | ||
3005 | cfq_put_queue(cfqq); | 3005 | cfq_put_queue(cfqq); |
3006 | } | 3006 | } |
3007 | 3007 | ||
3008 | static void cfq_init_icq(struct io_cq *icq) | 3008 | static void cfq_init_icq(struct io_cq *icq) |
3009 | { | 3009 | { |
3010 | struct cfq_io_cq *cic = icq_to_cic(icq); | 3010 | struct cfq_io_cq *cic = icq_to_cic(icq); |
3011 | 3011 | ||
3012 | cic->ttime.last_end_request = jiffies; | 3012 | cic->ttime.last_end_request = jiffies; |
3013 | } | 3013 | } |
3014 | 3014 | ||
3015 | static void cfq_exit_icq(struct io_cq *icq) | 3015 | static void cfq_exit_icq(struct io_cq *icq) |
3016 | { | 3016 | { |
3017 | struct cfq_io_cq *cic = icq_to_cic(icq); | 3017 | struct cfq_io_cq *cic = icq_to_cic(icq); |
3018 | struct cfq_data *cfqd = cic_to_cfqd(cic); | 3018 | struct cfq_data *cfqd = cic_to_cfqd(cic); |
3019 | 3019 | ||
3020 | if (cic->cfqq[BLK_RW_ASYNC]) { | 3020 | if (cic->cfqq[BLK_RW_ASYNC]) { |
3021 | cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]); | 3021 | cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]); |
3022 | cic->cfqq[BLK_RW_ASYNC] = NULL; | 3022 | cic->cfqq[BLK_RW_ASYNC] = NULL; |
3023 | } | 3023 | } |
3024 | 3024 | ||
3025 | if (cic->cfqq[BLK_RW_SYNC]) { | 3025 | if (cic->cfqq[BLK_RW_SYNC]) { |
3026 | cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]); | 3026 | cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]); |
3027 | cic->cfqq[BLK_RW_SYNC] = NULL; | 3027 | cic->cfqq[BLK_RW_SYNC] = NULL; |
3028 | } | 3028 | } |
3029 | } | 3029 | } |
3030 | 3030 | ||
3031 | static void cfq_init_prio_data(struct cfq_queue *cfqq, struct cfq_io_cq *cic) | 3031 | static void cfq_init_prio_data(struct cfq_queue *cfqq, struct cfq_io_cq *cic) |
3032 | { | 3032 | { |
3033 | struct task_struct *tsk = current; | 3033 | struct task_struct *tsk = current; |
3034 | int ioprio_class; | 3034 | int ioprio_class; |
3035 | 3035 | ||
3036 | if (!cfq_cfqq_prio_changed(cfqq)) | 3036 | if (!cfq_cfqq_prio_changed(cfqq)) |
3037 | return; | 3037 | return; |
3038 | 3038 | ||
3039 | ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio); | 3039 | ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio); |
3040 | switch (ioprio_class) { | 3040 | switch (ioprio_class) { |
3041 | default: | 3041 | default: |
3042 | printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); | 3042 | printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); |
3043 | case IOPRIO_CLASS_NONE: | 3043 | case IOPRIO_CLASS_NONE: |
3044 | /* | 3044 | /* |
3045 | * no prio set, inherit CPU scheduling settings | 3045 | * no prio set, inherit CPU scheduling settings |
3046 | */ | 3046 | */ |
3047 | cfqq->ioprio = task_nice_ioprio(tsk); | 3047 | cfqq->ioprio = task_nice_ioprio(tsk); |
3048 | cfqq->ioprio_class = task_nice_ioclass(tsk); | 3048 | cfqq->ioprio_class = task_nice_ioclass(tsk); |
3049 | break; | 3049 | break; |
3050 | case IOPRIO_CLASS_RT: | 3050 | case IOPRIO_CLASS_RT: |
3051 | cfqq->ioprio = IOPRIO_PRIO_DATA(cic->ioprio); | 3051 | cfqq->ioprio = IOPRIO_PRIO_DATA(cic->ioprio); |
3052 | cfqq->ioprio_class = IOPRIO_CLASS_RT; | 3052 | cfqq->ioprio_class = IOPRIO_CLASS_RT; |
3053 | break; | 3053 | break; |
3054 | case IOPRIO_CLASS_BE: | 3054 | case IOPRIO_CLASS_BE: |
3055 | cfqq->ioprio = IOPRIO_PRIO_DATA(cic->ioprio); | 3055 | cfqq->ioprio = IOPRIO_PRIO_DATA(cic->ioprio); |
3056 | cfqq->ioprio_class = IOPRIO_CLASS_BE; | 3056 | cfqq->ioprio_class = IOPRIO_CLASS_BE; |
3057 | break; | 3057 | break; |
3058 | case IOPRIO_CLASS_IDLE: | 3058 | case IOPRIO_CLASS_IDLE: |
3059 | cfqq->ioprio_class = IOPRIO_CLASS_IDLE; | 3059 | cfqq->ioprio_class = IOPRIO_CLASS_IDLE; |
3060 | cfqq->ioprio = 7; | 3060 | cfqq->ioprio = 7; |
3061 | cfq_clear_cfqq_idle_window(cfqq); | 3061 | cfq_clear_cfqq_idle_window(cfqq); |
3062 | break; | 3062 | break; |
3063 | } | 3063 | } |
3064 | 3064 | ||
3065 | /* | 3065 | /* |
3066 | * keep track of original prio settings in case we have to temporarily | 3066 | * keep track of original prio settings in case we have to temporarily |
3067 | * elevate the priority of this queue | 3067 | * elevate the priority of this queue |
3068 | */ | 3068 | */ |
3069 | cfqq->org_ioprio = cfqq->ioprio; | 3069 | cfqq->org_ioprio = cfqq->ioprio; |
3070 | cfq_clear_cfqq_prio_changed(cfqq); | 3070 | cfq_clear_cfqq_prio_changed(cfqq); |
3071 | } | 3071 | } |
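
cfq_init_prio_data() above decodes cic->ioprio into a scheduling class (RT, BE, IDLE or NONE) and a per-class level via the IOPRIO_PRIO_CLASS()/IOPRIO_PRIO_DATA() macros. As a quick illustration of that packing, here is a minimal userspace C sketch; the 13-bit class shift and the class values follow the usual ioprio.h conventions, but treat every constant below as an illustrative assumption rather than the authoritative header.

	/* Illustrative only: mirrors the ioprio packing style used by the
	 * kernel's IOPRIO_* macros (class in the top bits, level in the low
	 * 13 bits). Constants here are assumptions for this sketch. */
	#include <stdio.h>

	#define IOPRIO_CLASS_SHIFT	13
	#define IOPRIO_PRIO_MASK	((1u << IOPRIO_CLASS_SHIFT) - 1)

	enum { IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE };

	static unsigned prio_value(unsigned class, unsigned data)
	{
		return (class << IOPRIO_CLASS_SHIFT) | data;
	}

	int main(void)
	{
		unsigned ioprio = prio_value(IOPRIO_CLASS_BE, 4);	/* best-effort, level 4 */

		printf("class=%u data=%u\n",
		       ioprio >> IOPRIO_CLASS_SHIFT,	/* prints 2, i.e. IOPRIO_CLASS_BE */
		       ioprio & IOPRIO_PRIO_MASK);	/* prints 4 */
		return 0;
	}

For IOPRIO_CLASS_NONE the function falls back to the task's nice-derived class and level, as the switch above shows.
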
3072 | 3072 | ||
3073 | static void check_ioprio_changed(struct cfq_io_cq *cic, struct bio *bio) | 3073 | static void check_ioprio_changed(struct cfq_io_cq *cic, struct bio *bio) |
3074 | { | 3074 | { |
3075 | int ioprio = cic->icq.ioc->ioprio; | 3075 | int ioprio = cic->icq.ioc->ioprio; |
3076 | struct cfq_data *cfqd = cic_to_cfqd(cic); | 3076 | struct cfq_data *cfqd = cic_to_cfqd(cic); |
3077 | struct cfq_queue *cfqq; | 3077 | struct cfq_queue *cfqq; |
3078 | 3078 | ||
3079 | /* | 3079 | /* |
3080 | * Check whether ioprio has changed. The condition may trigger | 3080 | * Check whether ioprio has changed. The condition may trigger |
3081 | * spuriously on a newly created cic but there's no harm. | 3081 | * spuriously on a newly created cic but there's no harm. |
3082 | */ | 3082 | */ |
3083 | if (unlikely(!cfqd) || likely(cic->ioprio == ioprio)) | 3083 | if (unlikely(!cfqd) || likely(cic->ioprio == ioprio)) |
3084 | return; | 3084 | return; |
3085 | 3085 | ||
3086 | cfqq = cic->cfqq[BLK_RW_ASYNC]; | 3086 | cfqq = cic->cfqq[BLK_RW_ASYNC]; |
3087 | if (cfqq) { | 3087 | if (cfqq) { |
3088 | struct cfq_queue *new_cfqq; | 3088 | struct cfq_queue *new_cfqq; |
3089 | new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic, bio, | 3089 | new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic, bio, |
3090 | GFP_ATOMIC); | 3090 | GFP_ATOMIC); |
3091 | if (new_cfqq) { | 3091 | if (new_cfqq) { |
3092 | cic->cfqq[BLK_RW_ASYNC] = new_cfqq; | 3092 | cic->cfqq[BLK_RW_ASYNC] = new_cfqq; |
3093 | cfq_put_queue(cfqq); | 3093 | cfq_put_queue(cfqq); |
3094 | } | 3094 | } |
3095 | } | 3095 | } |
3096 | 3096 | ||
3097 | cfqq = cic->cfqq[BLK_RW_SYNC]; | 3097 | cfqq = cic->cfqq[BLK_RW_SYNC]; |
3098 | if (cfqq) | 3098 | if (cfqq) |
3099 | cfq_mark_cfqq_prio_changed(cfqq); | 3099 | cfq_mark_cfqq_prio_changed(cfqq); |
3100 | 3100 | ||
3101 | cic->ioprio = ioprio; | 3101 | cic->ioprio = ioprio; |
3102 | } | 3102 | } |
3103 | 3103 | ||
3104 | static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 3104 | static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
3105 | pid_t pid, bool is_sync) | 3105 | pid_t pid, bool is_sync) |
3106 | { | 3106 | { |
3107 | RB_CLEAR_NODE(&cfqq->rb_node); | 3107 | RB_CLEAR_NODE(&cfqq->rb_node); |
3108 | RB_CLEAR_NODE(&cfqq->p_node); | 3108 | RB_CLEAR_NODE(&cfqq->p_node); |
3109 | INIT_LIST_HEAD(&cfqq->fifo); | 3109 | INIT_LIST_HEAD(&cfqq->fifo); |
3110 | 3110 | ||
3111 | cfqq->ref = 0; | 3111 | cfqq->ref = 0; |
3112 | cfqq->cfqd = cfqd; | 3112 | cfqq->cfqd = cfqd; |
3113 | 3113 | ||
3114 | cfq_mark_cfqq_prio_changed(cfqq); | 3114 | cfq_mark_cfqq_prio_changed(cfqq); |
3115 | 3115 | ||
3116 | if (is_sync) { | 3116 | if (is_sync) { |
3117 | if (!cfq_class_idle(cfqq)) | 3117 | if (!cfq_class_idle(cfqq)) |
3118 | cfq_mark_cfqq_idle_window(cfqq); | 3118 | cfq_mark_cfqq_idle_window(cfqq); |
3119 | cfq_mark_cfqq_sync(cfqq); | 3119 | cfq_mark_cfqq_sync(cfqq); |
3120 | } | 3120 | } |
3121 | cfqq->pid = pid; | 3121 | cfqq->pid = pid; |
3122 | } | 3122 | } |
3123 | 3123 | ||
3124 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 3124 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
3125 | static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) | 3125 | static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) |
3126 | { | 3126 | { |
3127 | struct cfq_data *cfqd = cic_to_cfqd(cic); | 3127 | struct cfq_data *cfqd = cic_to_cfqd(cic); |
3128 | struct cfq_queue *sync_cfqq; | 3128 | struct cfq_queue *sync_cfqq; |
3129 | uint64_t id; | 3129 | uint64_t id; |
3130 | 3130 | ||
3131 | rcu_read_lock(); | 3131 | rcu_read_lock(); |
3132 | id = bio_blkio_cgroup(bio)->id; | 3132 | id = bio_blkio_cgroup(bio)->id; |
3133 | rcu_read_unlock(); | 3133 | rcu_read_unlock(); |
3134 | 3134 | ||
3135 | /* | 3135 | /* |
3136 | * Check whether blkcg has changed. The condition may trigger | 3136 | * Check whether blkcg has changed. The condition may trigger |
3137 | * spuriously on a newly created cic but there's no harm. | 3137 | * spuriously on a newly created cic but there's no harm. |
3138 | */ | 3138 | */ |
3139 | if (unlikely(!cfqd) || likely(cic->blkcg_id == id)) | 3139 | if (unlikely(!cfqd) || likely(cic->blkcg_id == id)) |
3140 | return; | 3140 | return; |
3141 | 3141 | ||
3142 | sync_cfqq = cic_to_cfqq(cic, 1); | 3142 | sync_cfqq = cic_to_cfqq(cic, 1); |
3143 | if (sync_cfqq) { | 3143 | if (sync_cfqq) { |
3144 | /* | 3144 | /* |
3145 | * Drop the reference to the sync queue. A new sync queue will be | 3145 | * Drop the reference to the sync queue. A new sync queue will be |
3146 | * assigned in the new group upon arrival of a fresh request. | 3146 | * assigned in the new group upon arrival of a fresh request. |
3147 | */ | 3147 | */ |
3148 | cfq_log_cfqq(cfqd, sync_cfqq, "changed cgroup"); | 3148 | cfq_log_cfqq(cfqd, sync_cfqq, "changed cgroup"); |
3149 | cic_set_cfqq(cic, NULL, 1); | 3149 | cic_set_cfqq(cic, NULL, 1); |
3150 | cfq_put_queue(sync_cfqq); | 3150 | cfq_put_queue(sync_cfqq); |
3151 | } | 3151 | } |
3152 | 3152 | ||
3153 | cic->blkcg_id = id; | 3153 | cic->blkcg_id = id; |
3154 | } | 3154 | } |
3155 | #else | 3155 | #else |
3156 | static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) { } | 3156 | static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) { } |
3157 | #endif /* CONFIG_CFQ_GROUP_IOSCHED */ | 3157 | #endif /* CONFIG_CFQ_GROUP_IOSCHED */ |
3158 | 3158 | ||
3159 | static struct cfq_queue * | 3159 | static struct cfq_queue * |
3160 | cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, | 3160 | cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, |
3161 | struct bio *bio, gfp_t gfp_mask) | 3161 | struct bio *bio, gfp_t gfp_mask) |
3162 | { | 3162 | { |
3163 | struct blkio_cgroup *blkcg; | 3163 | struct blkio_cgroup *blkcg; |
3164 | struct cfq_queue *cfqq, *new_cfqq = NULL; | 3164 | struct cfq_queue *cfqq, *new_cfqq = NULL; |
3165 | struct cfq_group *cfqg; | 3165 | struct cfq_group *cfqg; |
3166 | 3166 | ||
3167 | retry: | 3167 | retry: |
3168 | rcu_read_lock(); | 3168 | rcu_read_lock(); |
3169 | 3169 | ||
3170 | blkcg = bio_blkio_cgroup(bio); | 3170 | blkcg = bio_blkio_cgroup(bio); |
3171 | cfqg = cfq_lookup_create_cfqg(cfqd, blkcg); | 3171 | cfqg = cfq_lookup_create_cfqg(cfqd, blkcg); |
3172 | cfqq = cic_to_cfqq(cic, is_sync); | 3172 | cfqq = cic_to_cfqq(cic, is_sync); |
3173 | 3173 | ||
3174 | /* | 3174 | /* |
3175 | * Always try a new alloc if we fell back to the OOM cfqq | 3175 | * Always try a new alloc if we fell back to the OOM cfqq |
3176 | * originally, since it should just be a temporary situation. | 3176 | * originally, since it should just be a temporary situation. |
3177 | */ | 3177 | */ |
3178 | if (!cfqq || cfqq == &cfqd->oom_cfqq) { | 3178 | if (!cfqq || cfqq == &cfqd->oom_cfqq) { |
3179 | cfqq = NULL; | 3179 | cfqq = NULL; |
3180 | if (new_cfqq) { | 3180 | if (new_cfqq) { |
3181 | cfqq = new_cfqq; | 3181 | cfqq = new_cfqq; |
3182 | new_cfqq = NULL; | 3182 | new_cfqq = NULL; |
3183 | } else if (gfp_mask & __GFP_WAIT) { | 3183 | } else if (gfp_mask & __GFP_WAIT) { |
3184 | rcu_read_unlock(); | 3184 | rcu_read_unlock(); |
3185 | spin_unlock_irq(cfqd->queue->queue_lock); | 3185 | spin_unlock_irq(cfqd->queue->queue_lock); |
3186 | new_cfqq = kmem_cache_alloc_node(cfq_pool, | 3186 | new_cfqq = kmem_cache_alloc_node(cfq_pool, |
3187 | gfp_mask | __GFP_ZERO, | 3187 | gfp_mask | __GFP_ZERO, |
3188 | cfqd->queue->node); | 3188 | cfqd->queue->node); |
3189 | spin_lock_irq(cfqd->queue->queue_lock); | 3189 | spin_lock_irq(cfqd->queue->queue_lock); |
3190 | if (new_cfqq) | 3190 | if (new_cfqq) |
3191 | goto retry; | 3191 | goto retry; |
3192 | } else { | 3192 | } else { |
3193 | cfqq = kmem_cache_alloc_node(cfq_pool, | 3193 | cfqq = kmem_cache_alloc_node(cfq_pool, |
3194 | gfp_mask | __GFP_ZERO, | 3194 | gfp_mask | __GFP_ZERO, |
3195 | cfqd->queue->node); | 3195 | cfqd->queue->node); |
3196 | } | 3196 | } |
3197 | 3197 | ||
3198 | if (cfqq) { | 3198 | if (cfqq) { |
3199 | cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); | 3199 | cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); |
3200 | cfq_init_prio_data(cfqq, cic); | 3200 | cfq_init_prio_data(cfqq, cic); |
3201 | cfq_link_cfqq_cfqg(cfqq, cfqg); | 3201 | cfq_link_cfqq_cfqg(cfqq, cfqg); |
3202 | cfq_log_cfqq(cfqd, cfqq, "alloced"); | 3202 | cfq_log_cfqq(cfqd, cfqq, "alloced"); |
3203 | } else | 3203 | } else |
3204 | cfqq = &cfqd->oom_cfqq; | 3204 | cfqq = &cfqd->oom_cfqq; |
3205 | } | 3205 | } |
3206 | 3206 | ||
3207 | if (new_cfqq) | 3207 | if (new_cfqq) |
3208 | kmem_cache_free(cfq_pool, new_cfqq); | 3208 | kmem_cache_free(cfq_pool, new_cfqq); |
3209 | 3209 | ||
3210 | rcu_read_unlock(); | 3210 | rcu_read_unlock(); |
3211 | return cfqq; | 3211 | return cfqq; |
3212 | } | 3212 | } |
3213 | 3213 | ||
3214 | static struct cfq_queue ** | 3214 | static struct cfq_queue ** |
3215 | cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio) | 3215 | cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio) |
3216 | { | 3216 | { |
3217 | switch (ioprio_class) { | 3217 | switch (ioprio_class) { |
3218 | case IOPRIO_CLASS_RT: | 3218 | case IOPRIO_CLASS_RT: |
3219 | return &cfqd->async_cfqq[0][ioprio]; | 3219 | return &cfqd->async_cfqq[0][ioprio]; |
3220 | case IOPRIO_CLASS_NONE: | 3220 | case IOPRIO_CLASS_NONE: |
3221 | ioprio = IOPRIO_NORM; | 3221 | ioprio = IOPRIO_NORM; |
3222 | /* fall through */ | 3222 | /* fall through */ |
3223 | case IOPRIO_CLASS_BE: | 3223 | case IOPRIO_CLASS_BE: |
3224 | return &cfqd->async_cfqq[1][ioprio]; | 3224 | return &cfqd->async_cfqq[1][ioprio]; |
3225 | case IOPRIO_CLASS_IDLE: | 3225 | case IOPRIO_CLASS_IDLE: |
3226 | return &cfqd->async_idle_cfqq; | 3226 | return &cfqd->async_idle_cfqq; |
3227 | default: | 3227 | default: |
3228 | BUG(); | 3228 | BUG(); |
3229 | } | 3229 | } |
3230 | } | 3230 | } |
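
cfq_async_queue_prio() resolves the per-device shared async queue for a (class, level) pair: RT and BE each get an array of IOPRIO_BE_NR slots, IOPRIO_CLASS_NONE is folded into BE at IOPRIO_NORM, and the idle class shares a single queue. A standalone sketch of that lookup layout follows; the array shape mirrors the cfqd fields used above, while the constants and names are assumptions for illustration.

	#include <stdio.h>
	#include <stddef.h>

	enum { IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE };

	#define IOPRIO_BE_NR	8	/* levels 0..7, assumed to match the kernel headers */
	#define IOPRIO_NORM	4

	struct async_table {
		void *rt_be[2][IOPRIO_BE_NR];	/* [0] = RT levels, [1] = BE levels */
		void *idle;			/* single shared idle-class queue */
	};

	/* Same shape as cfq_async_queue_prio(): return the slot that holds the
	 * shared async queue for this (class, level) pair. */
	static void **async_slot(struct async_table *t, int class, int level)
	{
		switch (class) {
		case IOPRIO_CLASS_RT:
			return &t->rt_be[0][level];
		case IOPRIO_CLASS_NONE:
			level = IOPRIO_NORM;
			/* fall through: no class means "best effort, normal level" */
		case IOPRIO_CLASS_BE:
			return &t->rt_be[1][level];
		case IOPRIO_CLASS_IDLE:
			return &t->idle;
		default:
			return NULL;	/* the kernel BUG()s here instead */
		}
	}

	int main(void)
	{
		static struct async_table t;

		/* NONE is folded into BE at the normal level, so both resolve
		 * to the same shared slot. */
		*async_slot(&t, IOPRIO_CLASS_BE, IOPRIO_NORM) = "shared be/4 queue";
		printf("NONE -> %s\n", (char *)*async_slot(&t, IOPRIO_CLASS_NONE, 0));
		printf("IDLE slot empty: %d\n",
		       *async_slot(&t, IOPRIO_CLASS_IDLE, 0) == NULL);
		return 0;
	}

cfq_get_queue() below then pins whatever queue ends up in that slot so it survives until the elevator exits (see cfq_put_async_queues()).
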
3231 | 3231 | ||
3232 | static struct cfq_queue * | 3232 | static struct cfq_queue * |
3233 | cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, | 3233 | cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, |
3234 | struct bio *bio, gfp_t gfp_mask) | 3234 | struct bio *bio, gfp_t gfp_mask) |
3235 | { | 3235 | { |
3236 | const int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio); | 3236 | const int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio); |
3237 | const int ioprio = IOPRIO_PRIO_DATA(cic->ioprio); | 3237 | const int ioprio = IOPRIO_PRIO_DATA(cic->ioprio); |
3238 | struct cfq_queue **async_cfqq = NULL; | 3238 | struct cfq_queue **async_cfqq = NULL; |
3239 | struct cfq_queue *cfqq = NULL; | 3239 | struct cfq_queue *cfqq = NULL; |
3240 | 3240 | ||
3241 | if (!is_sync) { | 3241 | if (!is_sync) { |
3242 | async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio); | 3242 | async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio); |
3243 | cfqq = *async_cfqq; | 3243 | cfqq = *async_cfqq; |
3244 | } | 3244 | } |
3245 | 3245 | ||
3246 | if (!cfqq) | 3246 | if (!cfqq) |
3247 | cfqq = cfq_find_alloc_queue(cfqd, is_sync, cic, bio, gfp_mask); | 3247 | cfqq = cfq_find_alloc_queue(cfqd, is_sync, cic, bio, gfp_mask); |
3248 | 3248 | ||
3249 | /* | 3249 | /* |
3250 | * pin the queue now that it's allocated, scheduler exit will prune it | 3250 | * pin the queue now that it's allocated, scheduler exit will prune it |
3251 | */ | 3251 | */ |
3252 | if (!is_sync && !(*async_cfqq)) { | 3252 | if (!is_sync && !(*async_cfqq)) { |
3253 | cfqq->ref++; | 3253 | cfqq->ref++; |
3254 | *async_cfqq = cfqq; | 3254 | *async_cfqq = cfqq; |
3255 | } | 3255 | } |
3256 | 3256 | ||
3257 | cfqq->ref++; | 3257 | cfqq->ref++; |
3258 | return cfqq; | 3258 | return cfqq; |
3259 | } | 3259 | } |
3260 | 3260 | ||
3261 | static void | 3261 | static void |
3262 | __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle) | 3262 | __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle) |
3263 | { | 3263 | { |
3264 | unsigned long elapsed = jiffies - ttime->last_end_request; | 3264 | unsigned long elapsed = jiffies - ttime->last_end_request; |
3265 | elapsed = min(elapsed, 2UL * slice_idle); | 3265 | elapsed = min(elapsed, 2UL * slice_idle); |
3266 | 3266 | ||
3267 | ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8; | 3267 | ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8; |
3268 | ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8; | 3268 | ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8; |
3269 | ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples; | 3269 | ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples; |
3270 | } | 3270 | } |
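
__cfq_update_io_thinktime() above maintains a fixed-point exponentially weighted moving average: the decayed sample count and total both keep 7/8 of their previous value, and each new sample enters scaled by 256 so the mean keeps sub-jiffy precision in integer arithmetic. A minimal standalone sketch of that update rule, assuming a slice_idle of 8 jiffies purely for the demo (the struct and field names are simplified stand-ins for cfq_ttime):

	#include <stdio.h>

	/* Fixed-point EWMA as used above: weight 7/8 on history, new sample
	 * scaled by 256 so the mean retains fractional precision. */
	struct ttime {
		unsigned long samples;	/* decayed sample count, scaled by 256 */
		unsigned long total;	/* decayed sum of samples, scaled by 256 */
		unsigned long mean;	/* total / samples, the average think time */
	};

	static void update_thinktime(struct ttime *t, unsigned long elapsed,
				     unsigned long slice_idle)
	{
		if (elapsed > 2 * slice_idle)		/* clamp outliers, as above */
			elapsed = 2 * slice_idle;

		t->samples = (7 * t->samples + 256) / 8;
		t->total   = (7 * t->total + 256 * elapsed) / 8;
		t->mean    = (t->total + 128) / t->samples;	/* +128 rounds the scaled total */
	}

	int main(void)
	{
		struct ttime t = { 0, 0, 0 };
		unsigned long samples[] = { 2, 3, 2, 10, 2 };	/* think times in jiffies */

		for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
			update_thinktime(&t, samples[i], 8 /* slice_idle */);
			printf("sample=%lu mean=%lu\n", samples[i], t.mean);
		}
		return 0;
	}

Note that the +128 term adds noticeably to the mean while the sample count is still small; sample_valid() elsewhere in cfq guards against trusting the average until enough samples have accumulated.
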
3271 | 3271 | ||
3272 | static void | 3272 | static void |
3273 | cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 3273 | cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
3274 | struct cfq_io_cq *cic) | 3274 | struct cfq_io_cq *cic) |
3275 | { | 3275 | { |
3276 | if (cfq_cfqq_sync(cfqq)) { | 3276 | if (cfq_cfqq_sync(cfqq)) { |
3277 | __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle); | 3277 | __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle); |
3278 | __cfq_update_io_thinktime(&cfqq->service_tree->ttime, | 3278 | __cfq_update_io_thinktime(&cfqq->service_tree->ttime, |
3279 | cfqd->cfq_slice_idle); | 3279 | cfqd->cfq_slice_idle); |
3280 | } | 3280 | } |
3281 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 3281 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
3282 | __cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle); | 3282 | __cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle); |
3283 | #endif | 3283 | #endif |
3284 | } | 3284 | } |
3285 | 3285 | ||
3286 | static void | 3286 | static void |
3287 | cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 3287 | cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
3288 | struct request *rq) | 3288 | struct request *rq) |
3289 | { | 3289 | { |
3290 | sector_t sdist = 0; | 3290 | sector_t sdist = 0; |
3291 | sector_t n_sec = blk_rq_sectors(rq); | 3291 | sector_t n_sec = blk_rq_sectors(rq); |
3292 | if (cfqq->last_request_pos) { | 3292 | if (cfqq->last_request_pos) { |
3293 | if (cfqq->last_request_pos < blk_rq_pos(rq)) | 3293 | if (cfqq->last_request_pos < blk_rq_pos(rq)) |
3294 | sdist = blk_rq_pos(rq) - cfqq->last_request_pos; | 3294 | sdist = blk_rq_pos(rq) - cfqq->last_request_pos; |
3295 | else | 3295 | else |
3296 | sdist = cfqq->last_request_pos - blk_rq_pos(rq); | 3296 | sdist = cfqq->last_request_pos - blk_rq_pos(rq); |
3297 | } | 3297 | } |
3298 | 3298 | ||
3299 | cfqq->seek_history <<= 1; | 3299 | cfqq->seek_history <<= 1; |
3300 | if (blk_queue_nonrot(cfqd->queue)) | 3300 | if (blk_queue_nonrot(cfqd->queue)) |
3301 | cfqq->seek_history |= (n_sec < CFQQ_SECT_THR_NONROT); | 3301 | cfqq->seek_history |= (n_sec < CFQQ_SECT_THR_NONROT); |
3302 | else | 3302 | else |
3303 | cfqq->seek_history |= (sdist > CFQQ_SEEK_THR); | 3303 | cfqq->seek_history |= (sdist > CFQQ_SEEK_THR); |
3304 | } | 3304 | } |
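
cfq_update_io_seektime() records one bit per request in cfqq->seek_history, shifting the register left each time: on rotational disks the bit means "this request seeked farther than CFQQ_SEEK_THR", on non-rotational devices it means "this request was small". A queue is later treated as seeky when enough of those bits are set (the CFQQ_SEEKY() test defined earlier in this file). A small sketch of that shift-register idea, with the thresholds and the popcount cut-off chosen purely for illustration:

	#include <stdio.h>
	#include <stdint.h>

	/* Illustrative thresholds; the kernel's CFQQ_SEEK_THR / CFQQ_SEEKY
	 * cut-offs live earlier in cfq-iosched.c and may differ. */
	#define SEEK_THR_SECTORS	1024u
	#define SEEKY_BITS_MAX		4	/* "seeky" once more than 4 of 32 bits set */

	struct queue_state {
		uint32_t seek_history;	/* 1 bit per past request, newest in bit 0 */
		uint64_t last_pos;	/* end position of the previous request */
	};

	static void record_request(struct queue_state *q, uint64_t pos, uint64_t sectors)
	{
		uint64_t sdist = q->last_pos > pos ? q->last_pos - pos : pos - q->last_pos;

		q->seek_history <<= 1;
		q->seek_history |= (sdist > SEEK_THR_SECTORS);	/* rotational-disk rule */
		q->last_pos = pos + sectors;
	}

	static int queue_is_seeky(const struct queue_state *q)
	{
		/* __builtin_popcount stands in for the kernel's hweight32() */
		return __builtin_popcount(q->seek_history) > SEEKY_BITS_MAX;
	}

	int main(void)
	{
		struct queue_state q = { 0, 0 };
		/* a sequential run followed by scattered accesses */
		uint64_t pos[] = { 0, 8, 16, 500000, 24, 900000, 32, 1300000, 40, 1700000 };

		for (unsigned i = 0; i < sizeof(pos) / sizeof(pos[0]); i++) {
			record_request(&q, pos[i], 8);
			printf("pos=%8llu seeky=%d\n",
			       (unsigned long long)pos[i], queue_is_seeky(&q));
		}
		return 0;
	}

On a rotational disk a long run of set bits means the process keeps jumping around the disk, so idling for its next request is unlikely to pay off; cfq_update_idle_window() below uses CFQQ_SEEKY() for exactly that decision.
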
3305 | 3305 | ||
3306 | /* | 3306 | /* |
3307 | * Disable idle window if the process thinks too long or seeks so much that | 3307 | * Disable idle window if the process thinks too long or seeks so much that |
3308 | * it doesn't matter | 3308 | * it doesn't matter |
3309 | */ | 3309 | */ |
3310 | static void | 3310 | static void |
3311 | cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 3311 | cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
3312 | struct cfq_io_cq *cic) | 3312 | struct cfq_io_cq *cic) |
3313 | { | 3313 | { |
3314 | int old_idle, enable_idle; | 3314 | int old_idle, enable_idle; |
3315 | 3315 | ||
3316 | /* | 3316 | /* |
3317 | * Don't idle for async or idle io prio class | 3317 | * Don't idle for async or idle io prio class |
3318 | */ | 3318 | */ |
3319 | if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq)) | 3319 | if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq)) |
3320 | return; | 3320 | return; |
3321 | 3321 | ||
3322 | enable_idle = old_idle = cfq_cfqq_idle_window(cfqq); | 3322 | enable_idle = old_idle = cfq_cfqq_idle_window(cfqq); |
3323 | 3323 | ||
3324 | if (cfqq->queued[0] + cfqq->queued[1] >= 4) | 3324 | if (cfqq->queued[0] + cfqq->queued[1] >= 4) |
3325 | cfq_mark_cfqq_deep(cfqq); | 3325 | cfq_mark_cfqq_deep(cfqq); |
3326 | 3326 | ||
3327 | if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE)) | 3327 | if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE)) |
3328 | enable_idle = 0; | 3328 | enable_idle = 0; |
3329 | else if (!atomic_read(&cic->icq.ioc->active_ref) || | 3329 | else if (!atomic_read(&cic->icq.ioc->active_ref) || |
3330 | !cfqd->cfq_slice_idle || | 3330 | !cfqd->cfq_slice_idle || |
3331 | (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) | 3331 | (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) |
3332 | enable_idle = 0; | 3332 | enable_idle = 0; |
3333 | else if (sample_valid(cic->ttime.ttime_samples)) { | 3333 | else if (sample_valid(cic->ttime.ttime_samples)) { |
3334 | if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle) | 3334 | if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle) |
3335 | enable_idle = 0; | 3335 | enable_idle = 0; |
3336 | else | 3336 | else |
3337 | enable_idle = 1; | 3337 | enable_idle = 1; |
3338 | } | 3338 | } |
3339 | 3339 | ||
3340 | if (old_idle != enable_idle) { | 3340 | if (old_idle != enable_idle) { |
3341 | cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle); | 3341 | cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle); |
3342 | if (enable_idle) | 3342 | if (enable_idle) |
3343 | cfq_mark_cfqq_idle_window(cfqq); | 3343 | cfq_mark_cfqq_idle_window(cfqq); |
3344 | else | 3344 | else |
3345 | cfq_clear_cfqq_idle_window(cfqq); | 3345 | cfq_clear_cfqq_idle_window(cfqq); |
3346 | } | 3346 | } |
3347 | } | 3347 | } |
3348 | 3348 | ||
3349 | /* | 3349 | /* |
3350 | * Check if new_cfqq should preempt the currently active queue. Return false | 3350 | * Check if new_cfqq should preempt the currently active queue. Return false |
3351 | * for no or if we aren't sure; true will cause a preempt. | 3351 | * for no or if we aren't sure; true will cause a preempt. |
3352 | */ | 3352 | */ |
3353 | static bool | 3353 | static bool |
3354 | cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, | 3354 | cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, |
3355 | struct request *rq) | 3355 | struct request *rq) |
3356 | { | 3356 | { |
3357 | struct cfq_queue *cfqq; | 3357 | struct cfq_queue *cfqq; |
3358 | 3358 | ||
3359 | cfqq = cfqd->active_queue; | 3359 | cfqq = cfqd->active_queue; |
3360 | if (!cfqq) | 3360 | if (!cfqq) |
3361 | return false; | 3361 | return false; |
3362 | 3362 | ||
3363 | if (cfq_class_idle(new_cfqq)) | 3363 | if (cfq_class_idle(new_cfqq)) |
3364 | return false; | 3364 | return false; |
3365 | 3365 | ||
3366 | if (cfq_class_idle(cfqq)) | 3366 | if (cfq_class_idle(cfqq)) |
3367 | return true; | 3367 | return true; |
3368 | 3368 | ||
3369 | /* | 3369 | /* |
3370 | * Don't allow a non-RT request to preempt an ongoing RT cfqq timeslice. | 3370 | * Don't allow a non-RT request to preempt an ongoing RT cfqq timeslice. |
3371 | */ | 3371 | */ |
3372 | if (cfq_class_rt(cfqq) && !cfq_class_rt(new_cfqq)) | 3372 | if (cfq_class_rt(cfqq) && !cfq_class_rt(new_cfqq)) |
3373 | return false; | 3373 | return false; |
3374 | 3374 | ||
3375 | /* | 3375 | /* |
3376 | * if the new request is sync, but the currently running queue is | 3376 | * if the new request is sync, but the currently running queue is |
3377 | * not, let the sync request have priority. | 3377 | * not, let the sync request have priority. |
3378 | */ | 3378 | */ |
3379 | if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq)) | 3379 | if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq)) |
3380 | return true; | 3380 | return true; |
3381 | 3381 | ||
3382 | if (new_cfqq->cfqg != cfqq->cfqg) | 3382 | if (new_cfqq->cfqg != cfqq->cfqg) |
3383 | return false; | 3383 | return false; |
3384 | 3384 | ||
3385 | if (cfq_slice_used(cfqq)) | 3385 | if (cfq_slice_used(cfqq)) |
3386 | return true; | 3386 | return true; |
3387 | 3387 | ||
3388 | /* Allow preemption only if we are idling on sync-noidle tree */ | 3388 | /* Allow preemption only if we are idling on sync-noidle tree */ |
3389 | if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD && | 3389 | if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD && |
3390 | cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD && | 3390 | cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD && |
3391 | new_cfqq->service_tree->count == 2 && | 3391 | new_cfqq->service_tree->count == 2 && |
3392 | RB_EMPTY_ROOT(&cfqq->sort_list)) | 3392 | RB_EMPTY_ROOT(&cfqq->sort_list)) |
3393 | return true; | 3393 | return true; |
3394 | 3394 | ||
3395 | /* | 3395 | /* |
3396 | * So both queues are sync. Let the new request get disk time if | 3396 | * So both queues are sync. Let the new request get disk time if |
3397 | * it's a metadata request and the current queue is doing regular IO. | 3397 | * it's a metadata request and the current queue is doing regular IO. |
3398 | */ | 3398 | */ |
3399 | if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending) | 3399 | if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending) |
3400 | return true; | 3400 | return true; |
3401 | 3401 | ||
3402 | /* | 3402 | /* |
3403 | * Allow an RT request to preempt an ongoing non-RT cfqq timeslice. | 3403 | * Allow an RT request to preempt an ongoing non-RT cfqq timeslice. |
3404 | */ | 3404 | */ |
3405 | if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) | 3405 | if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) |
3406 | return true; | 3406 | return true; |
3407 | 3407 | ||
3408 | /* An idle queue should not be idle now for some reason */ | 3408 | /* An idle queue should not be idle now for some reason */ |
3409 | if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq)) | 3409 | if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq)) |
3410 | return true; | 3410 | return true; |
3411 | 3411 | ||
3412 | if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) | 3412 | if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) |
3413 | return false; | 3413 | return false; |
3414 | 3414 | ||
3415 | /* | 3415 | /* |
3416 | * if this request is as-good as one we would expect from the | 3416 | * if this request is as-good as one we would expect from the |
3417 | * current cfqq, let it preempt | 3417 | * current cfqq, let it preempt |
3418 | */ | 3418 | */ |
3419 | if (cfq_rq_close(cfqd, cfqq, rq)) | 3419 | if (cfq_rq_close(cfqd, cfqq, rq)) |
3420 | return true; | 3420 | return true; |
3421 | 3421 | ||
3422 | return false; | 3422 | return false; |
3423 | } | 3423 | } |
3424 | 3424 | ||
3425 | /* | 3425 | /* |
3426 | * cfqq preempts the active queue. if we allowed preempt with no slice left, | 3426 | * cfqq preempts the active queue. if we allowed preempt with no slice left, |
3427 | * let it have half of its nominal slice. | 3427 | * let it have half of its nominal slice. |
3428 | */ | 3428 | */ |
3429 | static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 3429 | static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
3430 | { | 3430 | { |
3431 | enum wl_type_t old_type = cfqq_type(cfqd->active_queue); | 3431 | enum wl_type_t old_type = cfqq_type(cfqd->active_queue); |
3432 | 3432 | ||
3433 | cfq_log_cfqq(cfqd, cfqq, "preempt"); | 3433 | cfq_log_cfqq(cfqd, cfqq, "preempt"); |
3434 | cfq_slice_expired(cfqd, 1); | 3434 | cfq_slice_expired(cfqd, 1); |
3435 | 3435 | ||
3436 | /* | 3436 | /* |
3437 | * If the workload type changed, don't save the slice; otherwise the | 3437 | * If the workload type changed, don't save the slice; otherwise the |
3438 | * preempt doesn't happen | 3438 | * preempt doesn't happen |
3439 | */ | 3439 | */ |
3440 | if (old_type != cfqq_type(cfqq)) | 3440 | if (old_type != cfqq_type(cfqq)) |
3441 | cfqq->cfqg->saved_workload_slice = 0; | 3441 | cfqq->cfqg->saved_workload_slice = 0; |
3442 | 3442 | ||
3443 | /* | 3443 | /* |
3444 | * Put the new queue at the front of the current list, | 3444 | * Put the new queue at the front of the current list, |
3445 | * so we know that it will be selected next. | 3445 | * so we know that it will be selected next. |
3446 | */ | 3446 | */ |
3447 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); | 3447 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); |
3448 | 3448 | ||
3449 | cfq_service_tree_add(cfqd, cfqq, 1); | 3449 | cfq_service_tree_add(cfqd, cfqq, 1); |
3450 | 3450 | ||
3451 | cfqq->slice_end = 0; | 3451 | cfqq->slice_end = 0; |
3452 | cfq_mark_cfqq_slice_new(cfqq); | 3452 | cfq_mark_cfqq_slice_new(cfqq); |
3453 | } | 3453 | } |
3454 | 3454 | ||
3455 | /* | 3455 | /* |
3456 | * Called when a new fs request (rq) is added (to cfqq). Check if there's | 3456 | * Called when a new fs request (rq) is added (to cfqq). Check if there's |
3457 | * something we should do about it | 3457 | * something we should do about it |
3458 | */ | 3458 | */ |
3459 | static void | 3459 | static void |
3460 | cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 3460 | cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
3461 | struct request *rq) | 3461 | struct request *rq) |
3462 | { | 3462 | { |
3463 | struct cfq_io_cq *cic = RQ_CIC(rq); | 3463 | struct cfq_io_cq *cic = RQ_CIC(rq); |
3464 | 3464 | ||
3465 | cfqd->rq_queued++; | 3465 | cfqd->rq_queued++; |
3466 | if (rq->cmd_flags & REQ_PRIO) | 3466 | if (rq->cmd_flags & REQ_PRIO) |
3467 | cfqq->prio_pending++; | 3467 | cfqq->prio_pending++; |
3468 | 3468 | ||
3469 | cfq_update_io_thinktime(cfqd, cfqq, cic); | 3469 | cfq_update_io_thinktime(cfqd, cfqq, cic); |
3470 | cfq_update_io_seektime(cfqd, cfqq, rq); | 3470 | cfq_update_io_seektime(cfqd, cfqq, rq); |
3471 | cfq_update_idle_window(cfqd, cfqq, cic); | 3471 | cfq_update_idle_window(cfqd, cfqq, cic); |
3472 | 3472 | ||
3473 | cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); | 3473 | cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); |
3474 | 3474 | ||
3475 | if (cfqq == cfqd->active_queue) { | 3475 | if (cfqq == cfqd->active_queue) { |
3476 | /* | 3476 | /* |
3477 | * Remember that we saw a request from this process, but | 3477 | * Remember that we saw a request from this process, but |
3478 | * don't start queuing just yet. Otherwise we risk seeing lots | 3478 | * don't start queuing just yet. Otherwise we risk seeing lots |
3479 | * of tiny requests, because we disrupt the normal plugging | 3479 | * of tiny requests, because we disrupt the normal plugging |
3480 | * and merging. If the request is already larger than a single | 3480 | * and merging. If the request is already larger than a single |
3481 | * page, let it rip immediately. For that case we assume that | 3481 | * page, let it rip immediately. For that case we assume that |
3482 | * merging is already done. Ditto for a busy system that | 3482 | * merging is already done. Ditto for a busy system that |
3483 | * has other work pending, don't risk delaying until the | 3483 | * has other work pending, don't risk delaying until the |
3484 | * idle timer unplug to continue working. | 3484 | * idle timer unplug to continue working. |
3485 | */ | 3485 | */ |
3486 | if (cfq_cfqq_wait_request(cfqq)) { | 3486 | if (cfq_cfqq_wait_request(cfqq)) { |
3487 | if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || | 3487 | if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || |
3488 | cfqd->busy_queues > 1) { | 3488 | cfqd->busy_queues > 1) { |
3489 | cfq_del_timer(cfqd, cfqq); | 3489 | cfq_del_timer(cfqd, cfqq); |
3490 | cfq_clear_cfqq_wait_request(cfqq); | 3490 | cfq_clear_cfqq_wait_request(cfqq); |
3491 | __blk_run_queue(cfqd->queue); | 3491 | __blk_run_queue(cfqd->queue); |
3492 | } else { | 3492 | } else { |
3493 | cfqg_stats_update_idle_time(cfqq->cfqg); | 3493 | cfqg_stats_update_idle_time(cfqq->cfqg); |
3494 | cfq_mark_cfqq_must_dispatch(cfqq); | 3494 | cfq_mark_cfqq_must_dispatch(cfqq); |
3495 | } | 3495 | } |
3496 | } | 3496 | } |
3497 | } else if (cfq_should_preempt(cfqd, cfqq, rq)) { | 3497 | } else if (cfq_should_preempt(cfqd, cfqq, rq)) { |
3498 | /* | 3498 | /* |
3499 | * not the active queue - expire current slice if it is | 3499 | * not the active queue - expire current slice if it is |
3500 | * idle and has expired its mean thinktime or this new queue | 3500 | * idle and has expired its mean thinktime or this new queue |
3501 | * has some old slice time left and is of higher priority or | 3501 | * has some old slice time left and is of higher priority or |
3502 | * this new queue is RT and the current one is BE | 3502 | * this new queue is RT and the current one is BE |
3503 | */ | 3503 | */ |
3504 | cfq_preempt_queue(cfqd, cfqq); | 3504 | cfq_preempt_queue(cfqd, cfqq); |
3505 | __blk_run_queue(cfqd->queue); | 3505 | __blk_run_queue(cfqd->queue); |
3506 | } | 3506 | } |
3507 | } | 3507 | } |
3508 | 3508 | ||
3509 | static void cfq_insert_request(struct request_queue *q, struct request *rq) | 3509 | static void cfq_insert_request(struct request_queue *q, struct request *rq) |
3510 | { | 3510 | { |
3511 | struct cfq_data *cfqd = q->elevator->elevator_data; | 3511 | struct cfq_data *cfqd = q->elevator->elevator_data; |
3512 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 3512 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
3513 | 3513 | ||
3514 | cfq_log_cfqq(cfqd, cfqq, "insert_request"); | 3514 | cfq_log_cfqq(cfqd, cfqq, "insert_request"); |
3515 | cfq_init_prio_data(cfqq, RQ_CIC(rq)); | 3515 | cfq_init_prio_data(cfqq, RQ_CIC(rq)); |
3516 | 3516 | ||
3517 | rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); | 3517 | rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); |
3518 | list_add_tail(&rq->queuelist, &cfqq->fifo); | 3518 | list_add_tail(&rq->queuelist, &cfqq->fifo); |
3519 | cfq_add_rq_rb(rq); | 3519 | cfq_add_rq_rb(rq); |
3520 | cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, | 3520 | cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, |
3521 | rq->cmd_flags); | 3521 | rq->cmd_flags); |
3522 | cfq_rq_enqueued(cfqd, cfqq, rq); | 3522 | cfq_rq_enqueued(cfqd, cfqq, rq); |
3523 | } | 3523 | } |
3524 | 3524 | ||
3525 | /* | 3525 | /* |
3526 | * Update hw_tag based on peak queue depth over 50 samples under | 3526 | * Update hw_tag based on peak queue depth over 50 samples under |
3527 | * sufficient load. | 3527 | * sufficient load. |
3528 | */ | 3528 | */ |
3529 | static void cfq_update_hw_tag(struct cfq_data *cfqd) | 3529 | static void cfq_update_hw_tag(struct cfq_data *cfqd) |
3530 | { | 3530 | { |
3531 | struct cfq_queue *cfqq = cfqd->active_queue; | 3531 | struct cfq_queue *cfqq = cfqd->active_queue; |
3532 | 3532 | ||
3533 | if (cfqd->rq_in_driver > cfqd->hw_tag_est_depth) | 3533 | if (cfqd->rq_in_driver > cfqd->hw_tag_est_depth) |
3534 | cfqd->hw_tag_est_depth = cfqd->rq_in_driver; | 3534 | cfqd->hw_tag_est_depth = cfqd->rq_in_driver; |
3535 | 3535 | ||
3536 | if (cfqd->hw_tag == 1) | 3536 | if (cfqd->hw_tag == 1) |
3537 | return; | 3537 | return; |
3538 | 3538 | ||
3539 | if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN && | 3539 | if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN && |
3540 | cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN) | 3540 | cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN) |
3541 | return; | 3541 | return; |
3542 | 3542 | ||
3543 | /* | 3543 | /* |
3544 | * If the active queue doesn't have enough requests and can idle, cfq might not | 3544 | * If the active queue doesn't have enough requests and can idle, cfq might not |
3545 | * dispatch sufficient requests to hardware. Don't zero hw_tag in this | 3545 | * dispatch sufficient requests to hardware. Don't zero hw_tag in this |
3546 | * case | 3546 | * case |
3547 | */ | 3547 | */ |
3548 | if (cfqq && cfq_cfqq_idle_window(cfqq) && | 3548 | if (cfqq && cfq_cfqq_idle_window(cfqq) && |
3549 | cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] < | 3549 | cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] < |
3550 | CFQ_HW_QUEUE_MIN && cfqd->rq_in_driver < CFQ_HW_QUEUE_MIN) | 3550 | CFQ_HW_QUEUE_MIN && cfqd->rq_in_driver < CFQ_HW_QUEUE_MIN) |
3551 | return; | 3551 | return; |
3552 | 3552 | ||
3553 | if (cfqd->hw_tag_samples++ < 50) | 3553 | if (cfqd->hw_tag_samples++ < 50) |
3554 | return; | 3554 | return; |
3555 | 3555 | ||
3556 | if (cfqd->hw_tag_est_depth >= CFQ_HW_QUEUE_MIN) | 3556 | if (cfqd->hw_tag_est_depth >= CFQ_HW_QUEUE_MIN) |
3557 | cfqd->hw_tag = 1; | 3557 | cfqd->hw_tag = 1; |
3558 | else | 3558 | else |
3559 | cfqd->hw_tag = 0; | 3559 | cfqd->hw_tag = 0; |
3560 | } | 3560 | } |
3561 | 3561 | ||
3562 | static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 3562 | static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
3563 | { | 3563 | { |
3564 | struct cfq_io_cq *cic = cfqd->active_cic; | 3564 | struct cfq_io_cq *cic = cfqd->active_cic; |
3565 | 3565 | ||
3566 | /* If the queue already has requests, don't wait */ | 3566 | /* If the queue already has requests, don't wait */ |
3567 | if (!RB_EMPTY_ROOT(&cfqq->sort_list)) | 3567 | if (!RB_EMPTY_ROOT(&cfqq->sort_list)) |
3568 | return false; | 3568 | return false; |
3569 | 3569 | ||
3570 | /* If there are other queues in the group, don't wait */ | 3570 | /* If there are other queues in the group, don't wait */ |
3571 | if (cfqq->cfqg->nr_cfqq > 1) | 3571 | if (cfqq->cfqg->nr_cfqq > 1) |
3572 | return false; | 3572 | return false; |
3573 | 3573 | ||
3574 | /* the only queue in the group, but think time is big */ | 3574 | /* the only queue in the group, but think time is big */ |
3575 | if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) | 3575 | if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) |
3576 | return false; | 3576 | return false; |
3577 | 3577 | ||
3578 | if (cfq_slice_used(cfqq)) | 3578 | if (cfq_slice_used(cfqq)) |
3579 | return true; | 3579 | return true; |
3580 | 3580 | ||
3581 | /* if slice left is less than think time, wait busy */ | 3581 | /* if slice left is less than think time, wait busy */ |
3582 | if (cic && sample_valid(cic->ttime.ttime_samples) | 3582 | if (cic && sample_valid(cic->ttime.ttime_samples) |
3583 | && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) | 3583 | && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) |
3584 | return true; | 3584 | return true; |
3585 | 3585 | ||
3586 | /* | 3586 | /* |
3587 | * If think time is less than a jiffy then ttime_mean=0 and the above | 3587 | * If think time is less than a jiffy then ttime_mean=0 and the above |
3588 | * will not be true. It might happen that slice has not expired yet | 3588 | * will not be true. It might happen that slice has not expired yet |
3589 | * but will expire soon (4-5 ns) during select_queue(). To cover the | 3589 | * but will expire soon (4-5 ns) during select_queue(). To cover the |
3590 | * case where think time is less than a jiffy, mark the queue wait | 3590 | * case where think time is less than a jiffy, mark the queue wait |
3591 | * busy if only 1 jiffy is left in the slice. | 3591 | * busy if only 1 jiffy is left in the slice. |
3592 | */ | 3592 | */ |
3593 | if (cfqq->slice_end - jiffies == 1) | 3593 | if (cfqq->slice_end - jiffies == 1) |
3594 | return true; | 3594 | return true; |
3595 | 3595 | ||
3596 | return false; | 3596 | return false; |
3597 | } | 3597 | } |
3598 | 3598 | ||
3599 | static void cfq_completed_request(struct request_queue *q, struct request *rq) | 3599 | static void cfq_completed_request(struct request_queue *q, struct request *rq) |
3600 | { | 3600 | { |
3601 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 3601 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
3602 | struct cfq_data *cfqd = cfqq->cfqd; | 3602 | struct cfq_data *cfqd = cfqq->cfqd; |
3603 | const int sync = rq_is_sync(rq); | 3603 | const int sync = rq_is_sync(rq); |
3604 | unsigned long now; | 3604 | unsigned long now; |
3605 | 3605 | ||
3606 | now = jiffies; | 3606 | now = jiffies; |
3607 | cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", | 3607 | cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", |
3608 | !!(rq->cmd_flags & REQ_NOIDLE)); | 3608 | !!(rq->cmd_flags & REQ_NOIDLE)); |
3609 | 3609 | ||
3610 | cfq_update_hw_tag(cfqd); | 3610 | cfq_update_hw_tag(cfqd); |
3611 | 3611 | ||
3612 | WARN_ON(!cfqd->rq_in_driver); | 3612 | WARN_ON(!cfqd->rq_in_driver); |
3613 | WARN_ON(!cfqq->dispatched); | 3613 | WARN_ON(!cfqq->dispatched); |
3614 | cfqd->rq_in_driver--; | 3614 | cfqd->rq_in_driver--; |
3615 | cfqq->dispatched--; | 3615 | cfqq->dispatched--; |
3616 | (RQ_CFQG(rq))->dispatched--; | 3616 | (RQ_CFQG(rq))->dispatched--; |
3617 | cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq), | 3617 | cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq), |
3618 | rq_io_start_time_ns(rq), rq->cmd_flags); | 3618 | rq_io_start_time_ns(rq), rq->cmd_flags); |
3619 | 3619 | ||
3620 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; | 3620 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; |
3621 | 3621 | ||
3622 | if (sync) { | 3622 | if (sync) { |
3623 | struct cfq_rb_root *service_tree; | 3623 | struct cfq_rb_root *service_tree; |
3624 | 3624 | ||
3625 | RQ_CIC(rq)->ttime.last_end_request = now; | 3625 | RQ_CIC(rq)->ttime.last_end_request = now; |
3626 | 3626 | ||
3627 | if (cfq_cfqq_on_rr(cfqq)) | 3627 | if (cfq_cfqq_on_rr(cfqq)) |
3628 | service_tree = cfqq->service_tree; | 3628 | service_tree = cfqq->service_tree; |
3629 | else | 3629 | else |
3630 | service_tree = service_tree_for(cfqq->cfqg, | 3630 | service_tree = service_tree_for(cfqq->cfqg, |
3631 | cfqq_prio(cfqq), cfqq_type(cfqq)); | 3631 | cfqq_prio(cfqq), cfqq_type(cfqq)); |
3632 | service_tree->ttime.last_end_request = now; | 3632 | service_tree->ttime.last_end_request = now; |
3633 | if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) | 3633 | if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) |
3634 | cfqd->last_delayed_sync = now; | 3634 | cfqd->last_delayed_sync = now; |
3635 | } | 3635 | } |
3636 | 3636 | ||
3637 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 3637 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
3638 | cfqq->cfqg->ttime.last_end_request = now; | 3638 | cfqq->cfqg->ttime.last_end_request = now; |
3639 | #endif | 3639 | #endif |
3640 | 3640 | ||
3641 | /* | 3641 | /* |
3642 | * If this is the active queue, check if it needs to be expired, | 3642 | * If this is the active queue, check if it needs to be expired, |
3643 | * or if we want to idle in case it has no pending requests. | 3643 | * or if we want to idle in case it has no pending requests. |
3644 | */ | 3644 | */ |
3645 | if (cfqd->active_queue == cfqq) { | 3645 | if (cfqd->active_queue == cfqq) { |
3646 | const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list); | 3646 | const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list); |
3647 | 3647 | ||
3648 | if (cfq_cfqq_slice_new(cfqq)) { | 3648 | if (cfq_cfqq_slice_new(cfqq)) { |
3649 | cfq_set_prio_slice(cfqd, cfqq); | 3649 | cfq_set_prio_slice(cfqd, cfqq); |
3650 | cfq_clear_cfqq_slice_new(cfqq); | 3650 | cfq_clear_cfqq_slice_new(cfqq); |
3651 | } | 3651 | } |
3652 | 3652 | ||
3653 | /* | 3653 | /* |
3654 | * Should we wait for the next request to come in before we expire | 3654 | * Should we wait for the next request to come in before we expire |
3655 | * the queue? | 3655 | * the queue? |
3656 | */ | 3656 | */ |
3657 | if (cfq_should_wait_busy(cfqd, cfqq)) { | 3657 | if (cfq_should_wait_busy(cfqd, cfqq)) { |
3658 | unsigned long extend_sl = cfqd->cfq_slice_idle; | 3658 | unsigned long extend_sl = cfqd->cfq_slice_idle; |
3659 | if (!cfqd->cfq_slice_idle) | 3659 | if (!cfqd->cfq_slice_idle) |
3660 | extend_sl = cfqd->cfq_group_idle; | 3660 | extend_sl = cfqd->cfq_group_idle; |
3661 | cfqq->slice_end = jiffies + extend_sl; | 3661 | cfqq->slice_end = jiffies + extend_sl; |
3662 | cfq_mark_cfqq_wait_busy(cfqq); | 3662 | cfq_mark_cfqq_wait_busy(cfqq); |
3663 | cfq_log_cfqq(cfqd, cfqq, "will busy wait"); | 3663 | cfq_log_cfqq(cfqd, cfqq, "will busy wait"); |
3664 | } | 3664 | } |
3665 | 3665 | ||
3666 | /* | 3666 | /* |
3667 | * Idling is not enabled on: | 3667 | * Idling is not enabled on: |
3668 | * - expired queues | 3668 | * - expired queues |
3669 | * - idle-priority queues | 3669 | * - idle-priority queues |
3670 | * - async queues | 3670 | * - async queues |
3671 | * - queues with still some requests queued | 3671 | * - queues with still some requests queued |
3672 | * - when there is a close cooperator | 3672 | * - when there is a close cooperator |
3673 | */ | 3673 | */ |
3674 | if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) | 3674 | if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) |
3675 | cfq_slice_expired(cfqd, 1); | 3675 | cfq_slice_expired(cfqd, 1); |
3676 | else if (sync && cfqq_empty && | 3676 | else if (sync && cfqq_empty && |
3677 | !cfq_close_cooperator(cfqd, cfqq)) { | 3677 | !cfq_close_cooperator(cfqd, cfqq)) { |
3678 | cfq_arm_slice_timer(cfqd); | 3678 | cfq_arm_slice_timer(cfqd); |
3679 | } | 3679 | } |
3680 | } | 3680 | } |
3681 | 3681 | ||
3682 | if (!cfqd->rq_in_driver) | 3682 | if (!cfqd->rq_in_driver) |
3683 | cfq_schedule_dispatch(cfqd); | 3683 | cfq_schedule_dispatch(cfqd); |
3684 | } | 3684 | } |
3685 | 3685 | ||
3686 | static inline int __cfq_may_queue(struct cfq_queue *cfqq) | 3686 | static inline int __cfq_may_queue(struct cfq_queue *cfqq) |
3687 | { | 3687 | { |
3688 | if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { | 3688 | if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { |
3689 | cfq_mark_cfqq_must_alloc_slice(cfqq); | 3689 | cfq_mark_cfqq_must_alloc_slice(cfqq); |
3690 | return ELV_MQUEUE_MUST; | 3690 | return ELV_MQUEUE_MUST; |
3691 | } | 3691 | } |
3692 | 3692 | ||
3693 | return ELV_MQUEUE_MAY; | 3693 | return ELV_MQUEUE_MAY; |
3694 | } | 3694 | } |
3695 | 3695 | ||
3696 | static int cfq_may_queue(struct request_queue *q, int rw) | 3696 | static int cfq_may_queue(struct request_queue *q, int rw) |
3697 | { | 3697 | { |
3698 | struct cfq_data *cfqd = q->elevator->elevator_data; | 3698 | struct cfq_data *cfqd = q->elevator->elevator_data; |
3699 | struct task_struct *tsk = current; | 3699 | struct task_struct *tsk = current; |
3700 | struct cfq_io_cq *cic; | 3700 | struct cfq_io_cq *cic; |
3701 | struct cfq_queue *cfqq; | 3701 | struct cfq_queue *cfqq; |
3702 | 3702 | ||
3703 | /* | 3703 | /* |
3704 | * don't force setup of a queue from here, as a call to may_queue | 3704 | * don't force setup of a queue from here, as a call to may_queue |
3705 | * does not necessarily imply that a request actually will be queued, | 3705 | * does not necessarily imply that a request actually will be queued, |
3706 | * so just look up a possibly existing queue, or return 'may queue' | 3706 | * so just look up a possibly existing queue, or return 'may queue' |
3707 | * if that fails | 3707 | * if that fails |
3708 | */ | 3708 | */ |
3709 | cic = cfq_cic_lookup(cfqd, tsk->io_context); | 3709 | cic = cfq_cic_lookup(cfqd, tsk->io_context); |
3710 | if (!cic) | 3710 | if (!cic) |
3711 | return ELV_MQUEUE_MAY; | 3711 | return ELV_MQUEUE_MAY; |
3712 | 3712 | ||
3713 | cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); | 3713 | cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); |
3714 | if (cfqq) { | 3714 | if (cfqq) { |
3715 | cfq_init_prio_data(cfqq, cic); | 3715 | cfq_init_prio_data(cfqq, cic); |
3716 | 3716 | ||
3717 | return __cfq_may_queue(cfqq); | 3717 | return __cfq_may_queue(cfqq); |
3718 | } | 3718 | } |
3719 | 3719 | ||
3720 | return ELV_MQUEUE_MAY; | 3720 | return ELV_MQUEUE_MAY; |
3721 | } | 3721 | } |
3722 | 3722 | ||
3723 | /* | 3723 | /* |
3724 | * queue lock held here | 3724 | * queue lock held here |
3725 | */ | 3725 | */ |
3726 | static void cfq_put_request(struct request *rq) | 3726 | static void cfq_put_request(struct request *rq) |
3727 | { | 3727 | { |
3728 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 3728 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
3729 | 3729 | ||
3730 | if (cfqq) { | 3730 | if (cfqq) { |
3731 | const int rw = rq_data_dir(rq); | 3731 | const int rw = rq_data_dir(rq); |
3732 | 3732 | ||
3733 | BUG_ON(!cfqq->allocated[rw]); | 3733 | BUG_ON(!cfqq->allocated[rw]); |
3734 | cfqq->allocated[rw]--; | 3734 | cfqq->allocated[rw]--; |
3735 | 3735 | ||
3736 | /* Put down rq reference on cfqg */ | 3736 | /* Put down rq reference on cfqg */ |
3737 | cfqg_put(RQ_CFQG(rq)); | 3737 | cfqg_put(RQ_CFQG(rq)); |
3738 | rq->elv.priv[0] = NULL; | 3738 | rq->elv.priv[0] = NULL; |
3739 | rq->elv.priv[1] = NULL; | 3739 | rq->elv.priv[1] = NULL; |
3740 | 3740 | ||
3741 | cfq_put_queue(cfqq); | 3741 | cfq_put_queue(cfqq); |
3742 | } | 3742 | } |
3743 | } | 3743 | } |
3744 | 3744 | ||
3745 | static struct cfq_queue * | 3745 | static struct cfq_queue * |
3746 | cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_cq *cic, | 3746 | cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_cq *cic, |
3747 | struct cfq_queue *cfqq) | 3747 | struct cfq_queue *cfqq) |
3748 | { | 3748 | { |
3749 | cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq); | 3749 | cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq); |
3750 | cic_set_cfqq(cic, cfqq->new_cfqq, 1); | 3750 | cic_set_cfqq(cic, cfqq->new_cfqq, 1); |
3751 | cfq_mark_cfqq_coop(cfqq->new_cfqq); | 3751 | cfq_mark_cfqq_coop(cfqq->new_cfqq); |
3752 | cfq_put_queue(cfqq); | 3752 | cfq_put_queue(cfqq); |
3753 | return cic_to_cfqq(cic, 1); | 3753 | return cic_to_cfqq(cic, 1); |
3754 | } | 3754 | } |
3755 | 3755 | ||
3756 | /* | 3756 | /* |
3757 | * Returns NULL if a new cfqq should be allocated, or the old cfqq if this | 3757 | * Returns NULL if a new cfqq should be allocated, or the old cfqq if this |
3758 | * was the last process referring to said cfqq. | 3758 | * was the last process referring to said cfqq. |
3759 | */ | 3759 | */ |
3760 | static struct cfq_queue * | 3760 | static struct cfq_queue * |
3761 | split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq) | 3761 | split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq) |
3762 | { | 3762 | { |
3763 | if (cfqq_process_refs(cfqq) == 1) { | 3763 | if (cfqq_process_refs(cfqq) == 1) { |
3764 | cfqq->pid = current->pid; | 3764 | cfqq->pid = current->pid; |
3765 | cfq_clear_cfqq_coop(cfqq); | 3765 | cfq_clear_cfqq_coop(cfqq); |
3766 | cfq_clear_cfqq_split_coop(cfqq); | 3766 | cfq_clear_cfqq_split_coop(cfqq); |
3767 | return cfqq; | 3767 | return cfqq; |
3768 | } | 3768 | } |
3769 | 3769 | ||
3770 | cic_set_cfqq(cic, NULL, 1); | 3770 | cic_set_cfqq(cic, NULL, 1); |
3771 | 3771 | ||
3772 | cfq_put_cooperator(cfqq); | 3772 | cfq_put_cooperator(cfqq); |
3773 | 3773 | ||
3774 | cfq_put_queue(cfqq); | 3774 | cfq_put_queue(cfqq); |
3775 | return NULL; | 3775 | return NULL; |
3776 | } | 3776 | } |
3777 | /* | 3777 | /* |
3778 | * Allocate cfq data structures associated with this request. | 3778 | * Allocate cfq data structures associated with this request. |
3779 | */ | 3779 | */ |
3780 | static int | 3780 | static int |
3781 | cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio, | 3781 | cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio, |
3782 | gfp_t gfp_mask) | 3782 | gfp_t gfp_mask) |
3783 | { | 3783 | { |
3784 | struct cfq_data *cfqd = q->elevator->elevator_data; | 3784 | struct cfq_data *cfqd = q->elevator->elevator_data; |
3785 | struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq); | 3785 | struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq); |
3786 | const int rw = rq_data_dir(rq); | 3786 | const int rw = rq_data_dir(rq); |
3787 | const bool is_sync = rq_is_sync(rq); | 3787 | const bool is_sync = rq_is_sync(rq); |
3788 | struct cfq_queue *cfqq; | 3788 | struct cfq_queue *cfqq; |
3789 | 3789 | ||
3790 | might_sleep_if(gfp_mask & __GFP_WAIT); | 3790 | might_sleep_if(gfp_mask & __GFP_WAIT); |
3791 | 3791 | ||
3792 | spin_lock_irq(q->queue_lock); | 3792 | spin_lock_irq(q->queue_lock); |
3793 | 3793 | ||
3794 | check_ioprio_changed(cic, bio); | 3794 | check_ioprio_changed(cic, bio); |
3795 | check_blkcg_changed(cic, bio); | 3795 | check_blkcg_changed(cic, bio); |
3796 | new_queue: | 3796 | new_queue: |
3797 | cfqq = cic_to_cfqq(cic, is_sync); | 3797 | cfqq = cic_to_cfqq(cic, is_sync); |
3798 | if (!cfqq || cfqq == &cfqd->oom_cfqq) { | 3798 | if (!cfqq || cfqq == &cfqd->oom_cfqq) { |
3799 | cfqq = cfq_get_queue(cfqd, is_sync, cic, bio, gfp_mask); | 3799 | cfqq = cfq_get_queue(cfqd, is_sync, cic, bio, gfp_mask); |
3800 | cic_set_cfqq(cic, cfqq, is_sync); | 3800 | cic_set_cfqq(cic, cfqq, is_sync); |
3801 | } else { | 3801 | } else { |
3802 | /* | 3802 | /* |
3803 | * If the queue was seeky for too long, break it apart. | 3803 | * If the queue was seeky for too long, break it apart. |
3804 | */ | 3804 | */ |
3805 | if (cfq_cfqq_coop(cfqq) && cfq_cfqq_split_coop(cfqq)) { | 3805 | if (cfq_cfqq_coop(cfqq) && cfq_cfqq_split_coop(cfqq)) { |
3806 | cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq"); | 3806 | cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq"); |
3807 | cfqq = split_cfqq(cic, cfqq); | 3807 | cfqq = split_cfqq(cic, cfqq); |
3808 | if (!cfqq) | 3808 | if (!cfqq) |
3809 | goto new_queue; | 3809 | goto new_queue; |
3810 | } | 3810 | } |
3811 | 3811 | ||
3812 | /* | 3812 | /* |
3813 | * Check to see if this queue is scheduled to merge with | 3813 | * Check to see if this queue is scheduled to merge with |
3814 | * another, closely cooperating queue. The merging of | 3814 | * another, closely cooperating queue. The merging of |
3815 | * queues happens here as it must be done in process context. | 3815 | * queues happens here as it must be done in process context. |
3816 | * The reference on new_cfqq was taken in merge_cfqqs. | 3816 | * The reference on new_cfqq was taken in merge_cfqqs. |
3817 | */ | 3817 | */ |
3818 | if (cfqq->new_cfqq) | 3818 | if (cfqq->new_cfqq) |
3819 | cfqq = cfq_merge_cfqqs(cfqd, cic, cfqq); | 3819 | cfqq = cfq_merge_cfqqs(cfqd, cic, cfqq); |
3820 | } | 3820 | } |
3821 | 3821 | ||
3822 | cfqq->allocated[rw]++; | 3822 | cfqq->allocated[rw]++; |
3823 | 3823 | ||
3824 | cfqq->ref++; | 3824 | cfqq->ref++; |
3825 | cfqg_get(cfqq->cfqg); | 3825 | cfqg_get(cfqq->cfqg); |
3826 | rq->elv.priv[0] = cfqq; | 3826 | rq->elv.priv[0] = cfqq; |
3827 | rq->elv.priv[1] = cfqq->cfqg; | 3827 | rq->elv.priv[1] = cfqq->cfqg; |
3828 | spin_unlock_irq(q->queue_lock); | 3828 | spin_unlock_irq(q->queue_lock); |
3829 | return 0; | 3829 | return 0; |
3830 | } | 3830 | } |
3831 | 3831 | ||
3832 | static void cfq_kick_queue(struct work_struct *work) | 3832 | static void cfq_kick_queue(struct work_struct *work) |
3833 | { | 3833 | { |
3834 | struct cfq_data *cfqd = | 3834 | struct cfq_data *cfqd = |
3835 | container_of(work, struct cfq_data, unplug_work); | 3835 | container_of(work, struct cfq_data, unplug_work); |
3836 | struct request_queue *q = cfqd->queue; | 3836 | struct request_queue *q = cfqd->queue; |
3837 | 3837 | ||
3838 | spin_lock_irq(q->queue_lock); | 3838 | spin_lock_irq(q->queue_lock); |
3839 | __blk_run_queue(cfqd->queue); | 3839 | __blk_run_queue(cfqd->queue); |
3840 | spin_unlock_irq(q->queue_lock); | 3840 | spin_unlock_irq(q->queue_lock); |
3841 | } | 3841 | } |
3842 | 3842 | ||
3843 | /* | 3843 | /* |
3844 | * Timer running if the active_queue is currently idling inside its time slice | 3844 | * Timer running if the active_queue is currently idling inside its time slice |
3845 | */ | 3845 | */ |
3846 | static void cfq_idle_slice_timer(unsigned long data) | 3846 | static void cfq_idle_slice_timer(unsigned long data) |
3847 | { | 3847 | { |
3848 | struct cfq_data *cfqd = (struct cfq_data *) data; | 3848 | struct cfq_data *cfqd = (struct cfq_data *) data; |
3849 | struct cfq_queue *cfqq; | 3849 | struct cfq_queue *cfqq; |
3850 | unsigned long flags; | 3850 | unsigned long flags; |
3851 | int timed_out = 1; | 3851 | int timed_out = 1; |
3852 | 3852 | ||
3853 | cfq_log(cfqd, "idle timer fired"); | 3853 | cfq_log(cfqd, "idle timer fired"); |
3854 | 3854 | ||
3855 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); | 3855 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); |
3856 | 3856 | ||
3857 | cfqq = cfqd->active_queue; | 3857 | cfqq = cfqd->active_queue; |
3858 | if (cfqq) { | 3858 | if (cfqq) { |
3859 | timed_out = 0; | 3859 | timed_out = 0; |
3860 | 3860 | ||
3861 | /* | 3861 | /* |
3862 | * We saw a request before the queue expired, let it through | 3862 | * We saw a request before the queue expired, let it through |
3863 | */ | 3863 | */ |
3864 | if (cfq_cfqq_must_dispatch(cfqq)) | 3864 | if (cfq_cfqq_must_dispatch(cfqq)) |
3865 | goto out_kick; | 3865 | goto out_kick; |
3866 | 3866 | ||
3867 | /* | 3867 | /* |
3868 | * expired | 3868 | * expired |
3869 | */ | 3869 | */ |
3870 | if (cfq_slice_used(cfqq)) | 3870 | if (cfq_slice_used(cfqq)) |
3871 | goto expire; | 3871 | goto expire; |
3872 | 3872 | ||
3873 | /* | 3873 | /* |
3874 | * only expire and reinvoke request handler, if there are | 3874 | * only expire and reinvoke request handler, if there are |
3875 | * other queues with pending requests | 3875 | * other queues with pending requests |
3876 | */ | 3876 | */ |
3877 | if (!cfqd->busy_queues) | 3877 | if (!cfqd->busy_queues) |
3878 | goto out_cont; | 3878 | goto out_cont; |
3879 | 3879 | ||
3880 | /* | 3880 | /* |
3881 | * not expired and it has a request pending, let it dispatch | 3881 | * not expired and it has a request pending, let it dispatch |
3882 | */ | 3882 | */ |
3883 | if (!RB_EMPTY_ROOT(&cfqq->sort_list)) | 3883 | if (!RB_EMPTY_ROOT(&cfqq->sort_list)) |
3884 | goto out_kick; | 3884 | goto out_kick; |
3885 | 3885 | ||
3886 | /* | 3886 | /* |
3887 | * Queue depth flag is reset only when the idle didn't succeed | 3887 | * Queue depth flag is reset only when the idle didn't succeed |
3888 | */ | 3888 | */ |
3889 | cfq_clear_cfqq_deep(cfqq); | 3889 | cfq_clear_cfqq_deep(cfqq); |
3890 | } | 3890 | } |
3891 | expire: | 3891 | expire: |
3892 | cfq_slice_expired(cfqd, timed_out); | 3892 | cfq_slice_expired(cfqd, timed_out); |
3893 | out_kick: | 3893 | out_kick: |
3894 | cfq_schedule_dispatch(cfqd); | 3894 | cfq_schedule_dispatch(cfqd); |
3895 | out_cont: | 3895 | out_cont: |
3896 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | 3896 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); |
3897 | } | 3897 | } |
3898 | 3898 | ||
3899 | static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) | 3899 | static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) |
3900 | { | 3900 | { |
3901 | del_timer_sync(&cfqd->idle_slice_timer); | 3901 | del_timer_sync(&cfqd->idle_slice_timer); |
3902 | cancel_work_sync(&cfqd->unplug_work); | 3902 | cancel_work_sync(&cfqd->unplug_work); |
3903 | } | 3903 | } |
3904 | 3904 | ||
3905 | static void cfq_put_async_queues(struct cfq_data *cfqd) | 3905 | static void cfq_put_async_queues(struct cfq_data *cfqd) |
3906 | { | 3906 | { |
3907 | int i; | 3907 | int i; |
3908 | 3908 | ||
3909 | for (i = 0; i < IOPRIO_BE_NR; i++) { | 3909 | for (i = 0; i < IOPRIO_BE_NR; i++) { |
3910 | if (cfqd->async_cfqq[0][i]) | 3910 | if (cfqd->async_cfqq[0][i]) |
3911 | cfq_put_queue(cfqd->async_cfqq[0][i]); | 3911 | cfq_put_queue(cfqd->async_cfqq[0][i]); |
3912 | if (cfqd->async_cfqq[1][i]) | 3912 | if (cfqd->async_cfqq[1][i]) |
3913 | cfq_put_queue(cfqd->async_cfqq[1][i]); | 3913 | cfq_put_queue(cfqd->async_cfqq[1][i]); |
3914 | } | 3914 | } |
3915 | 3915 | ||
3916 | if (cfqd->async_idle_cfqq) | 3916 | if (cfqd->async_idle_cfqq) |
3917 | cfq_put_queue(cfqd->async_idle_cfqq); | 3917 | cfq_put_queue(cfqd->async_idle_cfqq); |
3918 | } | 3918 | } |
3919 | 3919 | ||
3920 | static void cfq_exit_queue(struct elevator_queue *e) | 3920 | static void cfq_exit_queue(struct elevator_queue *e) |
3921 | { | 3921 | { |
3922 | struct cfq_data *cfqd = e->elevator_data; | 3922 | struct cfq_data *cfqd = e->elevator_data; |
3923 | struct request_queue *q = cfqd->queue; | 3923 | struct request_queue *q = cfqd->queue; |
3924 | 3924 | ||
3925 | cfq_shutdown_timer_wq(cfqd); | 3925 | cfq_shutdown_timer_wq(cfqd); |
3926 | 3926 | ||
3927 | spin_lock_irq(q->queue_lock); | 3927 | spin_lock_irq(q->queue_lock); |
3928 | 3928 | ||
3929 | if (cfqd->active_queue) | 3929 | if (cfqd->active_queue) |
3930 | __cfq_slice_expired(cfqd, cfqd->active_queue, 0); | 3930 | __cfq_slice_expired(cfqd, cfqd->active_queue, 0); |
3931 | 3931 | ||
3932 | cfq_put_async_queues(cfqd); | 3932 | cfq_put_async_queues(cfqd); |
3933 | 3933 | ||
3934 | spin_unlock_irq(q->queue_lock); | 3934 | spin_unlock_irq(q->queue_lock); |
3935 | 3935 | ||
3936 | cfq_shutdown_timer_wq(cfqd); | 3936 | cfq_shutdown_timer_wq(cfqd); |
3937 | 3937 | ||
3938 | #ifndef CONFIG_CFQ_GROUP_IOSCHED | 3938 | #ifndef CONFIG_CFQ_GROUP_IOSCHED |
3939 | kfree(cfqd->root_group); | 3939 | kfree(cfqd->root_group); |
3940 | #endif | 3940 | #endif |
3941 | update_root_blkg_pd(q, &blkio_policy_cfq); | 3941 | update_root_blkg_pd(q, &blkio_policy_cfq); |
3942 | kfree(cfqd); | 3942 | kfree(cfqd); |
3943 | } | 3943 | } |
3944 | 3944 | ||
3945 | static int cfq_init_queue(struct request_queue *q) | 3945 | static int cfq_init_queue(struct request_queue *q) |
3946 | { | 3946 | { |
3947 | struct cfq_data *cfqd; | 3947 | struct cfq_data *cfqd; |
3948 | struct blkio_group *blkg __maybe_unused; | 3948 | struct blkio_group *blkg __maybe_unused; |
3949 | int i; | 3949 | int i; |
3950 | 3950 | ||
3951 | cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); | 3951 | cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); |
3952 | if (!cfqd) | 3952 | if (!cfqd) |
3953 | return -ENOMEM; | 3953 | return -ENOMEM; |
3954 | 3954 | ||
3955 | cfqd->queue = q; | 3955 | cfqd->queue = q; |
3956 | q->elevator->elevator_data = cfqd; | 3956 | q->elevator->elevator_data = cfqd; |
3957 | 3957 | ||
3958 | /* Init root service tree */ | 3958 | /* Init root service tree */ |
3959 | cfqd->grp_service_tree = CFQ_RB_ROOT; | 3959 | cfqd->grp_service_tree = CFQ_RB_ROOT; |
3960 | 3960 | ||
3961 | /* Init root group and prefer root group over other groups by default */ | 3961 | /* Init root group and prefer root group over other groups by default */ |
3962 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 3962 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
3963 | rcu_read_lock(); | 3963 | rcu_read_lock(); |
3964 | spin_lock_irq(q->queue_lock); | 3964 | spin_lock_irq(q->queue_lock); |
3965 | 3965 | ||
3966 | blkg = blkg_lookup_create(&blkio_root_cgroup, q, true); | 3966 | blkg = blkg_lookup_create(&blkio_root_cgroup, q, true); |
3967 | if (!IS_ERR(blkg)) | 3967 | if (!IS_ERR(blkg)) { |
3968 | q->root_blkg = blkg; | ||
3968 | cfqd->root_group = blkg_to_cfqg(blkg); | 3969 | cfqd->root_group = blkg_to_cfqg(blkg); |
3970 | } | ||
3969 | 3971 | ||
3970 | spin_unlock_irq(q->queue_lock); | 3972 | spin_unlock_irq(q->queue_lock); |
3971 | rcu_read_unlock(); | 3973 | rcu_read_unlock(); |
3972 | #else | 3974 | #else |
3973 | cfqd->root_group = kzalloc_node(sizeof(*cfqd->root_group), | 3975 | cfqd->root_group = kzalloc_node(sizeof(*cfqd->root_group), |
3974 | GFP_KERNEL, cfqd->queue->node); | 3976 | GFP_KERNEL, cfqd->queue->node); |
3975 | if (cfqd->root_group) | 3977 | if (cfqd->root_group) |
3976 | cfq_init_cfqg_base(cfqd->root_group); | 3978 | cfq_init_cfqg_base(cfqd->root_group); |
3977 | #endif | 3979 | #endif |
3978 | if (!cfqd->root_group) { | 3980 | if (!cfqd->root_group) { |
3979 | kfree(cfqd); | 3981 | kfree(cfqd); |
3980 | return -ENOMEM; | 3982 | return -ENOMEM; |
3981 | } | 3983 | } |
3982 | 3984 | ||
3983 | cfqd->root_group->weight = 2 * CFQ_WEIGHT_DEFAULT; | 3985 | cfqd->root_group->weight = 2 * CFQ_WEIGHT_DEFAULT; |
3984 | 3986 | ||
3985 | /* | 3987 | /* |
3986 | * Not strictly needed (since RB_ROOT just clears the node and we | 3988 | * Not strictly needed (since RB_ROOT just clears the node and we |
3987 | * zeroed cfqd on alloc), but better be safe in case someone decides | 3989 | * zeroed cfqd on alloc), but better be safe in case someone decides |
3988 | * to add magic to the rb code | 3990 | * to add magic to the rb code |
3989 | */ | 3991 | */ |
3990 | for (i = 0; i < CFQ_PRIO_LISTS; i++) | 3992 | for (i = 0; i < CFQ_PRIO_LISTS; i++) |
3991 | cfqd->prio_trees[i] = RB_ROOT; | 3993 | cfqd->prio_trees[i] = RB_ROOT; |
3992 | 3994 | ||
3993 | /* | 3995 | /* |
3994 | * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues. | 3996 | * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues. |
3995 | * Grab a permanent reference to it, so that the normal code flow | 3997 | * Grab a permanent reference to it, so that the normal code flow |
3996 | * will not attempt to free it. oom_cfqq is linked to root_group | 3998 | * will not attempt to free it. oom_cfqq is linked to root_group |
3997 | * but shouldn't hold a reference as it'll never be unlinked. Lose | 3999 | * but shouldn't hold a reference as it'll never be unlinked. Lose |
3998 | * the reference from linking right away. | 4000 | * the reference from linking right away. |
3999 | */ | 4001 | */ |
4000 | cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); | 4002 | cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); |
4001 | cfqd->oom_cfqq.ref++; | 4003 | cfqd->oom_cfqq.ref++; |
4002 | 4004 | ||
4003 | spin_lock_irq(q->queue_lock); | 4005 | spin_lock_irq(q->queue_lock); |
4004 | cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, cfqd->root_group); | 4006 | cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, cfqd->root_group); |
4005 | cfqg_put(cfqd->root_group); | 4007 | cfqg_put(cfqd->root_group); |
4006 | spin_unlock_irq(q->queue_lock); | 4008 | spin_unlock_irq(q->queue_lock); |
4007 | 4009 | ||
4008 | init_timer(&cfqd->idle_slice_timer); | 4010 | init_timer(&cfqd->idle_slice_timer); |
4009 | cfqd->idle_slice_timer.function = cfq_idle_slice_timer; | 4011 | cfqd->idle_slice_timer.function = cfq_idle_slice_timer; |
4010 | cfqd->idle_slice_timer.data = (unsigned long) cfqd; | 4012 | cfqd->idle_slice_timer.data = (unsigned long) cfqd; |
4011 | 4013 | ||
4012 | INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); | 4014 | INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); |
4013 | 4015 | ||
4014 | cfqd->cfq_quantum = cfq_quantum; | 4016 | cfqd->cfq_quantum = cfq_quantum; |
4015 | cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; | 4017 | cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; |
4016 | cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; | 4018 | cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; |
4017 | cfqd->cfq_back_max = cfq_back_max; | 4019 | cfqd->cfq_back_max = cfq_back_max; |
4018 | cfqd->cfq_back_penalty = cfq_back_penalty; | 4020 | cfqd->cfq_back_penalty = cfq_back_penalty; |
4019 | cfqd->cfq_slice[0] = cfq_slice_async; | 4021 | cfqd->cfq_slice[0] = cfq_slice_async; |
4020 | cfqd->cfq_slice[1] = cfq_slice_sync; | 4022 | cfqd->cfq_slice[1] = cfq_slice_sync; |
4021 | cfqd->cfq_slice_async_rq = cfq_slice_async_rq; | 4023 | cfqd->cfq_slice_async_rq = cfq_slice_async_rq; |
4022 | cfqd->cfq_slice_idle = cfq_slice_idle; | 4024 | cfqd->cfq_slice_idle = cfq_slice_idle; |
4023 | cfqd->cfq_group_idle = cfq_group_idle; | 4025 | cfqd->cfq_group_idle = cfq_group_idle; |
4024 | cfqd->cfq_latency = 1; | 4026 | cfqd->cfq_latency = 1; |
4025 | cfqd->hw_tag = -1; | 4027 | cfqd->hw_tag = -1; |
4026 | /* | 4028 | /* |
4027 | * we optimistically start assuming sync ops weren't delayed in last | 4029 | * we optimistically start assuming sync ops weren't delayed in last |
4028 | * second, in order to have larger depth for async operations. | 4030 | * second, in order to have larger depth for async operations. |
4029 | */ | 4031 | */ |
4030 | cfqd->last_delayed_sync = jiffies - HZ; | 4032 | cfqd->last_delayed_sync = jiffies - HZ; |
4031 | return 0; | 4033 | return 0; |
4032 | } | 4034 | } |
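For orientation, here is a minimal user-space model of the init path above (not kernel code; the model_* names and MODEL_WEIGHT_DEFAULT are stand-ins): when group scheduling is built in, the root group is looked up or created once and cached both as q->root_blkg and, through the blkg_to_cfqg()-style conversion, as cfqd->root_group; without group scheduling, cfq falls back to a private allocation that cfq_exit_queue() later frees.

/* Minimal user-space model of the init path above; not kernel code.
 * GROUP_SCHED stands in for CONFIG_CFQ_GROUP_IOSCHED. */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

#define GROUP_SCHED 1
#define MODEL_WEIGHT_DEFAULT 500   /* arbitrary stand-in for CFQ_WEIGHT_DEFAULT */

struct model_cfqg  { int weight; };
struct model_blkg  { struct model_cfqg cfqg; };        /* policy data lives in the blkg */
struct model_queue { struct model_blkg *root_blkg; };  /* models the new q->root_blkg */
struct model_cfqd  { struct model_queue *queue; struct model_cfqg *root_group; };

static struct model_cfqg *model_blkg_to_cfqg(struct model_blkg *blkg)
{
	return &blkg->cfqg;
}

/* Pretend blkg_lookup_create(): hands back the queue's root group,
 * creating it on first use. */
static struct model_blkg *model_lookup_create_root(struct model_queue *q)
{
	static struct model_blkg root;

	(void)q;
	return &root;
}

static int model_init_queue(struct model_queue *q, struct model_cfqd *cfqd)
{
	cfqd->queue = q;
#if GROUP_SCHED
	struct model_blkg *blkg = model_lookup_create_root(q);

	if (blkg) {
		q->root_blkg = blkg;                          /* cached on the queue  */
		cfqd->root_group = model_blkg_to_cfqg(blkg);  /* cfq's view of it     */
	}
#else
	/* without group scheduling, cfq keeps (and later frees) a private root */
	cfqd->root_group = calloc(1, sizeof(*cfqd->root_group));
#endif
	if (!cfqd->root_group)
		return -ENOMEM;
	cfqd->root_group->weight = 2 * MODEL_WEIGHT_DEFAULT;
	return 0;
}

int main(void)
{
	struct model_queue q = { 0 };
	struct model_cfqd cfqd = { 0 };

	if (model_init_queue(&q, &cfqd))
		return 1;
	printf("q.root_blkg cached: %s, root weight %d\n",
	       q.root_blkg ? "yes" : "no", cfqd.root_group->weight);
	return 0;
}

Either way, a NULL root_group is the single failure check, which keeps the -ENOMEM path identical for both configurations.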
4033 | 4035 | ||
4034 | /* | 4036 | /* |
4035 | * sysfs parts below --> | 4037 | * sysfs parts below --> |
4036 | */ | 4038 | */ |
4037 | static ssize_t | 4039 | static ssize_t |
4038 | cfq_var_show(unsigned int var, char *page) | 4040 | cfq_var_show(unsigned int var, char *page) |
4039 | { | 4041 | { |
4040 | return sprintf(page, "%d\n", var); | 4042 | return sprintf(page, "%d\n", var); |
4041 | } | 4043 | } |
4042 | 4044 | ||
4043 | static ssize_t | 4045 | static ssize_t |
4044 | cfq_var_store(unsigned int *var, const char *page, size_t count) | 4046 | cfq_var_store(unsigned int *var, const char *page, size_t count) |
4045 | { | 4047 | { |
4046 | char *p = (char *) page; | 4048 | char *p = (char *) page; |
4047 | 4049 | ||
4048 | *var = simple_strtoul(p, &p, 10); | 4050 | *var = simple_strtoul(p, &p, 10); |
4049 | return count; | 4051 | return count; |
4050 | } | 4052 | } |
4051 | 4053 | ||
4052 | #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ | 4054 | #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ |
4053 | static ssize_t __FUNC(struct elevator_queue *e, char *page) \ | 4055 | static ssize_t __FUNC(struct elevator_queue *e, char *page) \ |
4054 | { \ | 4056 | { \ |
4055 | struct cfq_data *cfqd = e->elevator_data; \ | 4057 | struct cfq_data *cfqd = e->elevator_data; \ |
4056 | unsigned int __data = __VAR; \ | 4058 | unsigned int __data = __VAR; \ |
4057 | if (__CONV) \ | 4059 | if (__CONV) \ |
4058 | __data = jiffies_to_msecs(__data); \ | 4060 | __data = jiffies_to_msecs(__data); \ |
4059 | return cfq_var_show(__data, (page)); \ | 4061 | return cfq_var_show(__data, (page)); \ |
4060 | } | 4062 | } |
4061 | SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); | 4063 | SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); |
4062 | SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); | 4064 | SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); |
4063 | SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); | 4065 | SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); |
4064 | SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0); | 4066 | SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0); |
4065 | SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0); | 4067 | SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0); |
4066 | SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); | 4068 | SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); |
4067 | SHOW_FUNCTION(cfq_group_idle_show, cfqd->cfq_group_idle, 1); | 4069 | SHOW_FUNCTION(cfq_group_idle_show, cfqd->cfq_group_idle, 1); |
4068 | SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); | 4070 | SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); |
4069 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); | 4071 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); |
4070 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); | 4072 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); |
4071 | SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); | 4073 | SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); |
4072 | #undef SHOW_FUNCTION | 4074 | #undef SHOW_FUNCTION |
4073 | 4075 | ||
4074 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ | 4076 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ |
4075 | static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ | 4077 | static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ |
4076 | { \ | 4078 | { \ |
4077 | struct cfq_data *cfqd = e->elevator_data; \ | 4079 | struct cfq_data *cfqd = e->elevator_data; \ |
4078 | unsigned int __data; \ | 4080 | unsigned int __data; \ |
4079 | int ret = cfq_var_store(&__data, (page), count); \ | 4081 | int ret = cfq_var_store(&__data, (page), count); \ |
4080 | if (__data < (MIN)) \ | 4082 | if (__data < (MIN)) \ |
4081 | __data = (MIN); \ | 4083 | __data = (MIN); \ |
4082 | else if (__data > (MAX)) \ | 4084 | else if (__data > (MAX)) \ |
4083 | __data = (MAX); \ | 4085 | __data = (MAX); \ |
4084 | if (__CONV) \ | 4086 | if (__CONV) \ |
4085 | *(__PTR) = msecs_to_jiffies(__data); \ | 4087 | *(__PTR) = msecs_to_jiffies(__data); \ |
4086 | else \ | 4088 | else \ |
4087 | *(__PTR) = __data; \ | 4089 | *(__PTR) = __data; \ |
4088 | return ret; \ | 4090 | return ret; \ |
4089 | } | 4091 | } |
4090 | STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); | 4092 | STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); |
4091 | STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, | 4093 | STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, |
4092 | UINT_MAX, 1); | 4094 | UINT_MAX, 1); |
4093 | STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, | 4095 | STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, |
4094 | UINT_MAX, 1); | 4096 | UINT_MAX, 1); |
4095 | STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); | 4097 | STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); |
4096 | STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1, | 4098 | STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1, |
4097 | UINT_MAX, 0); | 4099 | UINT_MAX, 0); |
4098 | STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); | 4100 | STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); |
4099 | STORE_FUNCTION(cfq_group_idle_store, &cfqd->cfq_group_idle, 0, UINT_MAX, 1); | 4101 | STORE_FUNCTION(cfq_group_idle_store, &cfqd->cfq_group_idle, 0, UINT_MAX, 1); |
4100 | STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); | 4102 | STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); |
4101 | STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); | 4103 | STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); |
4102 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, | 4104 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, |
4103 | UINT_MAX, 0); | 4105 | UINT_MAX, 0); |
4104 | STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); | 4106 | STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); |
4105 | #undef STORE_FUNCTION | 4107 | #undef STORE_FUNCTION |
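The SHOW_FUNCTION/STORE_FUNCTION macros above stamp out one sysfs handler per tunable: the store side clamps the parsed value into [MIN, MAX] and, when __CONV is set, converts user-visible milliseconds into jiffies, while the show side converts back. A condensed user-space illustration of the same macro technique (MODEL_HZ and the conversion helpers are simulated, not the kernel's):

/* User-space illustration of macro-generated show/store handlers;
 * MODEL_HZ and the tick conversions are simulated, not kernel APIs. */
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>

#define MODEL_HZ 250
static unsigned int msecs_to_ticks(unsigned int ms) { return ms * MODEL_HZ / 1000; }
static unsigned int ticks_to_msecs(unsigned int t)  { return t * 1000 / MODEL_HZ; }

struct tunables { unsigned int quantum; unsigned int slice_sync; /* in ticks */ };

#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)                            \
static int __FUNC(struct tunables *td, char *page, size_t len)          \
{                                                                       \
	unsigned int __data = td->__VAR;                                \
	if (__CONV)                                                     \
		__data = ticks_to_msecs(__data);                        \
	return snprintf(page, len, "%u\n", __data);                     \
}
SHOW_FUNCTION(quantum_show, quantum, 0);
SHOW_FUNCTION(slice_sync_show, slice_sync, 1);
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __VAR, MIN, MAX, __CONV)                 \
static void __FUNC(struct tunables *td, const char *page)               \
{                                                                       \
	unsigned int __data = (unsigned int)strtoul(page, NULL, 10);    \
	if (__data < (MIN))                                             \
		__data = (MIN);                                         \
	else if (__data > (MAX))                                        \
		__data = (MAX);                                         \
	td->__VAR = (__CONV) ? msecs_to_ticks(__data) : __data;         \
}
STORE_FUNCTION(quantum_store, quantum, 1, UINT_MAX, 0);
STORE_FUNCTION(slice_sync_store, slice_sync, 1, UINT_MAX, 1);
#undef STORE_FUNCTION

int main(void)
{
	struct tunables td = { 0 };
	char buf[32];

	quantum_store(&td, "0");        /* below MIN, clamped up to 1 */
	slice_sync_store(&td, "100");   /* 100 ms, stored as ticks */
	quantum_show(&td, buf, sizeof(buf));    printf("quantum: %s", buf);
	slice_sync_show(&td, buf, sizeof(buf)); printf("slice_sync(ms): %s", buf);
	return 0;
}

Running the model prints quantum 1 (the out-of-range write is clamped) and slice_sync 100, i.e. the millisecond value round-trips cleanly through the tick representation.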
4106 | 4108 | ||
4107 | #define CFQ_ATTR(name) \ | 4109 | #define CFQ_ATTR(name) \ |
4108 | __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store) | 4110 | __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store) |
4109 | 4111 | ||
4110 | static struct elv_fs_entry cfq_attrs[] = { | 4112 | static struct elv_fs_entry cfq_attrs[] = { |
4111 | CFQ_ATTR(quantum), | 4113 | CFQ_ATTR(quantum), |
4112 | CFQ_ATTR(fifo_expire_sync), | 4114 | CFQ_ATTR(fifo_expire_sync), |
4113 | CFQ_ATTR(fifo_expire_async), | 4115 | CFQ_ATTR(fifo_expire_async), |
4114 | CFQ_ATTR(back_seek_max), | 4116 | CFQ_ATTR(back_seek_max), |
4115 | CFQ_ATTR(back_seek_penalty), | 4117 | CFQ_ATTR(back_seek_penalty), |
4116 | CFQ_ATTR(slice_sync), | 4118 | CFQ_ATTR(slice_sync), |
4117 | CFQ_ATTR(slice_async), | 4119 | CFQ_ATTR(slice_async), |
4118 | CFQ_ATTR(slice_async_rq), | 4120 | CFQ_ATTR(slice_async_rq), |
4119 | CFQ_ATTR(slice_idle), | 4121 | CFQ_ATTR(slice_idle), |
4120 | CFQ_ATTR(group_idle), | 4122 | CFQ_ATTR(group_idle), |
4121 | CFQ_ATTR(low_latency), | 4123 | CFQ_ATTR(low_latency), |
4122 | __ATTR_NULL | 4124 | __ATTR_NULL |
4123 | }; | 4125 | }; |
4124 | 4126 | ||
4125 | static struct elevator_type iosched_cfq = { | 4127 | static struct elevator_type iosched_cfq = { |
4126 | .ops = { | 4128 | .ops = { |
4127 | .elevator_merge_fn = cfq_merge, | 4129 | .elevator_merge_fn = cfq_merge, |
4128 | .elevator_merged_fn = cfq_merged_request, | 4130 | .elevator_merged_fn = cfq_merged_request, |
4129 | .elevator_merge_req_fn = cfq_merged_requests, | 4131 | .elevator_merge_req_fn = cfq_merged_requests, |
4130 | .elevator_allow_merge_fn = cfq_allow_merge, | 4132 | .elevator_allow_merge_fn = cfq_allow_merge, |
4131 | .elevator_bio_merged_fn = cfq_bio_merged, | 4133 | .elevator_bio_merged_fn = cfq_bio_merged, |
4132 | .elevator_dispatch_fn = cfq_dispatch_requests, | 4134 | .elevator_dispatch_fn = cfq_dispatch_requests, |
4133 | .elevator_add_req_fn = cfq_insert_request, | 4135 | .elevator_add_req_fn = cfq_insert_request, |
4134 | .elevator_activate_req_fn = cfq_activate_request, | 4136 | .elevator_activate_req_fn = cfq_activate_request, |
4135 | .elevator_deactivate_req_fn = cfq_deactivate_request, | 4137 | .elevator_deactivate_req_fn = cfq_deactivate_request, |
4136 | .elevator_completed_req_fn = cfq_completed_request, | 4138 | .elevator_completed_req_fn = cfq_completed_request, |
4137 | .elevator_former_req_fn = elv_rb_former_request, | 4139 | .elevator_former_req_fn = elv_rb_former_request, |
4138 | .elevator_latter_req_fn = elv_rb_latter_request, | 4140 | .elevator_latter_req_fn = elv_rb_latter_request, |
4139 | .elevator_init_icq_fn = cfq_init_icq, | 4141 | .elevator_init_icq_fn = cfq_init_icq, |
4140 | .elevator_exit_icq_fn = cfq_exit_icq, | 4142 | .elevator_exit_icq_fn = cfq_exit_icq, |
4141 | .elevator_set_req_fn = cfq_set_request, | 4143 | .elevator_set_req_fn = cfq_set_request, |
4142 | .elevator_put_req_fn = cfq_put_request, | 4144 | .elevator_put_req_fn = cfq_put_request, |
4143 | .elevator_may_queue_fn = cfq_may_queue, | 4145 | .elevator_may_queue_fn = cfq_may_queue, |
4144 | .elevator_init_fn = cfq_init_queue, | 4146 | .elevator_init_fn = cfq_init_queue, |
4145 | .elevator_exit_fn = cfq_exit_queue, | 4147 | .elevator_exit_fn = cfq_exit_queue, |
4146 | }, | 4148 | }, |
4147 | .icq_size = sizeof(struct cfq_io_cq), | 4149 | .icq_size = sizeof(struct cfq_io_cq), |
4148 | .icq_align = __alignof__(struct cfq_io_cq), | 4150 | .icq_align = __alignof__(struct cfq_io_cq), |
4149 | .elevator_attrs = cfq_attrs, | 4151 | .elevator_attrs = cfq_attrs, |
4150 | .elevator_name = "cfq", | 4152 | .elevator_name = "cfq", |
4151 | .elevator_owner = THIS_MODULE, | 4153 | .elevator_owner = THIS_MODULE, |
4152 | }; | 4154 | }; |
4153 | 4155 | ||
4154 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 4156 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
4155 | static struct blkio_policy_type blkio_policy_cfq = { | 4157 | static struct blkio_policy_type blkio_policy_cfq = { |
4156 | .ops = { | 4158 | .ops = { |
4157 | .blkio_init_group_fn = cfq_init_blkio_group, | 4159 | .blkio_init_group_fn = cfq_init_blkio_group, |
4158 | .blkio_reset_group_stats_fn = cfqg_stats_reset, | 4160 | .blkio_reset_group_stats_fn = cfqg_stats_reset, |
4159 | }, | 4161 | }, |
4160 | .pdata_size = sizeof(struct cfq_group), | 4162 | .pdata_size = sizeof(struct cfq_group), |
4161 | .cftypes = cfq_blkcg_files, | 4163 | .cftypes = cfq_blkcg_files, |
4162 | }; | 4164 | }; |
4163 | #endif | 4165 | #endif |
4164 | 4166 | ||
4165 | static int __init cfq_init(void) | 4167 | static int __init cfq_init(void) |
4166 | { | 4168 | { |
4167 | int ret; | 4169 | int ret; |
4168 | 4170 | ||
4169 | /* | 4171 | /* |
4170 | * could be 0 on HZ < 1000 setups | 4172 | * could be 0 on HZ < 1000 setups |
4171 | */ | 4173 | */ |
4172 | if (!cfq_slice_async) | 4174 | if (!cfq_slice_async) |
4173 | cfq_slice_async = 1; | 4175 | cfq_slice_async = 1; |
4174 | if (!cfq_slice_idle) | 4176 | if (!cfq_slice_idle) |
4175 | cfq_slice_idle = 1; | 4177 | cfq_slice_idle = 1; |
4176 | 4178 | ||
4177 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 4179 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
4178 | if (!cfq_group_idle) | 4180 | if (!cfq_group_idle) |
4179 | cfq_group_idle = 1; | 4181 | cfq_group_idle = 1; |
4180 | #else | 4182 | #else |
4181 | cfq_group_idle = 0; | 4183 | cfq_group_idle = 0; |
4182 | #endif | 4184 | #endif |
4183 | 4185 | ||
4184 | ret = blkio_policy_register(&blkio_policy_cfq); | 4186 | ret = blkio_policy_register(&blkio_policy_cfq); |
4185 | if (ret) | 4187 | if (ret) |
4186 | return ret; | 4188 | return ret; |
4187 | 4189 | ||
4188 | cfq_pool = KMEM_CACHE(cfq_queue, 0); | 4190 | cfq_pool = KMEM_CACHE(cfq_queue, 0); |
4189 | if (!cfq_pool) | 4191 | if (!cfq_pool) |
4190 | goto err_pol_unreg; | 4192 | goto err_pol_unreg; |
4191 | 4193 | ||
4192 | ret = elv_register(&iosched_cfq); | 4194 | ret = elv_register(&iosched_cfq); |
4193 | if (ret) | 4195 | if (ret) |
4194 | goto err_free_pool; | 4196 | goto err_free_pool; |
4195 | 4197 | ||
4196 | return 0; | 4198 | return 0; |
4197 | 4199 | ||
4198 | err_free_pool: | 4200 | err_free_pool: |
4199 | kmem_cache_destroy(cfq_pool); | 4201 | kmem_cache_destroy(cfq_pool); |
4200 | err_pol_unreg: | 4202 | err_pol_unreg: |
4201 | blkio_policy_unregister(&blkio_policy_cfq); | 4203 | blkio_policy_unregister(&blkio_policy_cfq); |
4202 | return ret; | 4204 | return ret; |
4203 | } | 4205 | } |
4204 | 4206 | ||
4205 | static void __exit cfq_exit(void) | 4207 | static void __exit cfq_exit(void) |
4206 | { | 4208 | { |
4207 | blkio_policy_unregister(&blkio_policy_cfq); | 4209 | blkio_policy_unregister(&blkio_policy_cfq); |
4208 | elv_unregister(&iosched_cfq); | 4210 | elv_unregister(&iosched_cfq); |
4209 | kmem_cache_destroy(cfq_pool); | 4211 | kmem_cache_destroy(cfq_pool); |
4210 | } | 4212 | } |
4211 | 4213 | ||
4212 | module_init(cfq_init); | 4214 | module_init(cfq_init); |
4213 | module_exit(cfq_exit); | 4215 | module_exit(cfq_exit); |
4214 | 4216 | ||
4215 | MODULE_AUTHOR("Jens Axboe"); | 4217 | MODULE_AUTHOR("Jens Axboe"); |
4216 | MODULE_LICENSE("GPL"); | 4218 | MODULE_LICENSE("GPL"); |
4217 | MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler"); | 4219 | MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler"); |
4218 | 4220 |
include/linux/blkdev.h
1 | #ifndef _LINUX_BLKDEV_H | 1 | #ifndef _LINUX_BLKDEV_H |
2 | #define _LINUX_BLKDEV_H | 2 | #define _LINUX_BLKDEV_H |
3 | 3 | ||
4 | #ifdef CONFIG_BLOCK | 4 | #ifdef CONFIG_BLOCK |
5 | 5 | ||
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/major.h> | 7 | #include <linux/major.h> |
8 | #include <linux/genhd.h> | 8 | #include <linux/genhd.h> |
9 | #include <linux/list.h> | 9 | #include <linux/list.h> |
10 | #include <linux/timer.h> | 10 | #include <linux/timer.h> |
11 | #include <linux/workqueue.h> | 11 | #include <linux/workqueue.h> |
12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
13 | #include <linux/backing-dev.h> | 13 | #include <linux/backing-dev.h> |
14 | #include <linux/wait.h> | 14 | #include <linux/wait.h> |
15 | #include <linux/mempool.h> | 15 | #include <linux/mempool.h> |
16 | #include <linux/bio.h> | 16 | #include <linux/bio.h> |
17 | #include <linux/stringify.h> | 17 | #include <linux/stringify.h> |
18 | #include <linux/gfp.h> | 18 | #include <linux/gfp.h> |
19 | #include <linux/bsg.h> | 19 | #include <linux/bsg.h> |
20 | #include <linux/smp.h> | 20 | #include <linux/smp.h> |
21 | 21 | ||
22 | #include <asm/scatterlist.h> | 22 | #include <asm/scatterlist.h> |
23 | 23 | ||
24 | struct module; | 24 | struct module; |
25 | struct scsi_ioctl_command; | 25 | struct scsi_ioctl_command; |
26 | 26 | ||
27 | struct request_queue; | 27 | struct request_queue; |
28 | struct elevator_queue; | 28 | struct elevator_queue; |
29 | struct request_pm_state; | 29 | struct request_pm_state; |
30 | struct blk_trace; | 30 | struct blk_trace; |
31 | struct request; | 31 | struct request; |
32 | struct sg_io_hdr; | 32 | struct sg_io_hdr; |
33 | struct bsg_job; | 33 | struct bsg_job; |
34 | struct blkio_group; | ||
34 | 35 | ||
35 | #define BLKDEV_MIN_RQ 4 | 36 | #define BLKDEV_MIN_RQ 4 |
36 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ | 37 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ |
37 | 38 | ||
38 | /* | 39 | /* |
39 | * Maximum number of blkcg policies allowed to be registered concurrently. | 40 | * Maximum number of blkcg policies allowed to be registered concurrently. |
40 | * Defined here to simplify include dependency. | 41 | * Defined here to simplify include dependency. |
41 | */ | 42 | */ |
42 | #define BLKCG_MAX_POLS 2 | 43 | #define BLKCG_MAX_POLS 2 |
43 | 44 | ||
44 | struct request; | 45 | struct request; |
45 | typedef void (rq_end_io_fn)(struct request *, int); | 46 | typedef void (rq_end_io_fn)(struct request *, int); |
46 | 47 | ||
47 | struct request_list { | 48 | struct request_list { |
48 | /* | 49 | /* |
49 | * count[], starved[], and wait[] are indexed by | 50 | * count[], starved[], and wait[] are indexed by |
50 | * BLK_RW_SYNC/BLK_RW_ASYNC | 51 | * BLK_RW_SYNC/BLK_RW_ASYNC |
51 | */ | 52 | */ |
52 | int count[2]; | 53 | int count[2]; |
53 | int starved[2]; | 54 | int starved[2]; |
54 | int elvpriv; | 55 | int elvpriv; |
55 | mempool_t *rq_pool; | 56 | mempool_t *rq_pool; |
56 | wait_queue_head_t wait[2]; | 57 | wait_queue_head_t wait[2]; |
57 | }; | 58 | }; |
58 | 59 | ||
59 | /* | 60 | /* |
60 | * request command types | 61 | * request command types |
61 | */ | 62 | */ |
62 | enum rq_cmd_type_bits { | 63 | enum rq_cmd_type_bits { |
63 | REQ_TYPE_FS = 1, /* fs request */ | 64 | REQ_TYPE_FS = 1, /* fs request */ |
64 | REQ_TYPE_BLOCK_PC, /* scsi command */ | 65 | REQ_TYPE_BLOCK_PC, /* scsi command */ |
65 | REQ_TYPE_SENSE, /* sense request */ | 66 | REQ_TYPE_SENSE, /* sense request */ |
66 | REQ_TYPE_PM_SUSPEND, /* suspend request */ | 67 | REQ_TYPE_PM_SUSPEND, /* suspend request */ |
67 | REQ_TYPE_PM_RESUME, /* resume request */ | 68 | REQ_TYPE_PM_RESUME, /* resume request */ |
68 | REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ | 69 | REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ |
69 | REQ_TYPE_SPECIAL, /* driver defined type */ | 70 | REQ_TYPE_SPECIAL, /* driver defined type */ |
70 | /* | 71 | /* |
71 | * for ATA/ATAPI devices. this really doesn't belong here, ide should | 72 | * for ATA/ATAPI devices. this really doesn't belong here, ide should |
72 | * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver | 73 | * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver |
73 | * private REQ_LB opcodes to differentiate what type of request this is | 74 | * private REQ_LB opcodes to differentiate what type of request this is |
74 | */ | 75 | */ |
75 | REQ_TYPE_ATA_TASKFILE, | 76 | REQ_TYPE_ATA_TASKFILE, |
76 | REQ_TYPE_ATA_PC, | 77 | REQ_TYPE_ATA_PC, |
77 | }; | 78 | }; |
78 | 79 | ||
79 | #define BLK_MAX_CDB 16 | 80 | #define BLK_MAX_CDB 16 |
80 | 81 | ||
81 | /* | 82 | /* |
82 | * try to put the fields that are referenced together in the same cacheline. | 83 | * try to put the fields that are referenced together in the same cacheline. |
83 | * if you modify this structure, be sure to check block/blk-core.c:blk_rq_init() | 84 | * if you modify this structure, be sure to check block/blk-core.c:blk_rq_init() |
84 | * as well! | 85 | * as well! |
85 | */ | 86 | */ |
86 | struct request { | 87 | struct request { |
87 | struct list_head queuelist; | 88 | struct list_head queuelist; |
88 | struct call_single_data csd; | 89 | struct call_single_data csd; |
89 | 90 | ||
90 | struct request_queue *q; | 91 | struct request_queue *q; |
91 | 92 | ||
92 | unsigned int cmd_flags; | 93 | unsigned int cmd_flags; |
93 | enum rq_cmd_type_bits cmd_type; | 94 | enum rq_cmd_type_bits cmd_type; |
94 | unsigned long atomic_flags; | 95 | unsigned long atomic_flags; |
95 | 96 | ||
96 | int cpu; | 97 | int cpu; |
97 | 98 | ||
98 | /* the following two fields are internal, NEVER access directly */ | 99 | /* the following two fields are internal, NEVER access directly */ |
99 | unsigned int __data_len; /* total data len */ | 100 | unsigned int __data_len; /* total data len */ |
100 | sector_t __sector; /* sector cursor */ | 101 | sector_t __sector; /* sector cursor */ |
101 | 102 | ||
102 | struct bio *bio; | 103 | struct bio *bio; |
103 | struct bio *biotail; | 104 | struct bio *biotail; |
104 | 105 | ||
105 | struct hlist_node hash; /* merge hash */ | 106 | struct hlist_node hash; /* merge hash */ |
106 | /* | 107 | /* |
107 | * The rb_node is only used inside the io scheduler, requests | 108 | * The rb_node is only used inside the io scheduler, requests |
108 | * are pruned when moved to the dispatch queue. So let the | 109 | * are pruned when moved to the dispatch queue. So let the |
109 | * completion_data share space with the rb_node. | 110 | * completion_data share space with the rb_node. |
110 | */ | 111 | */ |
111 | union { | 112 | union { |
112 | struct rb_node rb_node; /* sort/lookup */ | 113 | struct rb_node rb_node; /* sort/lookup */ |
113 | void *completion_data; | 114 | void *completion_data; |
114 | }; | 115 | }; |
115 | 116 | ||
116 | /* | 117 | /* |
117 | * Three pointers are available for the IO schedulers, if they need | 118 | * Three pointers are available for the IO schedulers, if they need |
118 | * more they have to dynamically allocate it. Flush requests are | 119 | * more they have to dynamically allocate it. Flush requests are |
119 | * never put on the IO scheduler. So let the flush fields share | 120 | * never put on the IO scheduler. So let the flush fields share |
120 | * space with the elevator data. | 121 | * space with the elevator data. |
121 | */ | 122 | */ |
122 | union { | 123 | union { |
123 | struct { | 124 | struct { |
124 | struct io_cq *icq; | 125 | struct io_cq *icq; |
125 | void *priv[2]; | 126 | void *priv[2]; |
126 | } elv; | 127 | } elv; |
127 | 128 | ||
128 | struct { | 129 | struct { |
129 | unsigned int seq; | 130 | unsigned int seq; |
130 | struct list_head list; | 131 | struct list_head list; |
131 | rq_end_io_fn *saved_end_io; | 132 | rq_end_io_fn *saved_end_io; |
132 | } flush; | 133 | } flush; |
133 | }; | 134 | }; |
134 | 135 | ||
135 | struct gendisk *rq_disk; | 136 | struct gendisk *rq_disk; |
136 | struct hd_struct *part; | 137 | struct hd_struct *part; |
137 | unsigned long start_time; | 138 | unsigned long start_time; |
138 | #ifdef CONFIG_BLK_CGROUP | 139 | #ifdef CONFIG_BLK_CGROUP |
139 | unsigned long long start_time_ns; | 140 | unsigned long long start_time_ns; |
140 | unsigned long long io_start_time_ns; /* when passed to hardware */ | 141 | unsigned long long io_start_time_ns; /* when passed to hardware */ |
141 | #endif | 142 | #endif |
142 | /* Number of scatter-gather DMA addr+len pairs after | 143 | /* Number of scatter-gather DMA addr+len pairs after |
143 | * physical address coalescing is performed. | 144 | * physical address coalescing is performed. |
144 | */ | 145 | */ |
145 | unsigned short nr_phys_segments; | 146 | unsigned short nr_phys_segments; |
146 | #if defined(CONFIG_BLK_DEV_INTEGRITY) | 147 | #if defined(CONFIG_BLK_DEV_INTEGRITY) |
147 | unsigned short nr_integrity_segments; | 148 | unsigned short nr_integrity_segments; |
148 | #endif | 149 | #endif |
149 | 150 | ||
150 | unsigned short ioprio; | 151 | unsigned short ioprio; |
151 | 152 | ||
152 | int ref_count; | 153 | int ref_count; |
153 | 154 | ||
154 | void *special; /* opaque pointer available for LLD use */ | 155 | void *special; /* opaque pointer available for LLD use */ |
155 | char *buffer; /* kaddr of the current segment if available */ | 156 | char *buffer; /* kaddr of the current segment if available */ |
156 | 157 | ||
157 | int tag; | 158 | int tag; |
158 | int errors; | 159 | int errors; |
159 | 160 | ||
160 | /* | 161 | /* |
161 | * when request is used as a packet command carrier | 162 | * when request is used as a packet command carrier |
162 | */ | 163 | */ |
163 | unsigned char __cmd[BLK_MAX_CDB]; | 164 | unsigned char __cmd[BLK_MAX_CDB]; |
164 | unsigned char *cmd; | 165 | unsigned char *cmd; |
165 | unsigned short cmd_len; | 166 | unsigned short cmd_len; |
166 | 167 | ||
167 | unsigned int extra_len; /* length of alignment and padding */ | 168 | unsigned int extra_len; /* length of alignment and padding */ |
168 | unsigned int sense_len; | 169 | unsigned int sense_len; |
169 | unsigned int resid_len; /* residual count */ | 170 | unsigned int resid_len; /* residual count */ |
170 | void *sense; | 171 | void *sense; |
171 | 172 | ||
172 | unsigned long deadline; | 173 | unsigned long deadline; |
173 | struct list_head timeout_list; | 174 | struct list_head timeout_list; |
174 | unsigned int timeout; | 175 | unsigned int timeout; |
175 | int retries; | 176 | int retries; |
176 | 177 | ||
177 | /* | 178 | /* |
178 | * completion callback. | 179 | * completion callback. |
179 | */ | 180 | */ |
180 | rq_end_io_fn *end_io; | 181 | rq_end_io_fn *end_io; |
181 | void *end_io_data; | 182 | void *end_io_data; |
182 | 183 | ||
183 | /* for bidi */ | 184 | /* for bidi */ |
184 | struct request *next_rq; | 185 | struct request *next_rq; |
185 | }; | 186 | }; |
186 | 187 | ||
187 | static inline unsigned short req_get_ioprio(struct request *req) | 188 | static inline unsigned short req_get_ioprio(struct request *req) |
188 | { | 189 | { |
189 | return req->ioprio; | 190 | return req->ioprio; |
190 | } | 191 | } |
191 | 192 | ||
192 | /* | 193 | /* |
193 | * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME | 194 | * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME |
194 | * requests. Some step values could eventually be made generic. | 195 | * requests. Some step values could eventually be made generic. |
195 | */ | 196 | */ |
196 | struct request_pm_state | 197 | struct request_pm_state |
197 | { | 198 | { |
198 | /* PM state machine step value, currently driver specific */ | 199 | /* PM state machine step value, currently driver specific */ |
199 | int pm_step; | 200 | int pm_step; |
200 | /* requested PM state value (S1, S2, S3, S4, ...) */ | 201 | /* requested PM state value (S1, S2, S3, S4, ...) */ |
201 | u32 pm_state; | 202 | u32 pm_state; |
202 | void* data; /* for driver use */ | 203 | void* data; /* for driver use */ |
203 | }; | 204 | }; |
204 | 205 | ||
205 | #include <linux/elevator.h> | 206 | #include <linux/elevator.h> |
206 | 207 | ||
207 | typedef void (request_fn_proc) (struct request_queue *q); | 208 | typedef void (request_fn_proc) (struct request_queue *q); |
208 | typedef void (make_request_fn) (struct request_queue *q, struct bio *bio); | 209 | typedef void (make_request_fn) (struct request_queue *q, struct bio *bio); |
209 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); | 210 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); |
210 | typedef void (unprep_rq_fn) (struct request_queue *, struct request *); | 211 | typedef void (unprep_rq_fn) (struct request_queue *, struct request *); |
211 | 212 | ||
212 | struct bio_vec; | 213 | struct bio_vec; |
213 | struct bvec_merge_data { | 214 | struct bvec_merge_data { |
214 | struct block_device *bi_bdev; | 215 | struct block_device *bi_bdev; |
215 | sector_t bi_sector; | 216 | sector_t bi_sector; |
216 | unsigned bi_size; | 217 | unsigned bi_size; |
217 | unsigned long bi_rw; | 218 | unsigned long bi_rw; |
218 | }; | 219 | }; |
219 | typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, | 220 | typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, |
220 | struct bio_vec *); | 221 | struct bio_vec *); |
221 | typedef void (softirq_done_fn)(struct request *); | 222 | typedef void (softirq_done_fn)(struct request *); |
222 | typedef int (dma_drain_needed_fn)(struct request *); | 223 | typedef int (dma_drain_needed_fn)(struct request *); |
223 | typedef int (lld_busy_fn) (struct request_queue *q); | 224 | typedef int (lld_busy_fn) (struct request_queue *q); |
224 | typedef int (bsg_job_fn) (struct bsg_job *); | 225 | typedef int (bsg_job_fn) (struct bsg_job *); |
225 | 226 | ||
226 | enum blk_eh_timer_return { | 227 | enum blk_eh_timer_return { |
227 | BLK_EH_NOT_HANDLED, | 228 | BLK_EH_NOT_HANDLED, |
228 | BLK_EH_HANDLED, | 229 | BLK_EH_HANDLED, |
229 | BLK_EH_RESET_TIMER, | 230 | BLK_EH_RESET_TIMER, |
230 | }; | 231 | }; |
231 | 232 | ||
232 | typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); | 233 | typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); |
233 | 234 | ||
234 | enum blk_queue_state { | 235 | enum blk_queue_state { |
235 | Queue_down, | 236 | Queue_down, |
236 | Queue_up, | 237 | Queue_up, |
237 | }; | 238 | }; |
238 | 239 | ||
239 | struct blk_queue_tag { | 240 | struct blk_queue_tag { |
240 | struct request **tag_index; /* map of busy tags */ | 241 | struct request **tag_index; /* map of busy tags */ |
241 | unsigned long *tag_map; /* bit map of free/busy tags */ | 242 | unsigned long *tag_map; /* bit map of free/busy tags */ |
242 | int busy; /* current depth */ | 243 | int busy; /* current depth */ |
243 | int max_depth; /* what we will send to device */ | 244 | int max_depth; /* what we will send to device */ |
244 | int real_max_depth; /* what the array can hold */ | 245 | int real_max_depth; /* what the array can hold */ |
245 | atomic_t refcnt; /* map can be shared */ | 246 | atomic_t refcnt; /* map can be shared */ |
246 | }; | 247 | }; |
247 | 248 | ||
248 | #define BLK_SCSI_MAX_CMDS (256) | 249 | #define BLK_SCSI_MAX_CMDS (256) |
249 | #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) | 250 | #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) |
250 | 251 | ||
251 | struct queue_limits { | 252 | struct queue_limits { |
252 | unsigned long bounce_pfn; | 253 | unsigned long bounce_pfn; |
253 | unsigned long seg_boundary_mask; | 254 | unsigned long seg_boundary_mask; |
254 | 255 | ||
255 | unsigned int max_hw_sectors; | 256 | unsigned int max_hw_sectors; |
256 | unsigned int max_sectors; | 257 | unsigned int max_sectors; |
257 | unsigned int max_segment_size; | 258 | unsigned int max_segment_size; |
258 | unsigned int physical_block_size; | 259 | unsigned int physical_block_size; |
259 | unsigned int alignment_offset; | 260 | unsigned int alignment_offset; |
260 | unsigned int io_min; | 261 | unsigned int io_min; |
261 | unsigned int io_opt; | 262 | unsigned int io_opt; |
262 | unsigned int max_discard_sectors; | 263 | unsigned int max_discard_sectors; |
263 | unsigned int discard_granularity; | 264 | unsigned int discard_granularity; |
264 | unsigned int discard_alignment; | 265 | unsigned int discard_alignment; |
265 | 266 | ||
266 | unsigned short logical_block_size; | 267 | unsigned short logical_block_size; |
267 | unsigned short max_segments; | 268 | unsigned short max_segments; |
268 | unsigned short max_integrity_segments; | 269 | unsigned short max_integrity_segments; |
269 | 270 | ||
270 | unsigned char misaligned; | 271 | unsigned char misaligned; |
271 | unsigned char discard_misaligned; | 272 | unsigned char discard_misaligned; |
272 | unsigned char cluster; | 273 | unsigned char cluster; |
273 | unsigned char discard_zeroes_data; | 274 | unsigned char discard_zeroes_data; |
274 | }; | 275 | }; |
275 | 276 | ||
276 | struct request_queue { | 277 | struct request_queue { |
277 | /* | 278 | /* |
278 | * Together with queue_head for cacheline sharing | 279 | * Together with queue_head for cacheline sharing |
279 | */ | 280 | */ |
280 | struct list_head queue_head; | 281 | struct list_head queue_head; |
281 | struct request *last_merge; | 282 | struct request *last_merge; |
282 | struct elevator_queue *elevator; | 283 | struct elevator_queue *elevator; |
283 | 284 | ||
284 | /* | 285 | /* |
285 | * the queue request freelist, one for reads and one for writes | 286 | * the queue request freelist, one for reads and one for writes |
286 | */ | 287 | */ |
287 | struct request_list rq; | 288 | struct request_list rq; |
288 | 289 | ||
289 | request_fn_proc *request_fn; | 290 | request_fn_proc *request_fn; |
290 | make_request_fn *make_request_fn; | 291 | make_request_fn *make_request_fn; |
291 | prep_rq_fn *prep_rq_fn; | 292 | prep_rq_fn *prep_rq_fn; |
292 | unprep_rq_fn *unprep_rq_fn; | 293 | unprep_rq_fn *unprep_rq_fn; |
293 | merge_bvec_fn *merge_bvec_fn; | 294 | merge_bvec_fn *merge_bvec_fn; |
294 | softirq_done_fn *softirq_done_fn; | 295 | softirq_done_fn *softirq_done_fn; |
295 | rq_timed_out_fn *rq_timed_out_fn; | 296 | rq_timed_out_fn *rq_timed_out_fn; |
296 | dma_drain_needed_fn *dma_drain_needed; | 297 | dma_drain_needed_fn *dma_drain_needed; |
297 | lld_busy_fn *lld_busy_fn; | 298 | lld_busy_fn *lld_busy_fn; |
298 | 299 | ||
299 | /* | 300 | /* |
300 | * Dispatch queue sorting | 301 | * Dispatch queue sorting |
301 | */ | 302 | */ |
302 | sector_t end_sector; | 303 | sector_t end_sector; |
303 | struct request *boundary_rq; | 304 | struct request *boundary_rq; |
304 | 305 | ||
305 | /* | 306 | /* |
306 | * Delayed queue handling | 307 | * Delayed queue handling |
307 | */ | 308 | */ |
308 | struct delayed_work delay_work; | 309 | struct delayed_work delay_work; |
309 | 310 | ||
310 | struct backing_dev_info backing_dev_info; | 311 | struct backing_dev_info backing_dev_info; |
311 | 312 | ||
312 | /* | 313 | /* |
313 | * The queue owner gets to use this for whatever they like. | 314 | * The queue owner gets to use this for whatever they like. |
314 | * ll_rw_blk doesn't touch it. | 315 | * ll_rw_blk doesn't touch it. |
315 | */ | 316 | */ |
316 | void *queuedata; | 317 | void *queuedata; |
317 | 318 | ||
318 | /* | 319 | /* |
319 | * various queue flags, see QUEUE_* below | 320 | * various queue flags, see QUEUE_* below |
320 | */ | 321 | */ |
321 | unsigned long queue_flags; | 322 | unsigned long queue_flags; |
322 | 323 | ||
323 | /* | 324 | /* |
324 | * ida allocated id for this queue. Used to index queues from | 325 | * ida allocated id for this queue. Used to index queues from |
325 | * ioctx. | 326 | * ioctx. |
326 | */ | 327 | */ |
327 | int id; | 328 | int id; |
328 | 329 | ||
329 | /* | 330 | /* |
330 | * queue needs bounce pages for pages above this limit | 331 | * queue needs bounce pages for pages above this limit |
331 | */ | 332 | */ |
332 | gfp_t bounce_gfp; | 333 | gfp_t bounce_gfp; |
333 | 334 | ||
334 | /* | 335 | /* |
335 | * protects queue structures from reentrancy. ->__queue_lock should | 336 | * protects queue structures from reentrancy. ->__queue_lock should |
336 | * _never_ be used directly, it is queue private. always use | 337 | * _never_ be used directly, it is queue private. always use |
337 | * ->queue_lock. | 338 | * ->queue_lock. |
338 | */ | 339 | */ |
339 | spinlock_t __queue_lock; | 340 | spinlock_t __queue_lock; |
340 | spinlock_t *queue_lock; | 341 | spinlock_t *queue_lock; |
341 | 342 | ||
342 | /* | 343 | /* |
343 | * queue kobject | 344 | * queue kobject |
344 | */ | 345 | */ |
345 | struct kobject kobj; | 346 | struct kobject kobj; |
346 | 347 | ||
347 | /* | 348 | /* |
348 | * queue settings | 349 | * queue settings |
349 | */ | 350 | */ |
350 | unsigned long nr_requests; /* Max # of requests */ | 351 | unsigned long nr_requests; /* Max # of requests */ |
351 | unsigned int nr_congestion_on; | 352 | unsigned int nr_congestion_on; |
352 | unsigned int nr_congestion_off; | 353 | unsigned int nr_congestion_off; |
353 | unsigned int nr_batching; | 354 | unsigned int nr_batching; |
354 | 355 | ||
355 | unsigned int dma_drain_size; | 356 | unsigned int dma_drain_size; |
356 | void *dma_drain_buffer; | 357 | void *dma_drain_buffer; |
357 | unsigned int dma_pad_mask; | 358 | unsigned int dma_pad_mask; |
358 | unsigned int dma_alignment; | 359 | unsigned int dma_alignment; |
359 | 360 | ||
360 | struct blk_queue_tag *queue_tags; | 361 | struct blk_queue_tag *queue_tags; |
361 | struct list_head tag_busy_list; | 362 | struct list_head tag_busy_list; |
362 | 363 | ||
363 | unsigned int nr_sorted; | 364 | unsigned int nr_sorted; |
364 | unsigned int in_flight[2]; | 365 | unsigned int in_flight[2]; |
365 | 366 | ||
366 | unsigned int rq_timeout; | 367 | unsigned int rq_timeout; |
367 | struct timer_list timeout; | 368 | struct timer_list timeout; |
368 | struct list_head timeout_list; | 369 | struct list_head timeout_list; |
369 | 370 | ||
370 | struct list_head icq_list; | 371 | struct list_head icq_list; |
371 | #ifdef CONFIG_BLK_CGROUP | 372 | #ifdef CONFIG_BLK_CGROUP |
373 | struct blkio_group *root_blkg; | ||
372 | struct list_head blkg_list; | 374 | struct list_head blkg_list; |
373 | #endif | 375 | #endif |
374 | 376 | ||
375 | struct queue_limits limits; | 377 | struct queue_limits limits; |
376 | 378 | ||
377 | /* | 379 | /* |
378 | * sg stuff | 380 | * sg stuff |
379 | */ | 381 | */ |
380 | unsigned int sg_timeout; | 382 | unsigned int sg_timeout; |
381 | unsigned int sg_reserved_size; | 383 | unsigned int sg_reserved_size; |
382 | int node; | 384 | int node; |
383 | #ifdef CONFIG_BLK_DEV_IO_TRACE | 385 | #ifdef CONFIG_BLK_DEV_IO_TRACE |
384 | struct blk_trace *blk_trace; | 386 | struct blk_trace *blk_trace; |
385 | #endif | 387 | #endif |
386 | /* | 388 | /* |
387 | * for flush operations | 389 | * for flush operations |
388 | */ | 390 | */ |
389 | unsigned int flush_flags; | 391 | unsigned int flush_flags; |
390 | unsigned int flush_not_queueable:1; | 392 | unsigned int flush_not_queueable:1; |
391 | unsigned int flush_queue_delayed:1; | 393 | unsigned int flush_queue_delayed:1; |
392 | unsigned int flush_pending_idx:1; | 394 | unsigned int flush_pending_idx:1; |
393 | unsigned int flush_running_idx:1; | 395 | unsigned int flush_running_idx:1; |
394 | unsigned long flush_pending_since; | 396 | unsigned long flush_pending_since; |
395 | struct list_head flush_queue[2]; | 397 | struct list_head flush_queue[2]; |
396 | struct list_head flush_data_in_flight; | 398 | struct list_head flush_data_in_flight; |
397 | struct request flush_rq; | 399 | struct request flush_rq; |
398 | 400 | ||
399 | struct mutex sysfs_lock; | 401 | struct mutex sysfs_lock; |
400 | 402 | ||
401 | int bypass_depth; | 403 | int bypass_depth; |
402 | 404 | ||
403 | #if defined(CONFIG_BLK_DEV_BSG) | 405 | #if defined(CONFIG_BLK_DEV_BSG) |
404 | bsg_job_fn *bsg_job_fn; | 406 | bsg_job_fn *bsg_job_fn; |
405 | int bsg_job_size; | 407 | int bsg_job_size; |
406 | struct bsg_class_device bsg_dev; | 408 | struct bsg_class_device bsg_dev; |
407 | #endif | 409 | #endif |
408 | 410 | ||
409 | #ifdef CONFIG_BLK_CGROUP | 411 | #ifdef CONFIG_BLK_CGROUP |
410 | struct list_head all_q_node; | 412 | struct list_head all_q_node; |
411 | #endif | 413 | #endif |
412 | #ifdef CONFIG_BLK_DEV_THROTTLING | 414 | #ifdef CONFIG_BLK_DEV_THROTTLING |
413 | /* Throttle data */ | 415 | /* Throttle data */ |
414 | struct throtl_data *td; | 416 | struct throtl_data *td; |
415 | #endif | 417 | #endif |
416 | }; | 418 | }; |
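The request_queue hunk above pairs a forward declaration of struct blkio_group with a member that exists only under CONFIG_BLK_CGROUP, so blkdev.h never has to pull in the blk-cgroup headers. A compressed illustration of that pattern follows; BLK_CGROUP_MODEL and model_root_blkg() are invented for the sketch (the commit itself adds only the field, not an accessor):

/* Illustration of the forward-declaration + conditional-member pattern;
 * BLK_CGROUP_MODEL stands in for CONFIG_BLK_CGROUP, helper names are made up. */
#include <stdio.h>

#define BLK_CGROUP_MODEL 1

struct model_blkg;                      /* forward declaration only; no header needed */

struct model_request_queue {
	int id;
#if BLK_CGROUP_MODEL
	struct model_blkg *root_blkg;   /* cached root group, like q->root_blkg */
#endif
};

/* Hypothetical accessor: returns NULL when the feature is compiled out,
 * so callers need no #ifdefs of their own. */
static inline struct model_blkg *model_root_blkg(struct model_request_queue *q)
{
#if BLK_CGROUP_MODEL
	return q->root_blkg;
#else
	(void)q;
	return NULL;
#endif
}

int main(void)
{
	struct model_request_queue q = { .id = 0 };

	printf("root blkg set: %s\n", model_root_blkg(&q) ? "yes" : "no");
	return 0;
}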
417 | 419 | ||
418 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ | 420 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ |
419 | #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ | 421 | #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ |
420 | #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ | 422 | #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ |
421 | #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ | 423 | #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ |
422 | #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ | 424 | #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ |
423 | #define QUEUE_FLAG_BYPASS 6 /* act as dumb FIFO queue */ | 425 | #define QUEUE_FLAG_BYPASS 6 /* act as dumb FIFO queue */ |
424 | #define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ | 426 | #define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ |
425 | #define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ | 427 | #define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ |
426 | #define QUEUE_FLAG_SAME_COMP 9 /* complete on same CPU-group */ | 428 | #define QUEUE_FLAG_SAME_COMP 9 /* complete on same CPU-group */ |
427 | #define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */ | 429 | #define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */ |
428 | #define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */ | 430 | #define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */ |
429 | #define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */ | 431 | #define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */ |
430 | #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ | 432 | #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ |
431 | #define QUEUE_FLAG_IO_STAT 13 /* do IO stats */ | 433 | #define QUEUE_FLAG_IO_STAT 13 /* do IO stats */ |
432 | #define QUEUE_FLAG_DISCARD 14 /* supports DISCARD */ | 434 | #define QUEUE_FLAG_DISCARD 14 /* supports DISCARD */ |
433 | #define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */ | 435 | #define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */ |
434 | #define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ | 436 | #define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ |
435 | #define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ | 437 | #define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ |
436 | #define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */ | 438 | #define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */ |
437 | 439 | ||
438 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ | 440 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ |
439 | (1 << QUEUE_FLAG_STACKABLE) | \ | 441 | (1 << QUEUE_FLAG_STACKABLE) | \ |
440 | (1 << QUEUE_FLAG_SAME_COMP) | \ | 442 | (1 << QUEUE_FLAG_SAME_COMP) | \ |
441 | (1 << QUEUE_FLAG_ADD_RANDOM)) | 443 | (1 << QUEUE_FLAG_ADD_RANDOM)) |
442 | 444 | ||
443 | static inline int queue_is_locked(struct request_queue *q) | 445 | static inline int queue_is_locked(struct request_queue *q) |
444 | { | 446 | { |
445 | #ifdef CONFIG_SMP | 447 | #ifdef CONFIG_SMP |
446 | spinlock_t *lock = q->queue_lock; | 448 | spinlock_t *lock = q->queue_lock; |
447 | return lock && spin_is_locked(lock); | 449 | return lock && spin_is_locked(lock); |
448 | #else | 450 | #else |
449 | return 1; | 451 | return 1; |
450 | #endif | 452 | #endif |
451 | } | 453 | } |
452 | 454 | ||
453 | static inline void queue_flag_set_unlocked(unsigned int flag, | 455 | static inline void queue_flag_set_unlocked(unsigned int flag, |
454 | struct request_queue *q) | 456 | struct request_queue *q) |
455 | { | 457 | { |
456 | __set_bit(flag, &q->queue_flags); | 458 | __set_bit(flag, &q->queue_flags); |
457 | } | 459 | } |
458 | 460 | ||
459 | static inline int queue_flag_test_and_clear(unsigned int flag, | 461 | static inline int queue_flag_test_and_clear(unsigned int flag, |
460 | struct request_queue *q) | 462 | struct request_queue *q) |
461 | { | 463 | { |
462 | WARN_ON_ONCE(!queue_is_locked(q)); | 464 | WARN_ON_ONCE(!queue_is_locked(q)); |
463 | 465 | ||
464 | if (test_bit(flag, &q->queue_flags)) { | 466 | if (test_bit(flag, &q->queue_flags)) { |
465 | __clear_bit(flag, &q->queue_flags); | 467 | __clear_bit(flag, &q->queue_flags); |
466 | return 1; | 468 | return 1; |
467 | } | 469 | } |
468 | 470 | ||
469 | return 0; | 471 | return 0; |
470 | } | 472 | } |
471 | 473 | ||
472 | static inline int queue_flag_test_and_set(unsigned int flag, | 474 | static inline int queue_flag_test_and_set(unsigned int flag, |
473 | struct request_queue *q) | 475 | struct request_queue *q) |
474 | { | 476 | { |
475 | WARN_ON_ONCE(!queue_is_locked(q)); | 477 | WARN_ON_ONCE(!queue_is_locked(q)); |
476 | 478 | ||
477 | if (!test_bit(flag, &q->queue_flags)) { | 479 | if (!test_bit(flag, &q->queue_flags)) { |
478 | __set_bit(flag, &q->queue_flags); | 480 | __set_bit(flag, &q->queue_flags); |
479 | return 0; | 481 | return 0; |
480 | } | 482 | } |
481 | 483 | ||
482 | return 1; | 484 | return 1; |
483 | } | 485 | } |
484 | 486 | ||
485 | static inline void queue_flag_set(unsigned int flag, struct request_queue *q) | 487 | static inline void queue_flag_set(unsigned int flag, struct request_queue *q) |
486 | { | 488 | { |
487 | WARN_ON_ONCE(!queue_is_locked(q)); | 489 | WARN_ON_ONCE(!queue_is_locked(q)); |
488 | __set_bit(flag, &q->queue_flags); | 490 | __set_bit(flag, &q->queue_flags); |
489 | } | 491 | } |
490 | 492 | ||
491 | static inline void queue_flag_clear_unlocked(unsigned int flag, | 493 | static inline void queue_flag_clear_unlocked(unsigned int flag, |
492 | struct request_queue *q) | 494 | struct request_queue *q) |
493 | { | 495 | { |
494 | __clear_bit(flag, &q->queue_flags); | 496 | __clear_bit(flag, &q->queue_flags); |
495 | } | 497 | } |
496 | 498 | ||
497 | static inline int queue_in_flight(struct request_queue *q) | 499 | static inline int queue_in_flight(struct request_queue *q) |
498 | { | 500 | { |
499 | return q->in_flight[0] + q->in_flight[1]; | 501 | return q->in_flight[0] + q->in_flight[1]; |
500 | } | 502 | } |
501 | 503 | ||
502 | static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) | 504 | static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) |
503 | { | 505 | { |
504 | WARN_ON_ONCE(!queue_is_locked(q)); | 506 | WARN_ON_ONCE(!queue_is_locked(q)); |
505 | __clear_bit(flag, &q->queue_flags); | 507 | __clear_bit(flag, &q->queue_flags); |
506 | } | 508 | } |
507 | 509 | ||
508 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) | 510 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) |
509 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) | 511 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) |
510 | #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) | 512 | #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) |
511 | #define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags) | 513 | #define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags) |
512 | #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) | 514 | #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) |
513 | #define blk_queue_noxmerges(q) \ | 515 | #define blk_queue_noxmerges(q) \ |
514 | test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) | 516 | test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) |
515 | #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) | 517 | #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) |
516 | #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) | 518 | #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) |
517 | #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) | 519 | #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) |
518 | #define blk_queue_stackable(q) \ | 520 | #define blk_queue_stackable(q) \ |
519 | test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) | 521 | test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) |
520 | #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) | 522 | #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) |
521 | #define blk_queue_secdiscard(q) (blk_queue_discard(q) && \ | 523 | #define blk_queue_secdiscard(q) (blk_queue_discard(q) && \ |
522 | test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags)) | 524 | test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags)) |
523 | 525 | ||
524 | #define blk_noretry_request(rq) \ | 526 | #define blk_noretry_request(rq) \ |
525 | ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ | 527 | ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ |
526 | REQ_FAILFAST_DRIVER)) | 528 | REQ_FAILFAST_DRIVER)) |
527 | 529 | ||
528 | #define blk_account_rq(rq) \ | 530 | #define blk_account_rq(rq) \ |
529 | (((rq)->cmd_flags & REQ_STARTED) && \ | 531 | (((rq)->cmd_flags & REQ_STARTED) && \ |
530 | ((rq)->cmd_type == REQ_TYPE_FS || \ | 532 | ((rq)->cmd_type == REQ_TYPE_FS || \ |
531 | ((rq)->cmd_flags & REQ_DISCARD))) | 533 | ((rq)->cmd_flags & REQ_DISCARD))) |
532 | 534 | ||
533 | #define blk_pm_request(rq) \ | 535 | #define blk_pm_request(rq) \ |
534 | ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ | 536 | ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ |
535 | (rq)->cmd_type == REQ_TYPE_PM_RESUME) | 537 | (rq)->cmd_type == REQ_TYPE_PM_RESUME) |
536 | 538 | ||
537 | #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) | 539 | #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) |
538 | #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) | 540 | #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) |
539 | /* rq->queuelist of dequeued request must be list_empty() */ | 541 | /* rq->queuelist of dequeued request must be list_empty() */ |
540 | #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) | 542 | #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) |
541 | 543 | ||
542 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) | 544 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) |
543 | 545 | ||
544 | #define rq_data_dir(rq) ((rq)->cmd_flags & 1) | 546 | #define rq_data_dir(rq) ((rq)->cmd_flags & 1) |
545 | 547 | ||
546 | static inline unsigned int blk_queue_cluster(struct request_queue *q) | 548 | static inline unsigned int blk_queue_cluster(struct request_queue *q) |
547 | { | 549 | { |
548 | return q->limits.cluster; | 550 | return q->limits.cluster; |
549 | } | 551 | } |
550 | 552 | ||
551 | /* | 553 | /* |
552 | * We regard a request as sync if it is either a read or a sync write | 554 | * We regard a request as sync if it is either a read or a sync write |
553 | */ | 555 | */ |
554 | static inline bool rw_is_sync(unsigned int rw_flags) | 556 | static inline bool rw_is_sync(unsigned int rw_flags) |
555 | { | 557 | { |
556 | return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC); | 558 | return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC); |
557 | } | 559 | } |
558 | 560 | ||
559 | static inline bool rq_is_sync(struct request *rq) | 561 | static inline bool rq_is_sync(struct request *rq) |
560 | { | 562 | { |
561 | return rw_is_sync(rq->cmd_flags); | 563 | return rw_is_sync(rq->cmd_flags); |
562 | } | 564 | } |
563 | 565 | ||
564 | static inline int blk_queue_full(struct request_queue *q, int sync) | 566 | static inline int blk_queue_full(struct request_queue *q, int sync) |
565 | { | 567 | { |
566 | if (sync) | 568 | if (sync) |
567 | return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags); | 569 | return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags); |
568 | return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags); | 570 | return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags); |
569 | } | 571 | } |
570 | 572 | ||
571 | static inline void blk_set_queue_full(struct request_queue *q, int sync) | 573 | static inline void blk_set_queue_full(struct request_queue *q, int sync) |
572 | { | 574 | { |
573 | if (sync) | 575 | if (sync) |
574 | queue_flag_set(QUEUE_FLAG_SYNCFULL, q); | 576 | queue_flag_set(QUEUE_FLAG_SYNCFULL, q); |
575 | else | 577 | else |
576 | queue_flag_set(QUEUE_FLAG_ASYNCFULL, q); | 578 | queue_flag_set(QUEUE_FLAG_ASYNCFULL, q); |
577 | } | 579 | } |
578 | 580 | ||
579 | static inline void blk_clear_queue_full(struct request_queue *q, int sync) | 581 | static inline void blk_clear_queue_full(struct request_queue *q, int sync) |
580 | { | 582 | { |
581 | if (sync) | 583 | if (sync) |
582 | queue_flag_clear(QUEUE_FLAG_SYNCFULL, q); | 584 | queue_flag_clear(QUEUE_FLAG_SYNCFULL, q); |
583 | else | 585 | else |
584 | queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q); | 586 | queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q); |
585 | } | 587 | } |
586 | 588 | ||
587 | 589 | ||
588 | /* | 590 | /* |
589 | * A mergeable request must not have the _NOMERGE or _BARRIER bit set, nor may | 591 | * A mergeable request must not have the _NOMERGE or _BARRIER bit set, nor may |
590 | * it already have been started by the driver. | 592 | * it already have been started by the driver. |
591 | */ | 593 | */ |
592 | #define RQ_NOMERGE_FLAGS \ | 594 | #define RQ_NOMERGE_FLAGS \ |
593 | (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) | 595 | (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) |
594 | #define rq_mergeable(rq) \ | 596 | #define rq_mergeable(rq) \ |
595 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ | 597 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ |
596 | (((rq)->cmd_flags & REQ_DISCARD) || \ | 598 | (((rq)->cmd_flags & REQ_DISCARD) || \ |
597 | (rq)->cmd_type == REQ_TYPE_FS)) | 599 | (rq)->cmd_type == REQ_TYPE_FS)) |
598 | 600 | ||
599 | /* | 601 | /* |
600 | * q->prep_rq_fn return values | 602 | * q->prep_rq_fn return values |
601 | */ | 603 | */ |
602 | #define BLKPREP_OK 0 /* serve it */ | 604 | #define BLKPREP_OK 0 /* serve it */ |
603 | #define BLKPREP_KILL 1 /* fatal error, kill */ | 605 | #define BLKPREP_KILL 1 /* fatal error, kill */ |
604 | #define BLKPREP_DEFER 2 /* leave on queue */ | 606 | #define BLKPREP_DEFER 2 /* leave on queue */ |
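/*
 * Editor's sketch, not part of this diff: how a driver-side prep_rq_fn
 * (installed with blk_queue_prep_rq(), declared further down) might use
 * these return codes.  my_hw_ready() is a hypothetical helper standing
 * in for whatever resource check the driver performs.
 */
static int my_prep_rq_fn(struct request_queue *q, struct request *rq)
{
	if (!my_hw_ready(q->queuedata))
		return BLKPREP_DEFER;		/* leave it queued, retry later */

	if (blk_rq_sectors(rq) > queue_max_hw_sectors(q))
		return BLKPREP_KILL;		/* fatal, request is errored out */

	return BLKPREP_OK;			/* ready to be dispatched */
}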
605 | 607 | ||
606 | extern unsigned long blk_max_low_pfn, blk_max_pfn; | 608 | extern unsigned long blk_max_low_pfn, blk_max_pfn; |
607 | 609 | ||
608 | /* | 610 | /* |
609 | * standard bounce addresses: | 611 | * standard bounce addresses: |
610 | * | 612 | * |
611 | * BLK_BOUNCE_HIGH : bounce all highmem pages | 613 | * BLK_BOUNCE_HIGH : bounce all highmem pages |
612 | * BLK_BOUNCE_ANY : don't bounce anything | 614 | * BLK_BOUNCE_ANY : don't bounce anything |
613 | * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary | 615 | * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary |
614 | */ | 616 | */ |
615 | 617 | ||
616 | #if BITS_PER_LONG == 32 | 618 | #if BITS_PER_LONG == 32 |
617 | #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) | 619 | #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) |
618 | #else | 620 | #else |
619 | #define BLK_BOUNCE_HIGH -1ULL | 621 | #define BLK_BOUNCE_HIGH -1ULL |
620 | #endif | 622 | #endif |
621 | #define BLK_BOUNCE_ANY (-1ULL) | 623 | #define BLK_BOUNCE_ANY (-1ULL) |
622 | #define BLK_BOUNCE_ISA (DMA_BIT_MASK(24)) | 624 | #define BLK_BOUNCE_ISA (DMA_BIT_MASK(24)) |
623 | 625 | ||
624 | /* | 626 | /* |
625 | * default timeout for SG_IO if none specified | 627 | * default timeout for SG_IO if none specified |
626 | */ | 628 | */ |
627 | #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) | 629 | #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) |
628 | #define BLK_MIN_SG_TIMEOUT (7 * HZ) | 630 | #define BLK_MIN_SG_TIMEOUT (7 * HZ) |
629 | 631 | ||
630 | #ifdef CONFIG_BOUNCE | 632 | #ifdef CONFIG_BOUNCE |
631 | extern int init_emergency_isa_pool(void); | 633 | extern int init_emergency_isa_pool(void); |
632 | extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); | 634 | extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); |
633 | #else | 635 | #else |
634 | static inline int init_emergency_isa_pool(void) | 636 | static inline int init_emergency_isa_pool(void) |
635 | { | 637 | { |
636 | return 0; | 638 | return 0; |
637 | } | 639 | } |
638 | static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) | 640 | static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) |
639 | { | 641 | { |
640 | } | 642 | } |
641 | #endif /* CONFIG_BOUNCE */ | 643 | #endif /* CONFIG_BOUNCE */ |
642 | 644 | ||
643 | struct rq_map_data { | 645 | struct rq_map_data { |
644 | struct page **pages; | 646 | struct page **pages; |
645 | int page_order; | 647 | int page_order; |
646 | int nr_entries; | 648 | int nr_entries; |
647 | unsigned long offset; | 649 | unsigned long offset; |
648 | int null_mapped; | 650 | int null_mapped; |
649 | int from_user; | 651 | int from_user; |
650 | }; | 652 | }; |
651 | 653 | ||
652 | struct req_iterator { | 654 | struct req_iterator { |
653 | int i; | 655 | int i; |
654 | struct bio *bio; | 656 | struct bio *bio; |
655 | }; | 657 | }; |
656 | 658 | ||
657 | /* This should not be used directly - use rq_for_each_segment */ | 659 | /* This should not be used directly - use rq_for_each_segment */ |
658 | #define for_each_bio(_bio) \ | 660 | #define for_each_bio(_bio) \ |
659 | for (; _bio; _bio = _bio->bi_next) | 661 | for (; _bio; _bio = _bio->bi_next) |
660 | #define __rq_for_each_bio(_bio, rq) \ | 662 | #define __rq_for_each_bio(_bio, rq) \ |
661 | if ((rq->bio)) \ | 663 | if ((rq->bio)) \ |
662 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) | 664 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) |
663 | 665 | ||
664 | #define rq_for_each_segment(bvl, _rq, _iter) \ | 666 | #define rq_for_each_segment(bvl, _rq, _iter) \ |
665 | __rq_for_each_bio(_iter.bio, _rq) \ | 667 | __rq_for_each_bio(_iter.bio, _rq) \ |
666 | bio_for_each_segment(bvl, _iter.bio, _iter.i) | 668 | bio_for_each_segment(bvl, _iter.bio, _iter.i) |
667 | 669 | ||
668 | #define rq_iter_last(rq, _iter) \ | 670 | #define rq_iter_last(rq, _iter) \ |
669 | (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) | 671 | (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) |
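/*
 * Editor's illustration, not part of this diff: walking every segment of
 * a request with rq_for_each_segment().  In this kernel the iteration
 * variable is a struct bio_vec pointer; my_count_rq_bytes() is just a
 * made-up helper that sums the segment lengths.
 */
static unsigned int my_count_rq_bytes(struct request *rq)
{
	struct req_iterator iter;
	struct bio_vec *bvec;
	unsigned int bytes = 0;

	rq_for_each_segment(bvec, rq, iter)
		bytes += bvec->bv_len;

	return bytes;
}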
670 | 672 | ||
671 | #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE | 673 | #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
672 | # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" | 674 | # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" |
673 | #endif | 675 | #endif |
674 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE | 676 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
675 | extern void rq_flush_dcache_pages(struct request *rq); | 677 | extern void rq_flush_dcache_pages(struct request *rq); |
676 | #else | 678 | #else |
677 | static inline void rq_flush_dcache_pages(struct request *rq) | 679 | static inline void rq_flush_dcache_pages(struct request *rq) |
678 | { | 680 | { |
679 | } | 681 | } |
680 | #endif | 682 | #endif |
681 | 683 | ||
682 | extern int blk_register_queue(struct gendisk *disk); | 684 | extern int blk_register_queue(struct gendisk *disk); |
683 | extern void blk_unregister_queue(struct gendisk *disk); | 685 | extern void blk_unregister_queue(struct gendisk *disk); |
684 | extern void generic_make_request(struct bio *bio); | 686 | extern void generic_make_request(struct bio *bio); |
685 | extern void blk_rq_init(struct request_queue *q, struct request *rq); | 687 | extern void blk_rq_init(struct request_queue *q, struct request *rq); |
686 | extern void blk_put_request(struct request *); | 688 | extern void blk_put_request(struct request *); |
687 | extern void __blk_put_request(struct request_queue *, struct request *); | 689 | extern void __blk_put_request(struct request_queue *, struct request *); |
688 | extern struct request *blk_get_request(struct request_queue *, int, gfp_t); | 690 | extern struct request *blk_get_request(struct request_queue *, int, gfp_t); |
689 | extern struct request *blk_make_request(struct request_queue *, struct bio *, | 691 | extern struct request *blk_make_request(struct request_queue *, struct bio *, |
690 | gfp_t); | 692 | gfp_t); |
691 | extern void blk_requeue_request(struct request_queue *, struct request *); | 693 | extern void blk_requeue_request(struct request_queue *, struct request *); |
692 | extern void blk_add_request_payload(struct request *rq, struct page *page, | 694 | extern void blk_add_request_payload(struct request *rq, struct page *page, |
693 | unsigned int len); | 695 | unsigned int len); |
694 | extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); | 696 | extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); |
695 | extern int blk_lld_busy(struct request_queue *q); | 697 | extern int blk_lld_busy(struct request_queue *q); |
696 | extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, | 698 | extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, |
697 | struct bio_set *bs, gfp_t gfp_mask, | 699 | struct bio_set *bs, gfp_t gfp_mask, |
698 | int (*bio_ctr)(struct bio *, struct bio *, void *), | 700 | int (*bio_ctr)(struct bio *, struct bio *, void *), |
699 | void *data); | 701 | void *data); |
700 | extern void blk_rq_unprep_clone(struct request *rq); | 702 | extern void blk_rq_unprep_clone(struct request *rq); |
701 | extern int blk_insert_cloned_request(struct request_queue *q, | 703 | extern int blk_insert_cloned_request(struct request_queue *q, |
702 | struct request *rq); | 704 | struct request *rq); |
703 | extern void blk_delay_queue(struct request_queue *, unsigned long); | 705 | extern void blk_delay_queue(struct request_queue *, unsigned long); |
704 | extern void blk_recount_segments(struct request_queue *, struct bio *); | 706 | extern void blk_recount_segments(struct request_queue *, struct bio *); |
705 | extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); | 707 | extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); |
706 | extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, | 708 | extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, |
707 | unsigned int, void __user *); | 709 | unsigned int, void __user *); |
708 | extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, | 710 | extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, |
709 | unsigned int, void __user *); | 711 | unsigned int, void __user *); |
710 | extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, | 712 | extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, |
711 | struct scsi_ioctl_command __user *); | 713 | struct scsi_ioctl_command __user *); |
712 | 714 | ||
713 | extern void blk_queue_bio(struct request_queue *q, struct bio *bio); | 715 | extern void blk_queue_bio(struct request_queue *q, struct bio *bio); |
714 | 716 | ||
715 | /* | 717 | /* |
716 | * A queue has just exited congestion. Note this in the global counter of | 718 | * A queue has just exited congestion. Note this in the global counter of |
717 | * congested queues, and wake up anyone who was waiting for requests to be | 719 | * congested queues, and wake up anyone who was waiting for requests to be |
718 | * put back. | 720 | * put back. |
719 | */ | 721 | */ |
720 | static inline void blk_clear_queue_congested(struct request_queue *q, int sync) | 722 | static inline void blk_clear_queue_congested(struct request_queue *q, int sync) |
721 | { | 723 | { |
722 | clear_bdi_congested(&q->backing_dev_info, sync); | 724 | clear_bdi_congested(&q->backing_dev_info, sync); |
723 | } | 725 | } |
724 | 726 | ||
725 | /* | 727 | /* |
726 | * A queue has just entered congestion. Flag that in the queue's VM-visible | 728 | * A queue has just entered congestion. Flag that in the queue's VM-visible |
727 | * state flags and increment the global counter of congested queues. | 729 | * state flags and increment the global counter of congested queues. |
728 | */ | 730 | */ |
729 | static inline void blk_set_queue_congested(struct request_queue *q, int sync) | 731 | static inline void blk_set_queue_congested(struct request_queue *q, int sync) |
730 | { | 732 | { |
731 | set_bdi_congested(&q->backing_dev_info, sync); | 733 | set_bdi_congested(&q->backing_dev_info, sync); |
732 | } | 734 | } |
733 | 735 | ||
734 | extern void blk_start_queue(struct request_queue *q); | 736 | extern void blk_start_queue(struct request_queue *q); |
735 | extern void blk_stop_queue(struct request_queue *q); | 737 | extern void blk_stop_queue(struct request_queue *q); |
736 | extern void blk_sync_queue(struct request_queue *q); | 738 | extern void blk_sync_queue(struct request_queue *q); |
737 | extern void __blk_stop_queue(struct request_queue *q); | 739 | extern void __blk_stop_queue(struct request_queue *q); |
738 | extern void __blk_run_queue(struct request_queue *q); | 740 | extern void __blk_run_queue(struct request_queue *q); |
739 | extern void blk_run_queue(struct request_queue *); | 741 | extern void blk_run_queue(struct request_queue *); |
740 | extern void blk_run_queue_async(struct request_queue *q); | 742 | extern void blk_run_queue_async(struct request_queue *q); |
741 | extern int blk_rq_map_user(struct request_queue *, struct request *, | 743 | extern int blk_rq_map_user(struct request_queue *, struct request *, |
742 | struct rq_map_data *, void __user *, unsigned long, | 744 | struct rq_map_data *, void __user *, unsigned long, |
743 | gfp_t); | 745 | gfp_t); |
744 | extern int blk_rq_unmap_user(struct bio *); | 746 | extern int blk_rq_unmap_user(struct bio *); |
745 | extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); | 747 | extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); |
746 | extern int blk_rq_map_user_iov(struct request_queue *, struct request *, | 748 | extern int blk_rq_map_user_iov(struct request_queue *, struct request *, |
747 | struct rq_map_data *, struct sg_iovec *, int, | 749 | struct rq_map_data *, struct sg_iovec *, int, |
748 | unsigned int, gfp_t); | 750 | unsigned int, gfp_t); |
749 | extern int blk_execute_rq(struct request_queue *, struct gendisk *, | 751 | extern int blk_execute_rq(struct request_queue *, struct gendisk *, |
750 | struct request *, int); | 752 | struct request *, int); |
751 | extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, | 753 | extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, |
752 | struct request *, int, rq_end_io_fn *); | 754 | struct request *, int, rq_end_io_fn *); |
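/*
 * Editor's sketch, not part of this diff: the usual synchronous
 * pass-through pattern built from the helpers above - allocate a
 * request, map a kernel buffer into it, execute and release it.  The
 * SCSI CDB setup is elided and error handling kept minimal on purpose.
 */
static int my_issue_passthrough(struct request_queue *q, void *buf,
				unsigned int len)
{
	struct request *rq;
	int err;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	if (!rq)
		return -ENOMEM;

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
	/* rq->cmd[] / rq->cmd_len would be filled with the CDB here */

	err = blk_rq_map_kern(q, rq, buf, len, GFP_KERNEL);
	if (!err)
		err = blk_execute_rq(q, NULL, rq, 0);

	blk_put_request(rq);
	return err;
}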
753 | 755 | ||
754 | static inline struct request_queue *bdev_get_queue(struct block_device *bdev) | 756 | static inline struct request_queue *bdev_get_queue(struct block_device *bdev) |
755 | { | 757 | { |
756 | return bdev->bd_disk->queue; | 758 | return bdev->bd_disk->queue; |
757 | } | 759 | } |
758 | 760 | ||
759 | /* | 761 | /* |
760 | * blk_rq_pos() : the current sector | 762 | * blk_rq_pos() : the current sector |
761 | * blk_rq_bytes() : bytes left in the entire request | 763 | * blk_rq_bytes() : bytes left in the entire request |
762 | * blk_rq_cur_bytes() : bytes left in the current segment | 764 | * blk_rq_cur_bytes() : bytes left in the current segment |
763 | * blk_rq_err_bytes() : bytes left till the next error boundary | 765 | * blk_rq_err_bytes() : bytes left till the next error boundary |
764 | * blk_rq_sectors() : sectors left in the entire request | 766 | * blk_rq_sectors() : sectors left in the entire request |
765 | * blk_rq_cur_sectors() : sectors left in the current segment | 767 | * blk_rq_cur_sectors() : sectors left in the current segment |
766 | */ | 768 | */ |
767 | static inline sector_t blk_rq_pos(const struct request *rq) | 769 | static inline sector_t blk_rq_pos(const struct request *rq) |
768 | { | 770 | { |
769 | return rq->__sector; | 771 | return rq->__sector; |
770 | } | 772 | } |
771 | 773 | ||
772 | static inline unsigned int blk_rq_bytes(const struct request *rq) | 774 | static inline unsigned int blk_rq_bytes(const struct request *rq) |
773 | { | 775 | { |
774 | return rq->__data_len; | 776 | return rq->__data_len; |
775 | } | 777 | } |
776 | 778 | ||
777 | static inline int blk_rq_cur_bytes(const struct request *rq) | 779 | static inline int blk_rq_cur_bytes(const struct request *rq) |
778 | { | 780 | { |
779 | return rq->bio ? bio_cur_bytes(rq->bio) : 0; | 781 | return rq->bio ? bio_cur_bytes(rq->bio) : 0; |
780 | } | 782 | } |
781 | 783 | ||
782 | extern unsigned int blk_rq_err_bytes(const struct request *rq); | 784 | extern unsigned int blk_rq_err_bytes(const struct request *rq); |
783 | 785 | ||
784 | static inline unsigned int blk_rq_sectors(const struct request *rq) | 786 | static inline unsigned int blk_rq_sectors(const struct request *rq) |
785 | { | 787 | { |
786 | return blk_rq_bytes(rq) >> 9; | 788 | return blk_rq_bytes(rq) >> 9; |
787 | } | 789 | } |
788 | 790 | ||
789 | static inline unsigned int blk_rq_cur_sectors(const struct request *rq) | 791 | static inline unsigned int blk_rq_cur_sectors(const struct request *rq) |
790 | { | 792 | { |
791 | return blk_rq_cur_bytes(rq) >> 9; | 793 | return blk_rq_cur_bytes(rq) >> 9; |
792 | } | 794 | } |
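/*
 * Editor's illustration, not part of this diff: drivers are expected to
 * read a request's position and size only through the accessors above,
 * e.g. when programming hardware at dispatch time.  my_program_hw() is
 * a hypothetical stand-in.
 */
static void my_program_hw(struct request *rq)
{
	sector_t lba = blk_rq_pos(rq);			/* first sector */
	unsigned int nsect = blk_rq_sectors(rq);	/* sectors left in rq */
	int is_write = rq_data_dir(rq);			/* 0 = read, 1 = write */

	/* hardware register setup would use lba/nsect/is_write here */
	(void)lba; (void)nsect; (void)is_write;
}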
793 | 795 | ||
794 | /* | 796 | /* |
795 | * Request issue related functions. | 797 | * Request issue related functions. |
796 | */ | 798 | */ |
797 | extern struct request *blk_peek_request(struct request_queue *q); | 799 | extern struct request *blk_peek_request(struct request_queue *q); |
798 | extern void blk_start_request(struct request *rq); | 800 | extern void blk_start_request(struct request *rq); |
799 | extern struct request *blk_fetch_request(struct request_queue *q); | 801 | extern struct request *blk_fetch_request(struct request_queue *q); |
800 | 802 | ||
801 | /* | 803 | /* |
802 | * Request completion related functions. | 804 | * Request completion related functions. |
803 | * | 805 | * |
804 | * blk_update_request() completes given number of bytes and updates | 806 | * blk_update_request() completes given number of bytes and updates |
805 | * the request without completing it. | 807 | * the request without completing it. |
806 | * | 808 | * |
807 | * blk_end_request() and friends. __blk_end_request() must be called | 809 | * blk_end_request() and friends. __blk_end_request() must be called |
808 | * with the request queue spinlock acquired. | 810 | * with the request queue spinlock acquired. |
809 | * | 811 | * |
810 | * Several drivers define their own end_request and call | 812 | * Several drivers define their own end_request and call |
811 | * blk_end_request() for parts of the original function. | 813 | * blk_end_request() for parts of the original function. |
812 | * This prevents code duplication in drivers. | 814 | * This prevents code duplication in drivers. |
813 | */ | 815 | */ |
814 | extern bool blk_update_request(struct request *rq, int error, | 816 | extern bool blk_update_request(struct request *rq, int error, |
815 | unsigned int nr_bytes); | 817 | unsigned int nr_bytes); |
816 | extern bool blk_end_request(struct request *rq, int error, | 818 | extern bool blk_end_request(struct request *rq, int error, |
817 | unsigned int nr_bytes); | 819 | unsigned int nr_bytes); |
818 | extern void blk_end_request_all(struct request *rq, int error); | 820 | extern void blk_end_request_all(struct request *rq, int error); |
819 | extern bool blk_end_request_cur(struct request *rq, int error); | 821 | extern bool blk_end_request_cur(struct request *rq, int error); |
820 | extern bool blk_end_request_err(struct request *rq, int error); | 822 | extern bool blk_end_request_err(struct request *rq, int error); |
821 | extern bool __blk_end_request(struct request *rq, int error, | 823 | extern bool __blk_end_request(struct request *rq, int error, |
822 | unsigned int nr_bytes); | 824 | unsigned int nr_bytes); |
823 | extern void __blk_end_request_all(struct request *rq, int error); | 825 | extern void __blk_end_request_all(struct request *rq, int error); |
824 | extern bool __blk_end_request_cur(struct request *rq, int error); | 826 | extern bool __blk_end_request_cur(struct request *rq, int error); |
825 | extern bool __blk_end_request_err(struct request *rq, int error); | 827 | extern bool __blk_end_request_err(struct request *rq, int error); |
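/*
 * Editor's sketch, not part of this diff: partial completion from a
 * hypothetical driver's per-command done path.  __blk_end_request() is
 * used because the caller already holds q->queue_lock; it returns true
 * while the request still has bytes pending.
 */
static void my_cmd_done(struct request *rq, int error, unsigned int done_bytes)
{
	if (__blk_end_request(rq, error, done_bytes))
		return;		/* more to do, the request stays around */

	/* request fully completed and released by the block layer */
}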
826 | 828 | ||
827 | extern void blk_complete_request(struct request *); | 829 | extern void blk_complete_request(struct request *); |
828 | extern void __blk_complete_request(struct request *); | 830 | extern void __blk_complete_request(struct request *); |
829 | extern void blk_abort_request(struct request *); | 831 | extern void blk_abort_request(struct request *); |
830 | extern void blk_abort_queue(struct request_queue *); | 832 | extern void blk_abort_queue(struct request_queue *); |
831 | extern void blk_unprep_request(struct request *); | 833 | extern void blk_unprep_request(struct request *); |
832 | 834 | ||
833 | /* | 835 | /* |
834 | * Access functions for manipulating queue properties | 836 | * Access functions for manipulating queue properties |
835 | */ | 837 | */ |
836 | extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, | 838 | extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, |
837 | spinlock_t *lock, int node_id); | 839 | spinlock_t *lock, int node_id); |
838 | extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); | 840 | extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); |
839 | extern struct request_queue *blk_init_allocated_queue(struct request_queue *, | 841 | extern struct request_queue *blk_init_allocated_queue(struct request_queue *, |
840 | request_fn_proc *, spinlock_t *); | 842 | request_fn_proc *, spinlock_t *); |
841 | extern void blk_cleanup_queue(struct request_queue *); | 843 | extern void blk_cleanup_queue(struct request_queue *); |
842 | extern void blk_queue_make_request(struct request_queue *, make_request_fn *); | 844 | extern void blk_queue_make_request(struct request_queue *, make_request_fn *); |
843 | extern void blk_queue_bounce_limit(struct request_queue *, u64); | 845 | extern void blk_queue_bounce_limit(struct request_queue *, u64); |
844 | extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int); | 846 | extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int); |
845 | extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); | 847 | extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); |
846 | extern void blk_queue_max_segments(struct request_queue *, unsigned short); | 848 | extern void blk_queue_max_segments(struct request_queue *, unsigned short); |
847 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); | 849 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); |
848 | extern void blk_queue_max_discard_sectors(struct request_queue *q, | 850 | extern void blk_queue_max_discard_sectors(struct request_queue *q, |
849 | unsigned int max_discard_sectors); | 851 | unsigned int max_discard_sectors); |
850 | extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); | 852 | extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); |
851 | extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); | 853 | extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); |
852 | extern void blk_queue_alignment_offset(struct request_queue *q, | 854 | extern void blk_queue_alignment_offset(struct request_queue *q, |
853 | unsigned int alignment); | 855 | unsigned int alignment); |
854 | extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); | 856 | extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); |
855 | extern void blk_queue_io_min(struct request_queue *q, unsigned int min); | 857 | extern void blk_queue_io_min(struct request_queue *q, unsigned int min); |
856 | extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); | 858 | extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); |
857 | extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); | 859 | extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); |
858 | extern void blk_set_default_limits(struct queue_limits *lim); | 860 | extern void blk_set_default_limits(struct queue_limits *lim); |
859 | extern void blk_set_stacking_limits(struct queue_limits *lim); | 861 | extern void blk_set_stacking_limits(struct queue_limits *lim); |
860 | extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, | 862 | extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, |
861 | sector_t offset); | 863 | sector_t offset); |
862 | extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, | 864 | extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, |
863 | sector_t offset); | 865 | sector_t offset); |
864 | extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, | 866 | extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, |
865 | sector_t offset); | 867 | sector_t offset); |
866 | extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); | 868 | extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); |
867 | extern void blk_queue_dma_pad(struct request_queue *, unsigned int); | 869 | extern void blk_queue_dma_pad(struct request_queue *, unsigned int); |
868 | extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); | 870 | extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); |
869 | extern int blk_queue_dma_drain(struct request_queue *q, | 871 | extern int blk_queue_dma_drain(struct request_queue *q, |
870 | dma_drain_needed_fn *dma_drain_needed, | 872 | dma_drain_needed_fn *dma_drain_needed, |
871 | void *buf, unsigned int size); | 873 | void *buf, unsigned int size); |
872 | extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); | 874 | extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); |
873 | extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); | 875 | extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); |
874 | extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); | 876 | extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); |
875 | extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn); | 877 | extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn); |
876 | extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); | 878 | extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); |
877 | extern void blk_queue_dma_alignment(struct request_queue *, int); | 879 | extern void blk_queue_dma_alignment(struct request_queue *, int); |
878 | extern void blk_queue_update_dma_alignment(struct request_queue *, int); | 880 | extern void blk_queue_update_dma_alignment(struct request_queue *, int); |
879 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); | 881 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); |
880 | extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); | 882 | extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); |
881 | extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); | 883 | extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); |
882 | extern void blk_queue_flush(struct request_queue *q, unsigned int flush); | 884 | extern void blk_queue_flush(struct request_queue *q, unsigned int flush); |
883 | extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); | 885 | extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); |
884 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); | 886 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); |
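/*
 * Editor's sketch, not part of this diff: a typical driver probe
 * sequence built from the setup helpers above; the limit values are
 * made-up but representative.  my_request_fn and my_lock are
 * hypothetical driver objects.
 */
static struct request_queue *my_setup_queue(request_fn_proc *my_request_fn,
					    spinlock_t *my_lock)
{
	struct request_queue *q;

	q = blk_init_queue(my_request_fn, my_lock);
	if (!q)
		return NULL;

	blk_queue_logical_block_size(q, 512);
	blk_queue_physical_block_size(q, 4096);
	blk_queue_io_min(q, 4096);
	blk_queue_max_hw_sectors(q, 1024);
	blk_queue_max_segments(q, BLK_MAX_SEGMENTS);
	blk_queue_flush(q, REQ_FLUSH | REQ_FUA);

	return q;
}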
885 | 887 | ||
886 | extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); | 888 | extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); |
887 | extern void blk_dump_rq_flags(struct request *, char *); | 889 | extern void blk_dump_rq_flags(struct request *, char *); |
888 | extern long nr_blockdev_pages(void); | 890 | extern long nr_blockdev_pages(void); |
889 | 891 | ||
890 | bool __must_check blk_get_queue(struct request_queue *); | 892 | bool __must_check blk_get_queue(struct request_queue *); |
891 | struct request_queue *blk_alloc_queue(gfp_t); | 893 | struct request_queue *blk_alloc_queue(gfp_t); |
892 | struct request_queue *blk_alloc_queue_node(gfp_t, int); | 894 | struct request_queue *blk_alloc_queue_node(gfp_t, int); |
893 | extern void blk_put_queue(struct request_queue *); | 895 | extern void blk_put_queue(struct request_queue *); |
894 | 896 | ||
895 | /* | 897 | /* |
896 | * blk_plug permits building a queue of related requests by holding the I/O | 898 | * blk_plug permits building a queue of related requests by holding the I/O |
897 | * fragments for a short period. This allows merging of sequential requests | 899 | * fragments for a short period. This allows merging of sequential requests |
898 | * into a single larger request. As the requests are moved from a per-task list to | 900 | * into a single larger request. As the requests are moved from a per-task list to |
899 | * the device's request_queue in a batch, this results in improved scalability | 901 | * the device's request_queue in a batch, this results in improved scalability |
900 | * as the lock contention for request_queue lock is reduced. | 902 | * as the lock contention for request_queue lock is reduced. |
901 | * | 903 | * |
902 | * It is ok not to disable preemption when adding the request to the plug list | 904 | * It is ok not to disable preemption when adding the request to the plug list |
903 | * or when attempting a merge, because blk_schedule_flush_plug() will only flush | 905 | * or when attempting a merge, because blk_schedule_flush_plug() will only flush |
904 | * the plug list when the task sleeps by itself. For details, please see | 906 | * the plug list when the task sleeps by itself. For details, please see |
905 | * schedule() where blk_schedule_flush_plug() is called. | 907 | * schedule() where blk_schedule_flush_plug() is called. |
906 | */ | 908 | */ |
907 | struct blk_plug { | 909 | struct blk_plug { |
908 | unsigned long magic; /* detect uninitialized use-cases */ | 910 | unsigned long magic; /* detect uninitialized use-cases */ |
909 | struct list_head list; /* requests */ | 911 | struct list_head list; /* requests */ |
910 | struct list_head cb_list; /* md requires an unplug callback */ | 912 | struct list_head cb_list; /* md requires an unplug callback */ |
911 | unsigned int should_sort; /* list to be sorted before flushing? */ | 913 | unsigned int should_sort; /* list to be sorted before flushing? */ |
912 | }; | 914 | }; |
913 | #define BLK_MAX_REQUEST_COUNT 16 | 915 | #define BLK_MAX_REQUEST_COUNT 16 |
914 | 916 | ||
915 | struct blk_plug_cb { | 917 | struct blk_plug_cb { |
916 | struct list_head list; | 918 | struct list_head list; |
917 | void (*callback)(struct blk_plug_cb *); | 919 | void (*callback)(struct blk_plug_cb *); |
918 | }; | 920 | }; |
919 | 921 | ||
920 | extern void blk_start_plug(struct blk_plug *); | 922 | extern void blk_start_plug(struct blk_plug *); |
921 | extern void blk_finish_plug(struct blk_plug *); | 923 | extern void blk_finish_plug(struct blk_plug *); |
922 | extern void blk_flush_plug_list(struct blk_plug *, bool); | 924 | extern void blk_flush_plug_list(struct blk_plug *, bool); |
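/*
 * Editor's illustration, not part of this diff: on-stack plugging around
 * a batch of submissions.  The plug is flushed - and adjacent bios get a
 * chance to merge - when blk_finish_plug() runs or the task sleeps.
 */
static void my_submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		generic_make_request(bios[i]);
	blk_finish_plug(&plug);
}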
923 | 925 | ||
924 | static inline void blk_flush_plug(struct task_struct *tsk) | 926 | static inline void blk_flush_plug(struct task_struct *tsk) |
925 | { | 927 | { |
926 | struct blk_plug *plug = tsk->plug; | 928 | struct blk_plug *plug = tsk->plug; |
927 | 929 | ||
928 | if (plug) | 930 | if (plug) |
929 | blk_flush_plug_list(plug, false); | 931 | blk_flush_plug_list(plug, false); |
930 | } | 932 | } |
931 | 933 | ||
932 | static inline void blk_schedule_flush_plug(struct task_struct *tsk) | 934 | static inline void blk_schedule_flush_plug(struct task_struct *tsk) |
933 | { | 935 | { |
934 | struct blk_plug *plug = tsk->plug; | 936 | struct blk_plug *plug = tsk->plug; |
935 | 937 | ||
936 | if (plug) | 938 | if (plug) |
937 | blk_flush_plug_list(plug, true); | 939 | blk_flush_plug_list(plug, true); |
938 | } | 940 | } |
939 | 941 | ||
940 | static inline bool blk_needs_flush_plug(struct task_struct *tsk) | 942 | static inline bool blk_needs_flush_plug(struct task_struct *tsk) |
941 | { | 943 | { |
942 | struct blk_plug *plug = tsk->plug; | 944 | struct blk_plug *plug = tsk->plug; |
943 | 945 | ||
944 | return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list)); | 946 | return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list)); |
945 | } | 947 | } |
946 | 948 | ||
947 | /* | 949 | /* |
948 | * tag stuff | 950 | * tag stuff |
949 | */ | 951 | */ |
950 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) | 952 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) |
951 | extern int blk_queue_start_tag(struct request_queue *, struct request *); | 953 | extern int blk_queue_start_tag(struct request_queue *, struct request *); |
952 | extern struct request *blk_queue_find_tag(struct request_queue *, int); | 954 | extern struct request *blk_queue_find_tag(struct request_queue *, int); |
953 | extern void blk_queue_end_tag(struct request_queue *, struct request *); | 955 | extern void blk_queue_end_tag(struct request_queue *, struct request *); |
954 | extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); | 956 | extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); |
955 | extern void blk_queue_free_tags(struct request_queue *); | 957 | extern void blk_queue_free_tags(struct request_queue *); |
956 | extern int blk_queue_resize_tags(struct request_queue *, int); | 958 | extern int blk_queue_resize_tags(struct request_queue *, int); |
957 | extern void blk_queue_invalidate_tags(struct request_queue *); | 959 | extern void blk_queue_invalidate_tags(struct request_queue *); |
958 | extern struct blk_queue_tag *blk_init_tags(int); | 960 | extern struct blk_queue_tag *blk_init_tags(int); |
959 | extern void blk_free_tags(struct blk_queue_tag *); | 961 | extern void blk_free_tags(struct blk_queue_tag *); |
960 | 962 | ||
961 | static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, | 963 | static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, |
962 | int tag) | 964 | int tag) |
963 | { | 965 | { |
964 | if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) | 966 | if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) |
965 | return NULL; | 967 | return NULL; |
966 | return bqt->tag_index[tag]; | 968 | return bqt->tag_index[tag]; |
967 | } | 969 | } |
968 | 970 | ||
969 | #define BLKDEV_DISCARD_SECURE 0x01 /* secure discard */ | 971 | #define BLKDEV_DISCARD_SECURE 0x01 /* secure discard */ |
970 | 972 | ||
971 | extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); | 973 | extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); |
972 | extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | 974 | extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, |
973 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); | 975 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); |
974 | extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | 976 | extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, |
975 | sector_t nr_sects, gfp_t gfp_mask); | 977 | sector_t nr_sects, gfp_t gfp_mask); |
976 | static inline int sb_issue_discard(struct super_block *sb, sector_t block, | 978 | static inline int sb_issue_discard(struct super_block *sb, sector_t block, |
977 | sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) | 979 | sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) |
978 | { | 980 | { |
979 | return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), | 981 | return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), |
980 | nr_blocks << (sb->s_blocksize_bits - 9), | 982 | nr_blocks << (sb->s_blocksize_bits - 9), |
981 | gfp_mask, flags); | 983 | gfp_mask, flags); |
982 | } | 984 | } |
983 | static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, | 985 | static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, |
984 | sector_t nr_blocks, gfp_t gfp_mask) | 986 | sector_t nr_blocks, gfp_t gfp_mask) |
985 | { | 987 | { |
986 | return blkdev_issue_zeroout(sb->s_bdev, | 988 | return blkdev_issue_zeroout(sb->s_bdev, |
987 | block << (sb->s_blocksize_bits - 9), | 989 | block << (sb->s_blocksize_bits - 9), |
988 | nr_blocks << (sb->s_blocksize_bits - 9), | 990 | nr_blocks << (sb->s_blocksize_bits - 9), |
989 | gfp_mask); | 991 | gfp_mask); |
990 | } | 992 | } |
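/*
 * Editor's sketch, not part of this diff: a filesystem-style caller
 * checking queue capabilities before issuing a discard, optionally
 * requesting a secure erase via BLKDEV_DISCARD_SECURE.
 */
static int my_trim_range(struct block_device *bdev, sector_t sector,
			 sector_t nr_sects, bool secure)
{
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned long flags = secure ? BLKDEV_DISCARD_SECURE : 0;

	if (!blk_queue_discard(q))
		return -EOPNOTSUPP;
	if (secure && !blk_queue_secdiscard(q))
		return -EOPNOTSUPP;

	return blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS, flags);
}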
991 | 993 | ||
992 | extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); | 994 | extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); |
993 | 995 | ||
994 | enum blk_default_limits { | 996 | enum blk_default_limits { |
995 | BLK_MAX_SEGMENTS = 128, | 997 | BLK_MAX_SEGMENTS = 128, |
996 | BLK_SAFE_MAX_SECTORS = 255, | 998 | BLK_SAFE_MAX_SECTORS = 255, |
997 | BLK_DEF_MAX_SECTORS = 1024, | 999 | BLK_DEF_MAX_SECTORS = 1024, |
998 | BLK_MAX_SEGMENT_SIZE = 65536, | 1000 | BLK_MAX_SEGMENT_SIZE = 65536, |
999 | BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, | 1001 | BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, |
1000 | }; | 1002 | }; |
1001 | 1003 | ||
1002 | #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) | 1004 | #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) |
1003 | 1005 | ||
1004 | static inline unsigned long queue_bounce_pfn(struct request_queue *q) | 1006 | static inline unsigned long queue_bounce_pfn(struct request_queue *q) |
1005 | { | 1007 | { |
1006 | return q->limits.bounce_pfn; | 1008 | return q->limits.bounce_pfn; |
1007 | } | 1009 | } |
1008 | 1010 | ||
1009 | static inline unsigned long queue_segment_boundary(struct request_queue *q) | 1011 | static inline unsigned long queue_segment_boundary(struct request_queue *q) |
1010 | { | 1012 | { |
1011 | return q->limits.seg_boundary_mask; | 1013 | return q->limits.seg_boundary_mask; |
1012 | } | 1014 | } |
1013 | 1015 | ||
1014 | static inline unsigned int queue_max_sectors(struct request_queue *q) | 1016 | static inline unsigned int queue_max_sectors(struct request_queue *q) |
1015 | { | 1017 | { |
1016 | return q->limits.max_sectors; | 1018 | return q->limits.max_sectors; |
1017 | } | 1019 | } |
1018 | 1020 | ||
1019 | static inline unsigned int queue_max_hw_sectors(struct request_queue *q) | 1021 | static inline unsigned int queue_max_hw_sectors(struct request_queue *q) |
1020 | { | 1022 | { |
1021 | return q->limits.max_hw_sectors; | 1023 | return q->limits.max_hw_sectors; |
1022 | } | 1024 | } |
1023 | 1025 | ||
1024 | static inline unsigned short queue_max_segments(struct request_queue *q) | 1026 | static inline unsigned short queue_max_segments(struct request_queue *q) |
1025 | { | 1027 | { |
1026 | return q->limits.max_segments; | 1028 | return q->limits.max_segments; |
1027 | } | 1029 | } |
1028 | 1030 | ||
1029 | static inline unsigned int queue_max_segment_size(struct request_queue *q) | 1031 | static inline unsigned int queue_max_segment_size(struct request_queue *q) |
1030 | { | 1032 | { |
1031 | return q->limits.max_segment_size; | 1033 | return q->limits.max_segment_size; |
1032 | } | 1034 | } |
1033 | 1035 | ||
1034 | static inline unsigned short queue_logical_block_size(struct request_queue *q) | 1036 | static inline unsigned short queue_logical_block_size(struct request_queue *q) |
1035 | { | 1037 | { |
1036 | int retval = 512; | 1038 | int retval = 512; |
1037 | 1039 | ||
1038 | if (q && q->limits.logical_block_size) | 1040 | if (q && q->limits.logical_block_size) |
1039 | retval = q->limits.logical_block_size; | 1041 | retval = q->limits.logical_block_size; |
1040 | 1042 | ||
1041 | return retval; | 1043 | return retval; |
1042 | } | 1044 | } |
1043 | 1045 | ||
1044 | static inline unsigned short bdev_logical_block_size(struct block_device *bdev) | 1046 | static inline unsigned short bdev_logical_block_size(struct block_device *bdev) |
1045 | { | 1047 | { |
1046 | return queue_logical_block_size(bdev_get_queue(bdev)); | 1048 | return queue_logical_block_size(bdev_get_queue(bdev)); |
1047 | } | 1049 | } |
1048 | 1050 | ||
1049 | static inline unsigned int queue_physical_block_size(struct request_queue *q) | 1051 | static inline unsigned int queue_physical_block_size(struct request_queue *q) |
1050 | { | 1052 | { |
1051 | return q->limits.physical_block_size; | 1053 | return q->limits.physical_block_size; |
1052 | } | 1054 | } |
1053 | 1055 | ||
1054 | static inline unsigned int bdev_physical_block_size(struct block_device *bdev) | 1056 | static inline unsigned int bdev_physical_block_size(struct block_device *bdev) |
1055 | { | 1057 | { |
1056 | return queue_physical_block_size(bdev_get_queue(bdev)); | 1058 | return queue_physical_block_size(bdev_get_queue(bdev)); |
1057 | } | 1059 | } |
1058 | 1060 | ||
1059 | static inline unsigned int queue_io_min(struct request_queue *q) | 1061 | static inline unsigned int queue_io_min(struct request_queue *q) |
1060 | { | 1062 | { |
1061 | return q->limits.io_min; | 1063 | return q->limits.io_min; |
1062 | } | 1064 | } |
1063 | 1065 | ||
1064 | static inline int bdev_io_min(struct block_device *bdev) | 1066 | static inline int bdev_io_min(struct block_device *bdev) |
1065 | { | 1067 | { |
1066 | return queue_io_min(bdev_get_queue(bdev)); | 1068 | return queue_io_min(bdev_get_queue(bdev)); |
1067 | } | 1069 | } |
1068 | 1070 | ||
1069 | static inline unsigned int queue_io_opt(struct request_queue *q) | 1071 | static inline unsigned int queue_io_opt(struct request_queue *q) |
1070 | { | 1072 | { |
1071 | return q->limits.io_opt; | 1073 | return q->limits.io_opt; |
1072 | } | 1074 | } |
1073 | 1075 | ||
1074 | static inline int bdev_io_opt(struct block_device *bdev) | 1076 | static inline int bdev_io_opt(struct block_device *bdev) |
1075 | { | 1077 | { |
1076 | return queue_io_opt(bdev_get_queue(bdev)); | 1078 | return queue_io_opt(bdev_get_queue(bdev)); |
1077 | } | 1079 | } |
1078 | 1080 | ||
1079 | static inline int queue_alignment_offset(struct request_queue *q) | 1081 | static inline int queue_alignment_offset(struct request_queue *q) |
1080 | { | 1082 | { |
1081 | if (q->limits.misaligned) | 1083 | if (q->limits.misaligned) |
1082 | return -1; | 1084 | return -1; |
1083 | 1085 | ||
1084 | return q->limits.alignment_offset; | 1086 | return q->limits.alignment_offset; |
1085 | } | 1087 | } |
1086 | 1088 | ||
1087 | static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) | 1089 | static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) |
1088 | { | 1090 | { |
1089 | unsigned int granularity = max(lim->physical_block_size, lim->io_min); | 1091 | unsigned int granularity = max(lim->physical_block_size, lim->io_min); |
1090 | unsigned int alignment = (sector << 9) & (granularity - 1); | 1092 | unsigned int alignment = (sector << 9) & (granularity - 1); |
1091 | 1093 | ||
1092 | return (granularity + lim->alignment_offset - alignment) | 1094 | return (granularity + lim->alignment_offset - alignment) |
1093 | & (granularity - 1); | 1095 | & (granularity - 1); |
1094 | } | 1096 | } |
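/*
 * Editor's note, not part of this diff - a worked instance of the
 * formula above: for a drive reporting physical_block_size = io_min =
 * 4096 and alignment_offset = 3584 (a 4K drive whose first aligned LBA
 * is sector 7), granularity is 4096.  Sector 7 sits 3584 bytes into the
 * granule, so (4096 + 3584 - 3584) & 4095 == 0, i.e. aligned; sector 0
 * yields 3584, flagging the misalignment.
 */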
1095 | 1097 | ||
1096 | static inline int bdev_alignment_offset(struct block_device *bdev) | 1098 | static inline int bdev_alignment_offset(struct block_device *bdev) |
1097 | { | 1099 | { |
1098 | struct request_queue *q = bdev_get_queue(bdev); | 1100 | struct request_queue *q = bdev_get_queue(bdev); |
1099 | 1101 | ||
1100 | if (q->limits.misaligned) | 1102 | if (q->limits.misaligned) |
1101 | return -1; | 1103 | return -1; |
1102 | 1104 | ||
1103 | if (bdev != bdev->bd_contains) | 1105 | if (bdev != bdev->bd_contains) |
1104 | return bdev->bd_part->alignment_offset; | 1106 | return bdev->bd_part->alignment_offset; |
1105 | 1107 | ||
1106 | return q->limits.alignment_offset; | 1108 | return q->limits.alignment_offset; |
1107 | } | 1109 | } |
1108 | 1110 | ||
1109 | static inline int queue_discard_alignment(struct request_queue *q) | 1111 | static inline int queue_discard_alignment(struct request_queue *q) |
1110 | { | 1112 | { |
1111 | if (q->limits.discard_misaligned) | 1113 | if (q->limits.discard_misaligned) |
1112 | return -1; | 1114 | return -1; |
1113 | 1115 | ||
1114 | return q->limits.discard_alignment; | 1116 | return q->limits.discard_alignment; |
1115 | } | 1117 | } |
1116 | 1118 | ||
1117 | static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) | 1119 | static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) |
1118 | { | 1120 | { |
1119 | unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); | 1121 | unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); |
1120 | 1122 | ||
1121 | if (!lim->max_discard_sectors) | 1123 | if (!lim->max_discard_sectors) |
1122 | return 0; | 1124 | return 0; |
1123 | 1125 | ||
1124 | return (lim->discard_granularity + lim->discard_alignment - alignment) | 1126 | return (lim->discard_granularity + lim->discard_alignment - alignment) |
1125 | & (lim->discard_granularity - 1); | 1127 | & (lim->discard_granularity - 1); |
1126 | } | 1128 | } |
1127 | 1129 | ||
1128 | static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) | 1130 | static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) |
1129 | { | 1131 | { |
1130 | if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1) | 1132 | if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1) |
1131 | return 1; | 1133 | return 1; |
1132 | 1134 | ||
1133 | return 0; | 1135 | return 0; |
1134 | } | 1136 | } |
1135 | 1137 | ||
1136 | static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) | 1138 | static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) |
1137 | { | 1139 | { |
1138 | return queue_discard_zeroes_data(bdev_get_queue(bdev)); | 1140 | return queue_discard_zeroes_data(bdev_get_queue(bdev)); |
1139 | } | 1141 | } |
1140 | 1142 | ||
1141 | static inline int queue_dma_alignment(struct request_queue *q) | 1143 | static inline int queue_dma_alignment(struct request_queue *q) |
1142 | { | 1144 | { |
1143 | return q ? q->dma_alignment : 511; | 1145 | return q ? q->dma_alignment : 511; |
1144 | } | 1146 | } |
1145 | 1147 | ||
1146 | static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, | 1148 | static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, |
1147 | unsigned int len) | 1149 | unsigned int len) |
1148 | { | 1150 | { |
1149 | unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; | 1151 | unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; |
1150 | return !(addr & alignment) && !(len & alignment); | 1152 | return !(addr & alignment) && !(len & alignment); |
1151 | } | 1153 | } |
1152 | 1154 | ||
1153 | /* assumes size > 256 */ | 1155 | /* assumes size > 256 */ |
1154 | static inline unsigned int blksize_bits(unsigned int size) | 1156 | static inline unsigned int blksize_bits(unsigned int size) |
1155 | { | 1157 | { |
1156 | unsigned int bits = 8; | 1158 | unsigned int bits = 8; |
1157 | do { | 1159 | do { |
1158 | bits++; | 1160 | bits++; |
1159 | size >>= 1; | 1161 | size >>= 1; |
1160 | } while (size > 256); | 1162 | } while (size > 256); |
1161 | return bits; | 1163 | return bits; |
1162 | } | 1164 | } |
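/*
 * Editor's note, not part of this diff: e.g. blksize_bits(512) == 9 and
 * blksize_bits(4096) == 12 - the loop simply computes log2() for the
 * power-of-two block sizes that block devices use.
 */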
1163 | 1165 | ||
1164 | static inline unsigned int block_size(struct block_device *bdev) | 1166 | static inline unsigned int block_size(struct block_device *bdev) |
1165 | { | 1167 | { |
1166 | return bdev->bd_block_size; | 1168 | return bdev->bd_block_size; |
1167 | } | 1169 | } |
1168 | 1170 | ||
1169 | static inline bool queue_flush_queueable(struct request_queue *q) | 1171 | static inline bool queue_flush_queueable(struct request_queue *q) |
1170 | { | 1172 | { |
1171 | return !q->flush_not_queueable; | 1173 | return !q->flush_not_queueable; |
1172 | } | 1174 | } |
1173 | 1175 | ||
1174 | typedef struct {struct page *v;} Sector; | 1176 | typedef struct {struct page *v;} Sector; |
1175 | 1177 | ||
1176 | unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); | 1178 | unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); |
1177 | 1179 | ||
1178 | static inline void put_dev_sector(Sector p) | 1180 | static inline void put_dev_sector(Sector p) |
1179 | { | 1181 | { |
1180 | page_cache_release(p.v); | 1182 | page_cache_release(p.v); |
1181 | } | 1183 | } |
1182 | 1184 | ||
1183 | struct work_struct; | 1185 | struct work_struct; |
1184 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); | 1186 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); |
1185 | 1187 | ||
1186 | #ifdef CONFIG_BLK_CGROUP | 1188 | #ifdef CONFIG_BLK_CGROUP |
1187 | /* | 1189 | /* |
1188 | * This should not be using sched_clock(). A real patch is in progress | 1190 | * This should not be using sched_clock(). A real patch is in progress |
1189 | * to fix this up; until that is in place we need to disable preemption | 1191 | * to fix this up; until that is in place we need to disable preemption |
1190 | * around sched_clock() in this function and set_io_start_time_ns(). | 1192 | * around sched_clock() in this function and set_io_start_time_ns(). |
1191 | */ | 1193 | */ |
1192 | static inline void set_start_time_ns(struct request *req) | 1194 | static inline void set_start_time_ns(struct request *req) |
1193 | { | 1195 | { |
1194 | preempt_disable(); | 1196 | preempt_disable(); |
1195 | req->start_time_ns = sched_clock(); | 1197 | req->start_time_ns = sched_clock(); |
1196 | preempt_enable(); | 1198 | preempt_enable(); |
1197 | } | 1199 | } |
1198 | 1200 | ||
1199 | static inline void set_io_start_time_ns(struct request *req) | 1201 | static inline void set_io_start_time_ns(struct request *req) |
1200 | { | 1202 | { |
1201 | preempt_disable(); | 1203 | preempt_disable(); |
1202 | req->io_start_time_ns = sched_clock(); | 1204 | req->io_start_time_ns = sched_clock(); |
1203 | preempt_enable(); | 1205 | preempt_enable(); |
1204 | } | 1206 | } |
1205 | 1207 | ||
1206 | static inline uint64_t rq_start_time_ns(struct request *req) | 1208 | static inline uint64_t rq_start_time_ns(struct request *req) |
1207 | { | 1209 | { |
1208 | return req->start_time_ns; | 1210 | return req->start_time_ns; |
1209 | } | 1211 | } |
1210 | 1212 | ||
1211 | static inline uint64_t rq_io_start_time_ns(struct request *req) | 1213 | static inline uint64_t rq_io_start_time_ns(struct request *req) |
1212 | { | 1214 | { |
1213 | return req->io_start_time_ns; | 1215 | return req->io_start_time_ns; |
1214 | } | 1216 | } |
1215 | #else | 1217 | #else |
1216 | static inline void set_start_time_ns(struct request *req) {} | 1218 | static inline void set_start_time_ns(struct request *req) {} |
1217 | static inline void set_io_start_time_ns(struct request *req) {} | 1219 | static inline void set_io_start_time_ns(struct request *req) {} |
1218 | static inline uint64_t rq_start_time_ns(struct request *req) | 1220 | static inline uint64_t rq_start_time_ns(struct request *req) |
1219 | { | 1221 | { |
1220 | return 0; | 1222 | return 0; |
1221 | } | 1223 | } |
1222 | static inline uint64_t rq_io_start_time_ns(struct request *req) | 1224 | static inline uint64_t rq_io_start_time_ns(struct request *req) |
1223 | { | 1225 | { |
1224 | return 0; | 1226 | return 0; |
1225 | } | 1227 | } |
1226 | #endif | 1228 | #endif |
1227 | 1229 | ||
1228 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ | 1230 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ |
1229 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) | 1231 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) |
1230 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ | 1232 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ |
1231 | MODULE_ALIAS("block-major-" __stringify(major) "-*") | 1233 | MODULE_ALIAS("block-major-" __stringify(major) "-*") |
1232 | 1234 | ||
1233 | #if defined(CONFIG_BLK_DEV_INTEGRITY) | 1235 | #if defined(CONFIG_BLK_DEV_INTEGRITY) |
1234 | 1236 | ||
1235 | #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ | 1237 | #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ |
1236 | #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ | 1238 | #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ |
1237 | 1239 | ||
1238 | struct blk_integrity_exchg { | 1240 | struct blk_integrity_exchg { |
1239 | void *prot_buf; | 1241 | void *prot_buf; |
1240 | void *data_buf; | 1242 | void *data_buf; |
1241 | sector_t sector; | 1243 | sector_t sector; |
1242 | unsigned int data_size; | 1244 | unsigned int data_size; |
1243 | unsigned short sector_size; | 1245 | unsigned short sector_size; |
1244 | const char *disk_name; | 1246 | const char *disk_name; |
1245 | }; | 1247 | }; |
1246 | 1248 | ||
1247 | typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); | 1249 | typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); |
1248 | typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); | 1250 | typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); |
1249 | typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); | 1251 | typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); |
1250 | typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); | 1252 | typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); |
1251 | 1253 | ||
1252 | struct blk_integrity { | 1254 | struct blk_integrity { |
1253 | integrity_gen_fn *generate_fn; | 1255 | integrity_gen_fn *generate_fn; |
1254 | integrity_vrfy_fn *verify_fn; | 1256 | integrity_vrfy_fn *verify_fn; |
1255 | integrity_set_tag_fn *set_tag_fn; | 1257 | integrity_set_tag_fn *set_tag_fn; |
1256 | integrity_get_tag_fn *get_tag_fn; | 1258 | integrity_get_tag_fn *get_tag_fn; |
1257 | 1259 | ||
1258 | unsigned short flags; | 1260 | unsigned short flags; |
1259 | unsigned short tuple_size; | 1261 | unsigned short tuple_size; |
1260 | unsigned short sector_size; | 1262 | unsigned short sector_size; |
1261 | unsigned short tag_size; | 1263 | unsigned short tag_size; |
1262 | 1264 | ||
1263 | const char *name; | 1265 | const char *name; |
1264 | 1266 | ||
1265 | struct kobject kobj; | 1267 | struct kobject kobj; |
1266 | }; | 1268 | }; |
1267 | 1269 | ||
1268 | extern bool blk_integrity_is_initialized(struct gendisk *); | 1270 | extern bool blk_integrity_is_initialized(struct gendisk *); |
1269 | extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); | 1271 | extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); |
1270 | extern void blk_integrity_unregister(struct gendisk *); | 1272 | extern void blk_integrity_unregister(struct gendisk *); |
1271 | extern int blk_integrity_compare(struct gendisk *, struct gendisk *); | 1273 | extern int blk_integrity_compare(struct gendisk *, struct gendisk *); |
1272 | extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, | 1274 | extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, |
1273 | struct scatterlist *); | 1275 | struct scatterlist *); |
1274 | extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); | 1276 | extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); |
1275 | extern int blk_integrity_merge_rq(struct request_queue *, struct request *, | 1277 | extern int blk_integrity_merge_rq(struct request_queue *, struct request *, |
1276 | struct request *); | 1278 | struct request *); |
1277 | extern int blk_integrity_merge_bio(struct request_queue *, struct request *, | 1279 | extern int blk_integrity_merge_bio(struct request_queue *, struct request *, |
1278 | struct bio *); | 1280 | struct bio *); |
1279 | 1281 | ||
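A protection-capable driver ties these pieces together by filling a struct blk_integrity and registering it against its gendisk: generate_fn produces protection information into prot_buf as data flows out, and verify_fn checks it on the way back in. A hedged sketch with invented names and no-op callbacks (the in-tree T10 DIF profiles, e.g. drivers/scsi/sd_dif.c, follow this general shape):

#include <linux/blkdev.h>
#include <linux/genhd.h>

/* Illustrative profile: the name, tuple size and callbacks are made up. */
static void demo_generate(struct blk_integrity_exchg *bix)
{
	/* A real implementation would compute one protection tuple per
	 * bix->sector_size bytes of bix->data_buf into bix->prot_buf. */
}

static int demo_verify(struct blk_integrity_exchg *bix)
{
	/* A real implementation would recompute and compare against
	 * bix->prot_buf, returning -EIO on mismatch; this stub accepts all. */
	return 0;
}

static struct blk_integrity demo_integrity = {
	.name		= "DEMO-DIF-V0",
	.generate_fn	= demo_generate,
	.verify_fn	= demo_verify,
	.tuple_size	= 8,	/* bytes of protection info per sector */
};

static int demo_attach_integrity(struct gendisk *disk)
{
	return blk_integrity_register(disk, &demo_integrity);
}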
1280 | static inline | 1282 | static inline |
1281 | struct blk_integrity *bdev_get_integrity(struct block_device *bdev) | 1283 | struct blk_integrity *bdev_get_integrity(struct block_device *bdev) |
1282 | { | 1284 | { |
1283 | return bdev->bd_disk->integrity; | 1285 | return bdev->bd_disk->integrity; |
1284 | } | 1286 | } |
1285 | 1287 | ||
1286 | static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) | 1288 | static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) |
1287 | { | 1289 | { |
1288 | return disk->integrity; | 1290 | return disk->integrity; |
1289 | } | 1291 | } |
1290 | 1292 | ||
1291 | static inline int blk_integrity_rq(struct request *rq) | 1293 | static inline int blk_integrity_rq(struct request *rq) |
1292 | { | 1294 | { |
1293 | if (rq->bio == NULL) | 1295 | if (rq->bio == NULL) |
1294 | return 0; | 1296 | return 0; |
1295 | 1297 | ||
1296 | return bio_integrity(rq->bio); | 1298 | return bio_integrity(rq->bio); |
1297 | } | 1299 | } |
1298 | 1300 | ||
1299 | static inline void blk_queue_max_integrity_segments(struct request_queue *q, | 1301 | static inline void blk_queue_max_integrity_segments(struct request_queue *q, |
1300 | unsigned int segs) | 1302 | unsigned int segs) |
1301 | { | 1303 | { |
1302 | q->limits.max_integrity_segments = segs; | 1304 | q->limits.max_integrity_segments = segs; |
1303 | } | 1305 | } |
1304 | 1306 | ||
1305 | static inline unsigned short | 1307 | static inline unsigned short |
1306 | queue_max_integrity_segments(struct request_queue *q) | 1308 | queue_max_integrity_segments(struct request_queue *q) |
1307 | { | 1309 | { |
1308 | return q->limits.max_integrity_segments; | 1310 | return q->limits.max_integrity_segments; |
1309 | } | 1311 | } |
1310 | 1312 | ||
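On the consuming side, a request-based driver checks blk_integrity_rq() at dispatch time and, when protection information is attached, maps it into its own scatterlist, bounded by the queue's max_integrity_segments limit. The sketch below shows roughly what the SCSI midlayer does when building a command; the demo_cmd structure is invented for illustration:

#include <linux/errno.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

struct demo_cmd {
	struct scatterlist *prot_sg;	/* assumed pre-allocated by the caller */
	int prot_sg_count;
};

static int demo_map_protection(struct request_queue *q, struct request *rq,
			       struct demo_cmd *cmd)
{
	int count;

	if (!blk_integrity_rq(rq))
		return 0;		/* no protection information attached */

	count = blk_rq_count_integrity_sg(q, rq->bio);
	if (count > queue_max_integrity_segments(q))
		return -EIO;		/* queue limits should prevent this */

	sg_init_table(cmd->prot_sg, count);
	cmd->prot_sg_count = blk_rq_map_integrity_sg(q, rq->bio, cmd->prot_sg);
	return 0;
}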
1311 | #else /* CONFIG_BLK_DEV_INTEGRITY */ | 1313 | #else /* CONFIG_BLK_DEV_INTEGRITY */ |
1312 | 1314 | ||
1313 | struct bio; | 1315 | struct bio; |
1314 | struct block_device; | 1316 | struct block_device; |
1315 | struct gendisk; | 1317 | struct gendisk; |
1316 | struct blk_integrity; | 1318 | struct blk_integrity; |
1317 | 1319 | ||
1318 | static inline int blk_integrity_rq(struct request *rq) | 1320 | static inline int blk_integrity_rq(struct request *rq) |
1319 | { | 1321 | { |
1320 | return 0; | 1322 | return 0; |
1321 | } | 1323 | } |
1322 | static inline int blk_rq_count_integrity_sg(struct request_queue *q, | 1324 | static inline int blk_rq_count_integrity_sg(struct request_queue *q, |
1323 | struct bio *b) | 1325 | struct bio *b) |
1324 | { | 1326 | { |
1325 | return 0; | 1327 | return 0; |
1326 | } | 1328 | } |
1327 | static inline int blk_rq_map_integrity_sg(struct request_queue *q, | 1329 | static inline int blk_rq_map_integrity_sg(struct request_queue *q, |
1328 | struct bio *b, | 1330 | struct bio *b, |
1329 | struct scatterlist *s) | 1331 | struct scatterlist *s) |
1330 | { | 1332 | { |
1331 | return 0; | 1333 | return 0; |
1332 | } | 1334 | } |
1333 | static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) | 1335 | static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) |
1334 | { | 1336 | { |
1335 | return NULL; | 1337 | return NULL; |
1336 | } | 1338 | } |
1337 | static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) | 1339 | static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) |
1338 | { | 1340 | { |
1339 | return NULL; | 1341 | return NULL; |
1340 | } | 1342 | } |
1341 | static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) | 1343 | static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) |
1342 | { | 1344 | { |
1343 | return 0; | 1345 | return 0; |
1344 | } | 1346 | } |
1345 | static inline int blk_integrity_register(struct gendisk *d, | 1347 | static inline int blk_integrity_register(struct gendisk *d, |
1346 | struct blk_integrity *b) | 1348 | struct blk_integrity *b) |
1347 | { | 1349 | { |
1348 | return 0; | 1350 | return 0; |
1349 | } | 1351 | } |
1350 | static inline void blk_integrity_unregister(struct gendisk *d) | 1352 | static inline void blk_integrity_unregister(struct gendisk *d) |
1351 | { | 1353 | { |
1352 | } | 1354 | } |
1353 | static inline void blk_queue_max_integrity_segments(struct request_queue *q, | 1355 | static inline void blk_queue_max_integrity_segments(struct request_queue *q, |
1354 | unsigned int segs) | 1356 | unsigned int segs) |
1355 | { | 1357 | { |
1356 | } | 1358 | } |
1357 | static inline unsigned short queue_max_integrity_segments(struct request_queue *q) | 1359 | static inline unsigned short queue_max_integrity_segments(struct request_queue *q) |
1358 | { | 1360 | { |
1359 | return 0; | 1361 | return 0; |
1360 | } | 1362 | } |
1361 | static inline int blk_integrity_merge_rq(struct request_queue *rq, | 1363 | static inline int blk_integrity_merge_rq(struct request_queue *rq, |
1362 | struct request *r1, | 1364 | struct request *r1, |
1363 | struct request *r2) | 1365 | struct request *r2) |
1364 | { | 1366 | { |
1365 | return 0; | 1367 | return 0; |
1366 | } | 1368 | } |
1367 | static inline int blk_integrity_merge_bio(struct request_queue *rq, | 1369 | static inline int blk_integrity_merge_bio(struct request_queue *rq, |
1368 | struct request *r, | 1370 | struct request *r, |
1369 | struct bio *b) | 1371 | struct bio *b) |
1370 | { | 1372 | { |
1371 | return 0; | 1373 | return 0; |
1372 | } | 1374 | } |
1373 | static inline bool blk_integrity_is_initialized(struct gendisk *g) | 1375 | static inline bool blk_integrity_is_initialized(struct gendisk *g) |
1374 | { | 1376 | { |
1375 | return false; | 1377 | return false; |
1376 | } | 1378 | } |
1377 | 1379 | ||
1378 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ | 1380 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ |
1379 | 1381 | ||
1380 | struct block_device_operations { | 1382 | struct block_device_operations { |
1381 | int (*open) (struct block_device *, fmode_t); | 1383 | int (*open) (struct block_device *, fmode_t); |
1382 | int (*release) (struct gendisk *, fmode_t); | 1384 | int (*release) (struct gendisk *, fmode_t); |
1383 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | 1385 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
1384 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | 1386 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
1385 | int (*direct_access) (struct block_device *, sector_t, | 1387 | int (*direct_access) (struct block_device *, sector_t, |
1386 | void **, unsigned long *); | 1388 | void **, unsigned long *); |
1387 | unsigned int (*check_events) (struct gendisk *disk, | 1389 | unsigned int (*check_events) (struct gendisk *disk, |
1388 | unsigned int clearing); | 1390 | unsigned int clearing); |
1389 | /* ->media_changed() is DEPRECATED, use ->check_events() instead */ | 1391 | /* ->media_changed() is DEPRECATED, use ->check_events() instead */ |
1390 | int (*media_changed) (struct gendisk *); | 1392 | int (*media_changed) (struct gendisk *); |
1391 | void (*unlock_native_capacity) (struct gendisk *); | 1393 | void (*unlock_native_capacity) (struct gendisk *); |
1392 | int (*revalidate_disk) (struct gendisk *); | 1394 | int (*revalidate_disk) (struct gendisk *); |
1393 | int (*getgeo)(struct block_device *, struct hd_geometry *); | 1395 | int (*getgeo)(struct block_device *, struct hd_geometry *); |
1394 | /* this callback is with swap_lock and sometimes page table lock held */ | 1396 | /* this callback is with swap_lock and sometimes page table lock held */ |
1395 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); | 1397 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); |
1396 | struct module *owner; | 1398 | struct module *owner; |
1397 | }; | 1399 | }; |
1398 | 1400 | ||
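A simple driver only fills in the callbacks it actually supports and leaves the rest NULL. A hedged skeleton for a hypothetical driver (callback names and geometry values are made up):

#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/hdreg.h>

static int demo_open(struct block_device *bdev, fmode_t mode)
{
	return 0;		/* nothing to prepare in this skeleton */
}

static int demo_release(struct gendisk *disk, fmode_t mode)
{
	return 0;
}

static int demo_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	/* Invented CHS values so partitioning tools have something to show. */
	geo->heads     = 4;
	geo->sectors   = 16;
	geo->cylinders = get_capacity(bdev->bd_disk) >> 6;	/* / (4 * 16) */
	geo->start     = 0;
	return 0;
}

static const struct block_device_operations demo_fops = {
	.owner   = THIS_MODULE,
	.open    = demo_open,
	.release = demo_release,
	.getgeo  = demo_getgeo,
};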
1399 | extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, | 1401 | extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, |
1400 | unsigned long); | 1402 | unsigned long); |
1401 | #else /* CONFIG_BLOCK */ | 1403 | #else /* CONFIG_BLOCK */ |
1402 | /* | 1404 | /* |
1403 | * stubs for when the block layer is configured out | 1405 | * stubs for when the block layer is configured out |
1404 | */ | 1406 | */ |
1405 | #define buffer_heads_over_limit 0 | 1407 | #define buffer_heads_over_limit 0 |
1406 | 1408 | ||
1407 | static inline long nr_blockdev_pages(void) | 1409 | static inline long nr_blockdev_pages(void) |
1408 | { | 1410 | { |
1409 | return 0; | 1411 | return 0; |
1410 | } | 1412 | } |
1411 | 1413 | ||
1412 | struct blk_plug { | 1414 | struct blk_plug { |
1413 | }; | 1415 | }; |
1414 | 1416 | ||
1415 | static inline void blk_start_plug(struct blk_plug *plug) | 1417 | static inline void blk_start_plug(struct blk_plug *plug) |
1416 | { | 1418 | { |
1417 | } | 1419 | } |
1418 | 1420 | ||
1419 | static inline void blk_finish_plug(struct blk_plug *plug) | 1421 | static inline void blk_finish_plug(struct blk_plug *plug) |
1420 | { | 1422 | { |
1421 | } | 1423 | } |
1422 | 1424 | ||
1423 | static inline void blk_flush_plug(struct task_struct *task) | 1425 | static inline void blk_flush_plug(struct task_struct *task) |
1424 | { | 1426 | { |
1425 | } | 1427 | } |
1426 | 1428 | ||
1427 | static inline void blk_schedule_flush_plug(struct task_struct *task) | 1429 | static inline void blk_schedule_flush_plug(struct task_struct *task) |
1428 | { | 1430 | { |
1429 | } | 1431 | } |
1430 | 1432 | ||
1431 | 1433 | ||
1432 | static inline bool blk_needs_flush_plug(struct task_struct *tsk) | 1434 | static inline bool blk_needs_flush_plug(struct task_struct *tsk) |
1433 | { | 1435 | { |
1434 | return false; | 1436 | return false; |
1435 | } | 1437 | } |
1436 | 1438 | ||
1437 | #endif /* CONFIG_BLOCK */ | 1439 | #endif /* CONFIG_BLOCK */ |
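The empty definitions above keep callers building when the block layer is configured out; with CONFIG_BLOCK=y the same calls batch a burst of submissions so they reach the driver (and can be merged) when the plug is released. The usual pattern, sketched with an assumed array of already-built bios (submit_bio takes the rw flags as a separate argument in this kernel generation):

#include <linux/blkdev.h>
#include <linux/bio.h>

/* Illustrative only: submit a pre-built batch of bios under one plug. */
static void demo_submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_bio(bios[i]->bi_rw, bios[i]);
	blk_finish_plug(&plug);	/* hands the plugged requests to the queue */
}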
1438 | 1440 | ||
1439 | #endif | 1441 | #endif |
1440 | 1442 |