Blame view
block/blk-wbt.c
19.8 KB
3dcf60bcb block: add SPDX t... |
1 |
// SPDX-License-Identifier: GPL-2.0 |
e34cbd307 blk-wbt: add gene... |
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
/* * buffered writeback throttling. loosely based on CoDel. We can't drop * packets for IO scheduling, so the logic is something like this: * * - Monitor latencies in a defined window of time. * - If the minimum latency in the above window exceeds some target, increment * scaling step and scale down queue depth by a factor of 2x. The monitoring * window is then shrunk to 100 / sqrt(scaling step + 1). * - For any window where we don't have solid data on what the latencies * look like, retain status quo. * - If latencies look good, decrement scaling step. * - If we're only doing writes, allow the scaling step to go negative. This * will temporarily boost write performance, snapping back to a stable * scaling step of 0 if reads show up or the heavy writers finish. Unlike * positive scaling steps where we shrink the monitoring window, a negative * scaling step retains the default step==0 window size. * * Copyright (C) 2016 Jens Axboe * */ #include <linux/kernel.h> #include <linux/blk_types.h> #include <linux/slab.h> #include <linux/backing-dev.h> #include <linux/swap.h> #include "blk-wbt.h" |
a79050434 blk-rq-qos: refac... |
29 |
#include "blk-rq-qos.h" |
e34cbd307 blk-wbt: add gene... |
30 31 32 |
#define CREATE_TRACE_POINTS #include <trace/events/wbt.h> |
a8a459417 block: pass struc... |
33 |
static inline void wbt_clear_state(struct request *rq) |
934031a12 block: move some ... |
34 |
{ |
544ccc8dc block: get rid of... |
35 |
rq->wbt_flags = 0; |
934031a12 block: move some ... |
36 |
} |
a8a459417 block: pass struc... |
37 |
static inline enum wbt_flags wbt_flags(struct request *rq) |
934031a12 block: move some ... |
38 |
{ |
544ccc8dc block: get rid of... |
39 |
return rq->wbt_flags; |
934031a12 block: move some ... |
40 |
} |
a8a459417 block: pass struc... |
41 |
static inline bool wbt_is_tracked(struct request *rq) |
934031a12 block: move some ... |
42 |
{ |
544ccc8dc block: get rid of... |
43 |
return rq->wbt_flags & WBT_TRACKED; |
934031a12 block: move some ... |
44 |
} |
a8a459417 block: pass struc... |
45 |
static inline bool wbt_is_read(struct request *rq) |
934031a12 block: move some ... |
46 |
{ |
544ccc8dc block: get rid of... |
47 |
return rq->wbt_flags & WBT_READ; |
934031a12 block: move some ... |
48 |
} |
e34cbd307 blk-wbt: add gene... |
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
/* Tunables for the throttling state machine. */
enum {
	/*
	 * Default setting, we'll scale up (to 75% of QD max) or down (min 1)
	 * from here depending on device stats
	 */
	RWB_DEF_DEPTH		= 16,

	/*
	 * 100msec window
	 */
	RWB_WINDOW_NSEC		= 100 * 1000 * 1000ULL,

	/*
	 * Disregard stats, if we don't meet this minimum
	 */
	RWB_MIN_WRITE_SAMPLES	= 3,

	/*
	 * If we have this number of consecutive windows with not enough
	 * information to scale up or down, scale up.
	 */
	RWB_UNKNOWN_BUMP	= 5,
};

/*
 * Throttling counts as enabled when @rwb exists and its normal writeback
 * limit is non-zero.
 */
static inline bool rwb_enabled(struct rq_wb *rwb)
{
	if (!rwb)
		return false;

	return rwb->wb_normal != 0;
}
e34cbd307 blk-wbt: add gene... |
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) { if (rwb_enabled(rwb)) { const unsigned long cur = jiffies; if (cur != *var) *var = cur; } } /* * If a task was rate throttled in balance_dirty_pages() within the last * second or so, use that to indicate a higher cleaning rate. */ static bool wb_recent_wait(struct rq_wb *rwb) { |
a79050434 blk-rq-qos: refac... |
93 |
struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb; |
e34cbd307 blk-wbt: add gene... |
94 95 96 |
return time_before(jiffies, wb->dirty_sleep + HZ); } |
8bea60901 blk-wbt: pass in ... |
97 98 |
static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, enum wbt_flags wb_acct) |
e34cbd307 blk-wbt: add gene... |
99 |
{ |
8bea60901 blk-wbt: pass in ... |
100 101 |
if (wb_acct & WBT_KSWAPD) return &rwb->rq_wait[WBT_RWQ_KSWAPD]; |
782f56977 blk-wbt: throttle... |
102 103 |
else if (wb_acct & WBT_DISCARD) return &rwb->rq_wait[WBT_RWQ_DISCARD]; |
8bea60901 blk-wbt: pass in ... |
104 105 |
return &rwb->rq_wait[WBT_RWQ_BG]; |
e34cbd307 blk-wbt: add gene... |
106 107 108 109 110 111 112 113 |
} static void rwb_wake_all(struct rq_wb *rwb) { int i; for (i = 0; i < WBT_NUM_RWQ; i++) { struct rq_wait *rqw = &rwb->rq_wait[i]; |
b78820937 blk-wbt: use wq_h... |
114 |
if (wq_has_sleeper(&rqw->wait)) |
e34cbd307 blk-wbt: add gene... |
115 116 117 |
wake_up_all(&rqw->wait); } } |
061a54275 blk-wbt: abstract... |
118 119 |
static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw, enum wbt_flags wb_acct) |
e34cbd307 blk-wbt: add gene... |
120 |
{ |
e34cbd307 blk-wbt: add gene... |
121 |
int inflight, limit; |
e34cbd307 blk-wbt: add gene... |
122 123 124 125 126 127 128 129 130 131 132 133 |
inflight = atomic_dec_return(&rqw->inflight); /* * wbt got disabled with IO in flight. Wake up any potential * waiters, we don't have to do more than that. */ if (unlikely(!rwb_enabled(rwb))) { rwb_wake_all(rwb); return; } /* |
782f56977 blk-wbt: throttle... |
134 135 136 |
* For discards, our limit is always the background. For writes, if * the device does write back caching, drop further down before we * wake people up. |
e34cbd307 blk-wbt: add gene... |
137 |
*/ |
782f56977 blk-wbt: throttle... |
138 139 140 |
if (wb_acct & WBT_DISCARD) limit = rwb->wb_background; else if (rwb->wc && !wb_recent_wait(rwb)) |
e34cbd307 blk-wbt: add gene... |
141 142 143 144 145 146 147 148 149 |
limit = 0; else limit = rwb->wb_normal; /* * Don't wake anyone up if we are above the normal limit. */ if (inflight && inflight >= limit) return; |
b78820937 blk-wbt: use wq_h... |
150 |
if (wq_has_sleeper(&rqw->wait)) { |
e34cbd307 blk-wbt: add gene... |
151 152 153 |
int diff = limit - inflight; if (!inflight || diff >= rwb->wb_background / 2) |
38cfb5a45 blk-wbt: improve ... |
154 |
wake_up_all(&rqw->wait); |
e34cbd307 blk-wbt: add gene... |
155 156 |
} } |
061a54275 blk-wbt: abstract... |
157 158 159 160 161 162 163 164 165 166 167 |
/*
 * Completion accounting for IO described only by its flags. IO that was
 * never tracked (no WBT_TRACKED) is ignored.
 */
static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
{
	struct rq_wb *rwb = RQWB(rqos);

	if (wb_acct & WBT_TRACKED)
		wbt_rqw_done(rwb, get_rq_wait(rwb, wb_acct), wb_acct);
}
e34cbd307 blk-wbt: add gene... |
168 169 170 171 |
/* * Called on completion of a request. Note that it's also called when * a request is merged, when the request gets freed. */ |
a79050434 blk-rq-qos: refac... |
172 |
static void wbt_done(struct rq_qos *rqos, struct request *rq) |
e34cbd307 blk-wbt: add gene... |
173 |
{ |
a79050434 blk-rq-qos: refac... |
174 |
struct rq_wb *rwb = RQWB(rqos); |
e34cbd307 blk-wbt: add gene... |
175 |
|
a8a459417 block: pass struc... |
176 177 |
if (!wbt_is_tracked(rq)) { if (rwb->sync_cookie == rq) { |
e34cbd307 blk-wbt: add gene... |
178 179 180 |
rwb->sync_issue = 0; rwb->sync_cookie = NULL; } |
a8a459417 block: pass struc... |
181 |
if (wbt_is_read(rq)) |
e34cbd307 blk-wbt: add gene... |
182 |
wb_timestamp(rwb, &rwb->last_comp); |
e34cbd307 blk-wbt: add gene... |
183 |
} else { |
a8a459417 block: pass struc... |
184 |
WARN_ON_ONCE(rq == rwb->sync_cookie); |
a79050434 blk-rq-qos: refac... |
185 |
__wbt_done(rqos, wbt_flags(rq)); |
e34cbd307 blk-wbt: add gene... |
186 |
} |
a8a459417 block: pass struc... |
187 |
wbt_clear_state(rq); |
e34cbd307 blk-wbt: add gene... |
188 |
} |
4121d385f blk-wbt: fix old-... |
189 |
static inline bool stat_sample_valid(struct blk_rq_stat *stat) |
e34cbd307 blk-wbt: add gene... |
190 191 192 193 194 195 196 |
{ /* * We need at least one read sample, and a minimum of * RWB_MIN_WRITE_SAMPLES. We require some write samples to know * that it's writes impacting us, and not just some sole read on * a device that is in a lower power state. */ |
fa2e39cb9 blk-stat: use REA... |
197 198 |
return (stat[READ].nr_samples >= 1 && stat[WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES); |
e34cbd307 blk-wbt: add gene... |
199 200 201 202 |
} static u64 rwb_sync_issue_lat(struct rq_wb *rwb) { |
6aa7de059 locking/atomics: ... |
203 |
u64 now, issue = READ_ONCE(rwb->sync_issue); |
e34cbd307 blk-wbt: add gene... |
204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
if (!issue || !rwb->sync_cookie) return 0; now = ktime_to_ns(ktime_get()); return now - issue; } enum { LAT_OK = 1, LAT_UNKNOWN, LAT_UNKNOWN_WRITES, LAT_EXCEEDED, }; |
34dbad5d2 blk-stat: convert... |
218 |
static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) |
e34cbd307 blk-wbt: add gene... |
219 |
{ |
a79050434 blk-rq-qos: refac... |
220 221 |
struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info; struct rq_depth *rqd = &rwb->rq_depth; |
e34cbd307 blk-wbt: add gene... |
222 223 224 225 226 227 228 229 230 231 232 233 234 |
u64 thislat; /* * If our stored sync issue exceeds the window size, or it * exceeds our min target AND we haven't logged any entries, * flag the latency as exceeded. wbt works off completion latencies, * but for a flooded device, a single sync IO can take a long time * to complete after being issued. If this time exceeds our * monitoring window AND we didn't see any other completions in that * window, then count that sync IO as a violation of the latency. */ thislat = rwb_sync_issue_lat(rwb); if (thislat > rwb->cur_win_nsec || |
fa2e39cb9 blk-stat: use REA... |
235 |
(thislat > rwb->min_lat_nsec && !stat[READ].nr_samples)) { |
d8a0cbfd7 blk-wbt: store qu... |
236 |
trace_wbt_lat(bdi, thislat); |
e34cbd307 blk-wbt: add gene... |
237 238 239 240 241 242 243 244 245 246 247 248 249 |
return LAT_EXCEEDED; } /* * No read/write mix, if stat isn't valid */ if (!stat_sample_valid(stat)) { /* * If we had writes in this stat window and the window is * current, we're only doing writes. If a task recently * waited or still has writes in flights, consider us doing * just writes as well. */ |
34dbad5d2 blk-stat: convert... |
250 251 |
if (stat[WRITE].nr_samples || wb_recent_wait(rwb) || wbt_inflight(rwb)) |
e34cbd307 blk-wbt: add gene... |
252 253 254 255 256 257 258 |
return LAT_UNKNOWN_WRITES; return LAT_UNKNOWN; } /* * If the 'min' latency exceeds our target, step down. */ |
fa2e39cb9 blk-stat: use REA... |
259 260 |
if (stat[READ].min > rwb->min_lat_nsec) { trace_wbt_lat(bdi, stat[READ].min); |
d8a0cbfd7 blk-wbt: store qu... |
261 |
trace_wbt_stat(bdi, stat); |
e34cbd307 blk-wbt: add gene... |
262 263 |
return LAT_EXCEEDED; } |
a79050434 blk-rq-qos: refac... |
264 |
if (rqd->scale_step) |
d8a0cbfd7 blk-wbt: store qu... |
265 |
trace_wbt_stat(bdi, stat); |
e34cbd307 blk-wbt: add gene... |
266 267 268 |
return LAT_OK; } |
e34cbd307 blk-wbt: add gene... |
269 270 |
/*
 * Emit the wbt_step tracepoint with the current scaling state; @msg names
 * the transition being traced.
 */
static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
{
	trace_wbt_step(rwb->rqos.q->backing_dev_info, msg,
		       rwb->rq_depth.scale_step, rwb->cur_win_nsec,
		       rwb->wb_background, rwb->wb_normal,
		       rwb->rq_depth.max_depth);
}
a79050434 blk-rq-qos: refac... |
277 |
static void calc_wb_limits(struct rq_wb *rwb) |
e34cbd307 blk-wbt: add gene... |
278 |
{ |
a79050434 blk-rq-qos: refac... |
279 280 281 282 283 284 285 286 287 288 |
if (rwb->min_lat_nsec == 0) { rwb->wb_normal = rwb->wb_background = 0; } else if (rwb->rq_depth.max_depth <= 2) { rwb->wb_normal = rwb->rq_depth.max_depth; rwb->wb_background = 1; } else { rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2; rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4; } } |
e34cbd307 blk-wbt: add gene... |
289 |
|
a79050434 blk-rq-qos: refac... |
290 291 |
static void scale_up(struct rq_wb *rwb) { |
b84477d3e blk-wbt: fix perf... |
292 293 |
if (!rq_depth_scale_up(&rwb->rq_depth)) return; |
a79050434 blk-rq-qos: refac... |
294 |
calc_wb_limits(rwb); |
e34cbd307 blk-wbt: add gene... |
295 |
rwb->unknown_cnt = 0; |
5e65a2034 blk-wbt: wake up ... |
296 |
rwb_wake_all(rwb); |
3a89c25d9 blk-wbt: Use trac... |
297 |
rwb_trace_step(rwb, tracepoint_string("scale up")); |
e34cbd307 blk-wbt: add gene... |
298 |
} |
e34cbd307 blk-wbt: add gene... |
299 300 |
static void scale_down(struct rq_wb *rwb, bool hard_throttle) { |
b84477d3e blk-wbt: fix perf... |
301 302 |
if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle)) return; |
e34cbd307 blk-wbt: add gene... |
303 |
calc_wb_limits(rwb); |
a79050434 blk-rq-qos: refac... |
304 |
rwb->unknown_cnt = 0; |
3a89c25d9 blk-wbt: Use trac... |
305 |
rwb_trace_step(rwb, tracepoint_string("scale down")); |
e34cbd307 blk-wbt: add gene... |
306 307 308 309 |
} static void rwb_arm_timer(struct rq_wb *rwb) { |
a79050434 blk-rq-qos: refac... |
310 311 312 |
struct rq_depth *rqd = &rwb->rq_depth; if (rqd->scale_step > 0) { |
e34cbd307 blk-wbt: add gene... |
313 314 315 316 317 318 319 |
/* * We should speed this up, using some variant of a fast * integer inverse square root calculation. Since we only do * this for every window expiration, it's not a huge deal, * though. */ rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4, |
a79050434 blk-rq-qos: refac... |
320 |
int_sqrt((rqd->scale_step + 1) << 8)); |
e34cbd307 blk-wbt: add gene... |
321 322 323 324 325 326 327 |
} else { /* * For step < 0, we don't want to increase/decrease the * window size. */ rwb->cur_win_nsec = rwb->win_nsec; } |
34dbad5d2 blk-stat: convert... |
328 |
blk_stat_activate_nsecs(rwb->cb, rwb->cur_win_nsec); |
e34cbd307 blk-wbt: add gene... |
329 |
} |
34dbad5d2 blk-stat: convert... |
330 |
static void wb_timer_fn(struct blk_stat_callback *cb) |
e34cbd307 blk-wbt: add gene... |
331 |
{ |
34dbad5d2 blk-stat: convert... |
332 |
struct rq_wb *rwb = cb->data; |
a79050434 blk-rq-qos: refac... |
333 |
struct rq_depth *rqd = &rwb->rq_depth; |
e34cbd307 blk-wbt: add gene... |
334 335 |
unsigned int inflight = wbt_inflight(rwb); int status; |
34dbad5d2 blk-stat: convert... |
336 |
status = latency_exceeded(rwb, cb->stat); |
e34cbd307 blk-wbt: add gene... |
337 |
|
a79050434 blk-rq-qos: refac... |
338 |
trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step, |
d8a0cbfd7 blk-wbt: store qu... |
339 |
inflight); |
e34cbd307 blk-wbt: add gene... |
340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 |
/* * If we exceeded the latency target, step down. If we did not, * step one level up. If we don't know enough to say either exceeded * or ok, then don't do anything. */ switch (status) { case LAT_EXCEEDED: scale_down(rwb, true); break; case LAT_OK: scale_up(rwb); break; case LAT_UNKNOWN_WRITES: /* * We started a the center step, but don't have a valid * read/write sample, but we do have writes going on. * Allow step to go negative, to increase write perf. */ scale_up(rwb); break; case LAT_UNKNOWN: if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP) break; /* * We get here when previously scaled reduced depth, and we * currently don't have a valid read/write sample. For that * case, slowly return to center state (step == 0). */ |
a79050434 blk-rq-qos: refac... |
369 |
if (rqd->scale_step > 0) |
e34cbd307 blk-wbt: add gene... |
370 |
scale_up(rwb); |
a79050434 blk-rq-qos: refac... |
371 |
else if (rqd->scale_step < 0) |
e34cbd307 blk-wbt: add gene... |
372 373 374 375 376 377 378 379 380 |
scale_down(rwb, false); break; default: break; } /* * Re-arm timer, if we have IO in flight */ |
a79050434 blk-rq-qos: refac... |
381 |
if (rqd->scale_step || inflight) |
e34cbd307 blk-wbt: add gene... |
382 383 |
rwb_arm_timer(rwb); } |
4d89e1d11 blk-wbt: rename _... |
384 |
static void wbt_update_limits(struct rq_wb *rwb) |
e34cbd307 blk-wbt: add gene... |
385 |
{ |
a79050434 blk-rq-qos: refac... |
386 387 388 389 390 391 |
struct rq_depth *rqd = &rwb->rq_depth; rqd->scale_step = 0; rqd->scaled_max = false; rq_depth_calc_max_depth(rqd); |
e34cbd307 blk-wbt: add gene... |
392 393 394 395 |
calc_wb_limits(rwb); rwb_wake_all(rwb); } |
a79050434 blk-rq-qos: refac... |
396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 |
/*
 * Return the minimum latency target of @q in nanoseconds, or 0 when no
 * wbt instance is attached to the queue.
 */
u64 wbt_get_min_lat(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);

	return rqos ? RQWB(rqos)->min_lat_nsec : 0;
}

/*
 * Set the minimum latency target of @q to @val, mark wbt as manually
 * enabled and recompute the limits. Does nothing when no wbt instance is
 * attached to the queue.
 */
void wbt_set_min_lat(struct request_queue *q, u64 val)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	struct rq_wb *rwb;

	if (!rqos)
		return;

	rwb = RQWB(rqos);
	rwb->min_lat_nsec = val;
	rwb->enable_state = WBT_STATE_ON_MANUAL;
	wbt_update_limits(rwb);
}
e34cbd307 blk-wbt: add gene... |
413 414 415 416 417 418 419 420 421 422 423 424 425 |
static bool close_io(struct rq_wb *rwb) { const unsigned long now = jiffies; return time_before(now, rwb->last_issue + HZ / 10) || time_before(now, rwb->last_comp + HZ / 10); } #define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO) static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) { unsigned int limit; |
ffa358dca blk-wbt: move dis... |
426 427 428 429 430 431 |
/* * If we got disabled, just return UINT_MAX. This ensures that * we'll properly inc a new IO, and dec+wakeup at the end. */ if (!rwb_enabled(rwb)) return UINT_MAX; |
782f56977 blk-wbt: throttle... |
432 433 |
if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD) return rwb->wb_background; |
e34cbd307 blk-wbt: add gene... |
434 435 |
/* * At this point we know it's a buffered write. If this is |
3dfbdc44d blk-wbt: fix comm... |
436 |
* kswapd trying to free memory, or REQ_SYNC is set, then |
e34cbd307 blk-wbt: add gene... |
437 438 439 440 441 442 |
* it's WB_SYNC_ALL writeback, and we'll use the max limit for * that. If the write is marked as a background write, then use * the idle limit, or go to normal if we haven't had competing * IO for a bit. */ if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd()) |
a79050434 blk-rq-qos: refac... |
443 |
limit = rwb->rq_depth.max_depth; |
e34cbd307 blk-wbt: add gene... |
444 445 446 447 448 449 450 451 452 453 454 |
else if ((rw & REQ_BACKGROUND) || close_io(rwb)) { /* * If less than 100ms since we completed unrelated IO, * limit us to half the depth for background writeback. */ limit = rwb->wb_background; } else limit = rwb->wb_normal; return limit; } |
38cfb5a45 blk-wbt: improve ... |
455 |
struct wbt_wait_data { |
38cfb5a45 blk-wbt: improve ... |
456 |
struct rq_wb *rwb; |
b6c7b58f5 block: convert wb... |
457 |
enum wbt_flags wb_acct; |
38cfb5a45 blk-wbt: improve ... |
458 |
unsigned long rw; |
38cfb5a45 blk-wbt: improve ... |
459 |
}; |
b6c7b58f5 block: convert wb... |
460 |
static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data) |
38cfb5a45 blk-wbt: improve ... |
461 |
{ |
b6c7b58f5 block: convert wb... |
462 463 464 |
struct wbt_wait_data *data = private_data; return rq_wait_inc_below(rqw, get_limit(data->rwb, data->rw)); } |
38cfb5a45 blk-wbt: improve ... |
465 |
|
b6c7b58f5 block: convert wb... |
466 467 468 469 |
/*
 * rq_qos_wait() cleanup callback: run the normal completion accounting
 * for the waiting IO on @rqw.
 */
static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
{
	struct wbt_wait_data *wait = private_data;

	wbt_rqw_done(wait->rwb, rqw, wait->wb_acct);
}
e34cbd307 blk-wbt: add gene... |
471 472 473 474 |
/* * Block if we will exceed our limit, or if we are currently waiting for * the timer to kick off queuing again. */ |
8bea60901 blk-wbt: pass in ... |
475 |
static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct, |
d53375608 block: remove the... |
476 |
unsigned long rw) |
e34cbd307 blk-wbt: add gene... |
477 |
{ |
8bea60901 blk-wbt: pass in ... |
478 |
struct rq_wait *rqw = get_rq_wait(rwb, wb_acct); |
38cfb5a45 blk-wbt: improve ... |
479 |
struct wbt_wait_data data = { |
38cfb5a45 blk-wbt: improve ... |
480 |
.rwb = rwb, |
b6c7b58f5 block: convert wb... |
481 |
.wb_acct = wb_acct, |
38cfb5a45 blk-wbt: improve ... |
482 483 |
.rw = rw, }; |
e34cbd307 blk-wbt: add gene... |
484 |
|
b6c7b58f5 block: convert wb... |
485 |
rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb); |
e34cbd307 blk-wbt: add gene... |
486 487 488 489 |
} static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio) { |
782f56977 blk-wbt: throttle... |
490 491 492 493 494 495 496 497 |
switch (bio_op(bio)) { case REQ_OP_WRITE: /* * Don't throttle WRITE_ODIRECT */ if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) == (REQ_SYNC | REQ_IDLE)) return false; |
df561f668 treewide: Use fal... |
498 |
fallthrough; |
782f56977 blk-wbt: throttle... |
499 500 501 |
case REQ_OP_DISCARD: return true; default: |
e34cbd307 blk-wbt: add gene... |
502 |
return false; |
782f56977 blk-wbt: throttle... |
503 |
} |
e34cbd307 blk-wbt: add gene... |
504 |
} |
c1c80384c block: remove ext... |
505 506 507 |
static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio) { enum wbt_flags flags = 0; |
c125311d9 blk-wbt: don't ma... |
508 509 |
if (!rwb_enabled(rwb)) return 0; |
c1c80384c block: remove ext... |
510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 |
if (bio_op(bio) == REQ_OP_READ) { flags = WBT_READ; } else if (wbt_should_throttle(rwb, bio)) { if (current_is_kswapd()) flags |= WBT_KSWAPD; if (bio_op(bio) == REQ_OP_DISCARD) flags |= WBT_DISCARD; flags |= WBT_TRACKED; } return flags; } static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio) { struct rq_wb *rwb = RQWB(rqos); enum wbt_flags flags = bio_to_wbt_flags(rwb, bio); __wbt_done(rqos, flags); } |
e34cbd307 blk-wbt: add gene... |
528 529 530 531 532 533 |
/* * Returns true if the IO request should be accounted, false if not. * May sleep, if we have exceeded the writeback limits. Caller can pass * in an irq held spinlock, if it holds one when calling this function. * If we do sleep, we'll release and re-grab it. */ |
d53375608 block: remove the... |
534 |
static void wbt_wait(struct rq_qos *rqos, struct bio *bio) |
e34cbd307 blk-wbt: add gene... |
535 |
{ |
a79050434 blk-rq-qos: refac... |
536 |
struct rq_wb *rwb = RQWB(rqos); |
c1c80384c block: remove ext... |
537 |
enum wbt_flags flags; |
e34cbd307 blk-wbt: add gene... |
538 |
|
c1c80384c block: remove ext... |
539 |
flags = bio_to_wbt_flags(rwb, bio); |
df60f6e83 blk-wbt: fix IO h... |
540 |
if (!(flags & WBT_TRACKED)) { |
c1c80384c block: remove ext... |
541 |
if (flags & WBT_READ) |
e34cbd307 blk-wbt: add gene... |
542 |
wb_timestamp(rwb, &rwb->last_issue); |
c1c80384c block: remove ext... |
543 |
return; |
e34cbd307 blk-wbt: add gene... |
544 |
} |
d53375608 block: remove the... |
545 |
__wbt_wait(rwb, flags, bio->bi_opf); |
e34cbd307 blk-wbt: add gene... |
546 |
|
34dbad5d2 blk-stat: convert... |
547 |
if (!blk_stat_is_active(rwb->cb)) |
e34cbd307 blk-wbt: add gene... |
548 |
rwb_arm_timer(rwb); |
c1c80384c block: remove ext... |
549 |
} |
e34cbd307 blk-wbt: add gene... |
550 |
|
c1c80384c block: remove ext... |
551 552 553 554 |
static void wbt_track(struct rq_qos *rqos, struct request *rq, struct bio *bio) { struct rq_wb *rwb = RQWB(rqos); rq->wbt_flags |= bio_to_wbt_flags(rwb, bio); |
e34cbd307 blk-wbt: add gene... |
555 |
} |
c83f536a8 blk-wbt: Declare ... |
556 |
static void wbt_issue(struct rq_qos *rqos, struct request *rq) |
e34cbd307 blk-wbt: add gene... |
557 |
{ |
a79050434 blk-rq-qos: refac... |
558 |
struct rq_wb *rwb = RQWB(rqos); |
e34cbd307 blk-wbt: add gene... |
559 560 561 562 |
if (!rwb_enabled(rwb)) return; /* |
a8a459417 block: pass struc... |
563 564 565 566 567 |
* Track sync issue, in case it takes a long time to complete. Allows us * to react quicker, if a sync IO takes a long time to complete. Note * that this is just a hint. The request can go away when it completes, * so it's important we never dereference it. We only use the address to * compare with, which is why we store the sync_issue time locally. |
e34cbd307 blk-wbt: add gene... |
568 |
*/ |
a8a459417 block: pass struc... |
569 570 |
if (wbt_is_read(rq) && !rwb->sync_issue) { rwb->sync_cookie = rq; |
544ccc8dc block: get rid of... |
571 |
rwb->sync_issue = rq->io_start_time_ns; |
e34cbd307 blk-wbt: add gene... |
572 573 |
} } |
c83f536a8 blk-wbt: Declare ... |
574 |
static void wbt_requeue(struct rq_qos *rqos, struct request *rq) |
e34cbd307 blk-wbt: add gene... |
575 |
{ |
a79050434 blk-rq-qos: refac... |
576 |
struct rq_wb *rwb = RQWB(rqos); |
e34cbd307 blk-wbt: add gene... |
577 578 |
if (!rwb_enabled(rwb)) return; |
a8a459417 block: pass struc... |
579 |
if (rq == rwb->sync_cookie) { |
e34cbd307 blk-wbt: add gene... |
580 581 582 583 |
rwb->sync_issue = 0; rwb->sync_cookie = NULL; } } |
a79050434 blk-rq-qos: refac... |
584 |
void wbt_set_write_cache(struct request_queue *q, bool write_cache_on) |
e34cbd307 blk-wbt: add gene... |
585 |
{ |
a79050434 blk-rq-qos: refac... |
586 587 588 |
struct rq_qos *rqos = wbt_rq_qos(q); if (rqos) RQWB(rqos)->wc = write_cache_on; |
e34cbd307 blk-wbt: add gene... |
589 |
} |
e34cbd307 blk-wbt: add gene... |
590 |
|
8330cdb0f block: Make write... |
591 592 593 594 595 |
/* * Enable wbt if defaults are configured that way */ void wbt_enable_default(struct request_queue *q) { |
a79050434 blk-rq-qos: refac... |
596 |
struct rq_qos *rqos = wbt_rq_qos(q); |
8330cdb0f block: Make write... |
597 |
/* Throttling already enabled? */ |
a79050434 blk-rq-qos: refac... |
598 |
if (rqos) |
8330cdb0f block: Make write... |
599 600 601 |
return; /* Queue not registered? Maybe shutting down... */ |
58c898ba3 block: add helper... |
602 |
if (!blk_queue_registered(q)) |
8330cdb0f block: Make write... |
603 |
return; |
344e9ffcb block: add queue_... |
604 |
if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ)) |
8330cdb0f block: Make write... |
605 606 607 |
wbt_init(q); } EXPORT_SYMBOL_GPL(wbt_enable_default); |
80e091d10 blk-wbt: allow re... |
608 609 610 611 612 613 614 615 616 617 618 |
/*
 * Default latency target in nanoseconds: 2msec for non-rotational
 * storage, and 75msec for rotational storage.
 */
u64 wbt_default_latency_nsec(struct request_queue *q)
{
	return blk_queue_nonrot(q) ? 2000000ULL : 75000000ULL;
}
99c749a4c blk-stat: kill bl... |
619 620 |
static int wbt_data_dir(const struct request *rq) { |
5235553d8 blk-wbt: account ... |
621 622 623 624 |
const int op = req_op(rq); if (op == REQ_OP_READ) return READ; |
825843b0a blk-wbt: account ... |
625 |
else if (op_is_write(op)) |
5235553d8 blk-wbt: account ... |
626 627 628 629 |
return WRITE; /* don't account */ return -1; |
99c749a4c blk-stat: kill bl... |
630 |
} |
9677a3e01 block/rq_qos: imp... |
631 632 633 |
static void wbt_queue_depth_changed(struct rq_qos *rqos) { RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q); |
4d89e1d11 blk-wbt: rename _... |
634 |
wbt_update_limits(RQWB(rqos)); |
9677a3e01 block/rq_qos: imp... |
635 |
} |
a79050434 blk-rq-qos: refac... |
636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 |
static void wbt_exit(struct rq_qos *rqos) { struct rq_wb *rwb = RQWB(rqos); struct request_queue *q = rqos->q; blk_stat_remove_callback(q, rwb->cb); blk_stat_free_callback(rwb->cb); kfree(rwb); } /* * Disable wbt, if enabled by default. */ void wbt_disable_default(struct request_queue *q) { struct rq_qos *rqos = wbt_rq_qos(q); struct rq_wb *rwb; if (!rqos) return; rwb = RQWB(rqos); |
544fbd16a block: deactivate... |
656 657 |
if (rwb->enable_state == WBT_STATE_ON_DEFAULT) { blk_stat_deactivate(rwb->cb); |
a79050434 blk-rq-qos: refac... |
658 |
rwb->wb_normal = 0; |
544fbd16a block: deactivate... |
659 |
} |
a79050434 blk-rq-qos: refac... |
660 |
} |
e815f404a block: add wbt_di... |
661 |
EXPORT_SYMBOL_GPL(wbt_disable_default); |
a79050434 blk-rq-qos: refac... |
662 |
|
d19afebca blk-wbt: export i... |
663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 |
#ifdef CONFIG_BLK_DEBUG_FS
/*
 * debugfs show handlers. Each one receives the rq_qos instance as @data
 * and prints a single value to @m. Every record is terminated with a
 * newline so values do not run together when read.
 */

/* Current monitoring window size, in nanoseconds. */
static int wbt_curr_win_nsec_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%llu\n", rwb->cur_win_nsec);
	return 0;
}

/* Raw enable state value. */
static int wbt_enabled_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%d\n", rwb->enable_state);
	return 0;
}

/* rq_qos policy id. */
static int wbt_id_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;

	seq_printf(m, "%u\n", rqos->id);
	return 0;
}

/* Inflight count of every wait queue, one queue per line. */
static int wbt_inflight_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);
	int i;

	for (i = 0; i < WBT_NUM_RWQ; i++)
		seq_printf(m, "%d: inflight %d\n", i,
			   atomic_read(&rwb->rq_wait[i].inflight));
	return 0;
}

/* Minimum latency target, in nanoseconds. */
static int wbt_min_lat_nsec_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%lu\n", rwb->min_lat_nsec);
	return 0;
}

/* Number of consecutive windows without a usable verdict. */
static int wbt_unknown_cnt_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->unknown_cnt);
	return 0;
}

/* Current normal writeback limit. */
static int wbt_normal_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->wb_normal);
	return 0;
}

/* Current background writeback limit. */
static int wbt_background_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->wb_background);
	return 0;
}

/* Read-only (0400) attribute table, terminated by an empty entry. */
static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
	{"curr_win_nsec", 0400, wbt_curr_win_nsec_show},
	{"enabled", 0400, wbt_enabled_show},
	{"id", 0400, wbt_id_show},
	{"inflight", 0400, wbt_inflight_show},
	{"min_lat_nsec", 0400, wbt_min_lat_nsec_show},
	{"unknown_cnt", 0400, wbt_unknown_cnt_show},
	{"wb_normal", 0400, wbt_normal_show},
	{"wb_background", 0400, wbt_background_show},
	{},
};
#endif
a79050434 blk-rq-qos: refac... |
758 759 760 |
static struct rq_qos_ops wbt_rqos_ops = { .throttle = wbt_wait, .issue = wbt_issue, |
c1c80384c block: remove ext... |
761 |
.track = wbt_track, |
a79050434 blk-rq-qos: refac... |
762 763 |
.requeue = wbt_requeue, .done = wbt_done, |
c1c80384c block: remove ext... |
764 |
.cleanup = wbt_cleanup, |
9677a3e01 block/rq_qos: imp... |
765 |
.queue_depth_changed = wbt_queue_depth_changed, |
a79050434 blk-rq-qos: refac... |
766 |
.exit = wbt_exit, |
d19afebca blk-wbt: export i... |
767 768 769 |
#ifdef CONFIG_BLK_DEBUG_FS .debugfs_attrs = wbt_debugfs_attrs, #endif |
a79050434 blk-rq-qos: refac... |
770 |
}; |
8054b89f8 blk-wbt: remove s... |
771 |
int wbt_init(struct request_queue *q) |
e34cbd307 blk-wbt: add gene... |
772 773 774 |
{ struct rq_wb *rwb; int i; |
e34cbd307 blk-wbt: add gene... |
775 776 777 |
rwb = kzalloc(sizeof(*rwb), GFP_KERNEL); if (!rwb) return -ENOMEM; |
99c749a4c blk-stat: kill bl... |
778 |
rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb); |
34dbad5d2 blk-stat: convert... |
779 780 781 782 |
if (!rwb->cb) { kfree(rwb); return -ENOMEM; } |
a79050434 blk-rq-qos: refac... |
783 784 |
for (i = 0; i < WBT_NUM_RWQ; i++) rq_wait_init(&rwb->rq_wait[i]); |
e34cbd307 blk-wbt: add gene... |
785 |
|
a79050434 blk-rq-qos: refac... |
786 787 788 |
rwb->rqos.id = RQ_QOS_WBT; rwb->rqos.ops = &wbt_rqos_ops; rwb->rqos.q = q; |
e34cbd307 blk-wbt: add gene... |
789 |
rwb->last_comp = rwb->last_issue = jiffies; |
e34cbd307 blk-wbt: add gene... |
790 |
rwb->win_nsec = RWB_WINDOW_NSEC; |
d62118b6d blk-wbt: allow wb... |
791 |
rwb->enable_state = WBT_STATE_ON_DEFAULT; |
a79050434 blk-rq-qos: refac... |
792 793 |
rwb->wc = 1; rwb->rq_depth.default_depth = RWB_DEF_DEPTH; |
4d89e1d11 blk-wbt: rename _... |
794 |
wbt_update_limits(rwb); |
e34cbd307 blk-wbt: add gene... |
795 796 |
/* |
34dbad5d2 blk-stat: convert... |
797 |
* Assign rwb and add the stats callback. |
e34cbd307 blk-wbt: add gene... |
798 |
*/ |
a79050434 blk-rq-qos: refac... |
799 |
rq_qos_add(q, &rwb->rqos); |
34dbad5d2 blk-stat: convert... |
800 |
blk_stat_add_callback(q, rwb->cb); |
e34cbd307 blk-wbt: add gene... |
801 |
|
80e091d10 blk-wbt: allow re... |
802 |
rwb->min_lat_nsec = wbt_default_latency_nsec(q); |
e34cbd307 blk-wbt: add gene... |
803 |
|
9677a3e01 block/rq_qos: imp... |
804 |
wbt_queue_depth_changed(&rwb->rqos); |
a79050434 blk-rq-qos: refac... |
805 |
wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); |
e34cbd307 blk-wbt: add gene... |
806 807 808 |
return 0; } |