Blame view

block/blk-wbt.c 19.8 KB
3dcf60bcb   Christoph Hellwig   block: add SPDX t...
1
  // SPDX-License-Identifier: GPL-2.0
e34cbd307   Jens Axboe   blk-wbt: add gene...
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
  /*
   * buffered writeback throttling. loosely based on CoDel. We can't drop
   * packets for IO scheduling, so the logic is something like this:
   *
   * - Monitor latencies in a defined window of time.
   * - If the minimum latency in the above window exceeds some target, increment
   *   scaling step and scale down queue depth by a factor of 2x. The monitoring
   *   window is then shrunk to 100 / sqrt(scaling step + 1).
   * - For any window where we don't have solid data on what the latencies
   *   look like, retain status quo.
   * - If latencies look good, decrement scaling step.
   * - If we're only doing writes, allow the scaling step to go negative. This
   *   will temporarily boost write performance, snapping back to a stable
   *   scaling step of 0 if reads show up or the heavy writers finish. Unlike
   *   positive scaling steps where we shrink the monitoring window, a negative
   *   scaling step retains the default step==0 window size.
   *
   * Copyright (C) 2016 Jens Axboe
   *
   */
  #include <linux/kernel.h>
  #include <linux/blk_types.h>
  #include <linux/slab.h>
  #include <linux/backing-dev.h>
  #include <linux/swap.h>
  
  #include "blk-wbt.h"
a79050434   Josef Bacik   blk-rq-qos: refac...
29
  #include "blk-rq-qos.h"
e34cbd307   Jens Axboe   blk-wbt: add gene...
30
31
32
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/wbt.h>
a8a459417   Omar Sandoval   block: pass struc...
33
  static inline void wbt_clear_state(struct request *rq)
934031a12   Omar Sandoval   block: move some ...
34
  {
544ccc8dc   Omar Sandoval   block: get rid of...
35
  	rq->wbt_flags = 0;
934031a12   Omar Sandoval   block: move some ...
36
  }
a8a459417   Omar Sandoval   block: pass struc...
37
  static inline enum wbt_flags wbt_flags(struct request *rq)
934031a12   Omar Sandoval   block: move some ...
38
  {
544ccc8dc   Omar Sandoval   block: get rid of...
39
  	return rq->wbt_flags;
934031a12   Omar Sandoval   block: move some ...
40
  }
a8a459417   Omar Sandoval   block: pass struc...
41
  static inline bool wbt_is_tracked(struct request *rq)
934031a12   Omar Sandoval   block: move some ...
42
  {
544ccc8dc   Omar Sandoval   block: get rid of...
43
  	return rq->wbt_flags & WBT_TRACKED;
934031a12   Omar Sandoval   block: move some ...
44
  }
a8a459417   Omar Sandoval   block: pass struc...
45
  static inline bool wbt_is_read(struct request *rq)
934031a12   Omar Sandoval   block: move some ...
46
  {
544ccc8dc   Omar Sandoval   block: get rid of...
47
  	return rq->wbt_flags & WBT_READ;
934031a12   Omar Sandoval   block: move some ...
48
  }
e34cbd307   Jens Axboe   blk-wbt: add gene...
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
  /* Tunables for the writeback throttling state machine. */
  enum {
  	/*
  	 * Starting depth. Device stats move us up (to 75% of QD max) or
  	 * down (min 1) from this value.
  	 */
  	RWB_DEF_DEPTH	= 16,

  	/* Length of the default monitoring window: 100msec, in nsec. */
  	RWB_WINDOW_NSEC		= 100 * 1000 * 1000ULL,

  	/* Stats with fewer write samples than this are disregarded. */
  	RWB_MIN_WRITE_SAMPLES	= 3,

  	/*
  	 * After this many consecutive windows without enough information
  	 * to scale up or down, scale up.
  	 */
  	RWB_UNKNOWN_BUMP	= 5,
  };
  
  static inline bool rwb_enabled(struct rq_wb *rwb)
  {
  	return rwb && rwb->wb_normal != 0;
  }
e34cbd307   Jens Axboe   blk-wbt: add gene...
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
  /*
   * Store the current jiffies value in *@var, but only while throttling
   * is enabled. The store is skipped when *@var already holds that value.
   */
  static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
  {
  	unsigned long now;

  	if (!rwb_enabled(rwb))
  		return;

  	now = jiffies;
  	if (*var != now)
  		*var = now;
  }
  
  /*
   * If a task was rate throttled in balance_dirty_pages() within the last
   * second or so, use that to indicate a higher cleaning rate.
   */
  static bool wb_recent_wait(struct rq_wb *rwb)
  {
a79050434   Josef Bacik   blk-rq-qos: refac...
93
  	struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb;
e34cbd307   Jens Axboe   blk-wbt: add gene...
94
95
96
  
  	return time_before(jiffies, wb->dirty_sleep + HZ);
  }
8bea60901   Jens Axboe   blk-wbt: pass in ...
97
98
  static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
  					  enum wbt_flags wb_acct)
e34cbd307   Jens Axboe   blk-wbt: add gene...
99
  {
8bea60901   Jens Axboe   blk-wbt: pass in ...
100
101
  	if (wb_acct & WBT_KSWAPD)
  		return &rwb->rq_wait[WBT_RWQ_KSWAPD];
782f56977   Jens Axboe   blk-wbt: throttle...
102
103
  	else if (wb_acct & WBT_DISCARD)
  		return &rwb->rq_wait[WBT_RWQ_DISCARD];
8bea60901   Jens Axboe   blk-wbt: pass in ...
104
105
  
  	return &rwb->rq_wait[WBT_RWQ_BG];
e34cbd307   Jens Axboe   blk-wbt: add gene...
106
107
108
109
110
111
112
113
  }
  
  static void rwb_wake_all(struct rq_wb *rwb)
  {
  	int i;
  
  	for (i = 0; i < WBT_NUM_RWQ; i++) {
  		struct rq_wait *rqw = &rwb->rq_wait[i];
b78820937   Jens Axboe   blk-wbt: use wq_h...
114
  		if (wq_has_sleeper(&rqw->wait))
e34cbd307   Jens Axboe   blk-wbt: add gene...
115
116
117
  			wake_up_all(&rqw->wait);
  	}
  }
061a54275   Jens Axboe   blk-wbt: abstract...
118
119
  static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
  			 enum wbt_flags wb_acct)
e34cbd307   Jens Axboe   blk-wbt: add gene...
120
  {
e34cbd307   Jens Axboe   blk-wbt: add gene...
121
  	int inflight, limit;
e34cbd307   Jens Axboe   blk-wbt: add gene...
122
123
124
125
126
127
128
129
130
131
132
133
  	inflight = atomic_dec_return(&rqw->inflight);
  
  	/*
  	 * wbt got disabled with IO in flight. Wake up any potential
  	 * waiters, we don't have to do more than that.
  	 */
  	if (unlikely(!rwb_enabled(rwb))) {
  		rwb_wake_all(rwb);
  		return;
  	}
  
  	/*
782f56977   Jens Axboe   blk-wbt: throttle...
134
135
136
  	 * For discards, our limit is always the background. For writes, if
  	 * the device does write back caching, drop further down before we
  	 * wake people up.
e34cbd307   Jens Axboe   blk-wbt: add gene...
137
  	 */
782f56977   Jens Axboe   blk-wbt: throttle...
138
139
140
  	if (wb_acct & WBT_DISCARD)
  		limit = rwb->wb_background;
  	else if (rwb->wc && !wb_recent_wait(rwb))
e34cbd307   Jens Axboe   blk-wbt: add gene...
141
142
143
144
145
146
147
148
149
  		limit = 0;
  	else
  		limit = rwb->wb_normal;
  
  	/*
  	 * Don't wake anyone up if we are above the normal limit.
  	 */
  	if (inflight && inflight >= limit)
  		return;
b78820937   Jens Axboe   blk-wbt: use wq_h...
150
  	if (wq_has_sleeper(&rqw->wait)) {
e34cbd307   Jens Axboe   blk-wbt: add gene...
151
152
153
  		int diff = limit - inflight;
  
  		if (!inflight || diff >= rwb->wb_background / 2)
38cfb5a45   Jens Axboe   blk-wbt: improve ...
154
  			wake_up_all(&rqw->wait);
e34cbd307   Jens Axboe   blk-wbt: add gene...
155
156
  	}
  }
061a54275   Jens Axboe   blk-wbt: abstract...
157
158
159
160
161
162
163
164
165
166
167
  /*
   * Completion accounting for the flags in @wb_acct. Flags without
   * WBT_TRACKED are ignored.
   */
  static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
  {
  	struct rq_wb *rwb;

  	if (!(wb_acct & WBT_TRACKED))
  		return;

  	rwb = RQWB(rqos);
  	wbt_rqw_done(rwb, get_rq_wait(rwb, wb_acct), wb_acct);
  }
e34cbd307   Jens Axboe   blk-wbt: add gene...
168
169
170
171
  /*
   * Called on completion of a request. Note that it's also called when
   * a request is merged, when the request gets freed.
   */
a79050434   Josef Bacik   blk-rq-qos: refac...
172
  static void wbt_done(struct rq_qos *rqos, struct request *rq)
e34cbd307   Jens Axboe   blk-wbt: add gene...
173
  {
a79050434   Josef Bacik   blk-rq-qos: refac...
174
  	struct rq_wb *rwb = RQWB(rqos);
e34cbd307   Jens Axboe   blk-wbt: add gene...
175

a8a459417   Omar Sandoval   block: pass struc...
176
177
  	if (!wbt_is_tracked(rq)) {
  		if (rwb->sync_cookie == rq) {
e34cbd307   Jens Axboe   blk-wbt: add gene...
178
179
180
  			rwb->sync_issue = 0;
  			rwb->sync_cookie = NULL;
  		}
a8a459417   Omar Sandoval   block: pass struc...
181
  		if (wbt_is_read(rq))
e34cbd307   Jens Axboe   blk-wbt: add gene...
182
  			wb_timestamp(rwb, &rwb->last_comp);
e34cbd307   Jens Axboe   blk-wbt: add gene...
183
  	} else {
a8a459417   Omar Sandoval   block: pass struc...
184
  		WARN_ON_ONCE(rq == rwb->sync_cookie);
a79050434   Josef Bacik   blk-rq-qos: refac...
185
  		__wbt_done(rqos, wbt_flags(rq));
e34cbd307   Jens Axboe   blk-wbt: add gene...
186
  	}
a8a459417   Omar Sandoval   block: pass struc...
187
  	wbt_clear_state(rq);
e34cbd307   Jens Axboe   blk-wbt: add gene...
188
  }
4121d385f   Arnd Bergmann   blk-wbt: fix old-...
189
  static inline bool stat_sample_valid(struct blk_rq_stat *stat)
e34cbd307   Jens Axboe   blk-wbt: add gene...
190
191
192
193
194
195
196
  {
  	/*
  	 * We need at least one read sample, and a minimum of
  	 * RWB_MIN_WRITE_SAMPLES. We require some write samples to know
  	 * that it's writes impacting us, and not just some sole read on
  	 * a device that is in a lower power state.
  	 */
fa2e39cb9   Omar Sandoval   blk-stat: use REA...
197
198
  	return (stat[READ].nr_samples >= 1 &&
  		stat[WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES);
e34cbd307   Jens Axboe   blk-wbt: add gene...
199
200
201
202
  }
  
  static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
  {
6aa7de059   Mark Rutland   locking/atomics: ...
203
  	u64 now, issue = READ_ONCE(rwb->sync_issue);
e34cbd307   Jens Axboe   blk-wbt: add gene...
204
205
206
207
208
209
210
211
212
213
214
215
216
217
  
  	if (!issue || !rwb->sync_cookie)
  		return 0;
  
  	now = ktime_to_ns(ktime_get());
  	return now - issue;
  }
  
  /* Verdicts returned by latency_exceeded(). */
  enum {
  	LAT_OK			= 1,
  	LAT_UNKNOWN		= 2,
  	LAT_UNKNOWN_WRITES	= 3,
  	LAT_EXCEEDED		= 4,
  };
34dbad5d2   Omar Sandoval   blk-stat: convert...
218
  static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
e34cbd307   Jens Axboe   blk-wbt: add gene...
219
  {
a79050434   Josef Bacik   blk-rq-qos: refac...
220
221
  	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
  	struct rq_depth *rqd = &rwb->rq_depth;
e34cbd307   Jens Axboe   blk-wbt: add gene...
222
223
224
225
226
227
228
229
230
231
232
233
234
  	u64 thislat;
  
  	/*
  	 * If our stored sync issue exceeds the window size, or it
  	 * exceeds our min target AND we haven't logged any entries,
  	 * flag the latency as exceeded. wbt works off completion latencies,
  	 * but for a flooded device, a single sync IO can take a long time
  	 * to complete after being issued. If this time exceeds our
  	 * monitoring window AND we didn't see any other completions in that
  	 * window, then count that sync IO as a violation of the latency.
  	 */
  	thislat = rwb_sync_issue_lat(rwb);
  	if (thislat > rwb->cur_win_nsec ||
fa2e39cb9   Omar Sandoval   blk-stat: use REA...
235
  	    (thislat > rwb->min_lat_nsec && !stat[READ].nr_samples)) {
d8a0cbfd7   Jens Axboe   blk-wbt: store qu...
236
  		trace_wbt_lat(bdi, thislat);
e34cbd307   Jens Axboe   blk-wbt: add gene...
237
238
239
240
241
242
243
244
245
246
247
248
249
  		return LAT_EXCEEDED;
  	}
  
  	/*
  	 * No read/write mix, if stat isn't valid
  	 */
  	if (!stat_sample_valid(stat)) {
  		/*
  		 * If we had writes in this stat window and the window is
  		 * current, we're only doing writes. If a task recently
  		 * waited or still has writes in flights, consider us doing
  		 * just writes as well.
  		 */
34dbad5d2   Omar Sandoval   blk-stat: convert...
250
251
  		if (stat[WRITE].nr_samples || wb_recent_wait(rwb) ||
  		    wbt_inflight(rwb))
e34cbd307   Jens Axboe   blk-wbt: add gene...
252
253
254
255
256
257
258
  			return LAT_UNKNOWN_WRITES;
  		return LAT_UNKNOWN;
  	}
  
  	/*
  	 * If the 'min' latency exceeds our target, step down.
  	 */
fa2e39cb9   Omar Sandoval   blk-stat: use REA...
259
260
  	if (stat[READ].min > rwb->min_lat_nsec) {
  		trace_wbt_lat(bdi, stat[READ].min);
d8a0cbfd7   Jens Axboe   blk-wbt: store qu...
261
  		trace_wbt_stat(bdi, stat);
e34cbd307   Jens Axboe   blk-wbt: add gene...
262
263
  		return LAT_EXCEEDED;
  	}
a79050434   Josef Bacik   blk-rq-qos: refac...
264
  	if (rqd->scale_step)
d8a0cbfd7   Jens Axboe   blk-wbt: store qu...
265
  		trace_wbt_stat(bdi, stat);
e34cbd307   Jens Axboe   blk-wbt: add gene...
266
267
268
  
  	return LAT_OK;
  }
e34cbd307   Jens Axboe   blk-wbt: add gene...
269
270
  static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
  {
a79050434   Josef Bacik   blk-rq-qos: refac...
271
272
  	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
  	struct rq_depth *rqd = &rwb->rq_depth;
d8a0cbfd7   Jens Axboe   blk-wbt: store qu...
273

a79050434   Josef Bacik   blk-rq-qos: refac...
274
275
  	trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
  			rwb->wb_background, rwb->wb_normal, rqd->max_depth);
e34cbd307   Jens Axboe   blk-wbt: add gene...
276
  }
a79050434   Josef Bacik   blk-rq-qos: refac...
277
  static void calc_wb_limits(struct rq_wb *rwb)
e34cbd307   Jens Axboe   blk-wbt: add gene...
278
  {
a79050434   Josef Bacik   blk-rq-qos: refac...
279
280
281
282
283
284
285
286
287
288
  	if (rwb->min_lat_nsec == 0) {
  		rwb->wb_normal = rwb->wb_background = 0;
  	} else if (rwb->rq_depth.max_depth <= 2) {
  		rwb->wb_normal = rwb->rq_depth.max_depth;
  		rwb->wb_background = 1;
  	} else {
  		rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2;
  		rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4;
  	}
  }
e34cbd307   Jens Axboe   blk-wbt: add gene...
289

a79050434   Josef Bacik   blk-rq-qos: refac...
290
291
  static void scale_up(struct rq_wb *rwb)
  {
b84477d3e   Harshad Shirwadkar   blk-wbt: fix perf...
292
293
  	if (!rq_depth_scale_up(&rwb->rq_depth))
  		return;
a79050434   Josef Bacik   blk-rq-qos: refac...
294
  	calc_wb_limits(rwb);
e34cbd307   Jens Axboe   blk-wbt: add gene...
295
  	rwb->unknown_cnt = 0;
5e65a2034   Josef Bacik   blk-wbt: wake up ...
296
  	rwb_wake_all(rwb);
3a89c25d9   Tommi Rantala   blk-wbt: Use trac...
297
  	rwb_trace_step(rwb, tracepoint_string("scale up"));
e34cbd307   Jens Axboe   blk-wbt: add gene...
298
  }
e34cbd307   Jens Axboe   blk-wbt: add gene...
299
300
  /*
   * Try to scale the queue depth down; @hard_throttle is handed through to
   * rq_depth_scale_down(). If that reports a change, recompute the limits,
   * reset the unknown counter and trace the step.
   */
  static void scale_down(struct rq_wb *rwb, bool hard_throttle)
  {
  	if (rq_depth_scale_down(&rwb->rq_depth, hard_throttle)) {
  		calc_wb_limits(rwb);
  		rwb->unknown_cnt = 0;
  		rwb_trace_step(rwb, tracepoint_string("scale down"));
  	}
  }
  
  /*
   * Pick the monitoring window for the current scaling step and activate
   * the stat callback for that long.
   */
  static void rwb_arm_timer(struct rq_wb *rwb)
  {
  	if (rwb->rq_depth.scale_step <= 0) {
  		/*
  		 * For step <= 0 the window size is neither increased nor
  		 * decreased.
  		 */
  		rwb->cur_win_nsec = rwb->win_nsec;
  	} else {
  		/*
  		 * Shrink the window by sqrt(step + 1), computed in fixed
  		 * point. This could be sped up with some variant of a fast
  		 * integer inverse square root, but since it only runs once
  		 * per window expiration it's not a huge deal.
  		 */
  		rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
  				int_sqrt((rwb->rq_depth.scale_step + 1) << 8));
  	}

  	blk_stat_activate_nsecs(rwb->cb, rwb->cur_win_nsec);
  }
34dbad5d2   Omar Sandoval   blk-stat: convert...
330
  static void wb_timer_fn(struct blk_stat_callback *cb)
e34cbd307   Jens Axboe   blk-wbt: add gene...
331
  {
34dbad5d2   Omar Sandoval   blk-stat: convert...
332
  	struct rq_wb *rwb = cb->data;
a79050434   Josef Bacik   blk-rq-qos: refac...
333
  	struct rq_depth *rqd = &rwb->rq_depth;
e34cbd307   Jens Axboe   blk-wbt: add gene...
334
335
  	unsigned int inflight = wbt_inflight(rwb);
  	int status;
34dbad5d2   Omar Sandoval   blk-stat: convert...
336
  	status = latency_exceeded(rwb, cb->stat);
e34cbd307   Jens Axboe   blk-wbt: add gene...
337

a79050434   Josef Bacik   blk-rq-qos: refac...
338
  	trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step,
d8a0cbfd7   Jens Axboe   blk-wbt: store qu...
339
  			inflight);
e34cbd307   Jens Axboe   blk-wbt: add gene...
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
  
  	/*
  	 * If we exceeded the latency target, step down. If we did not,
  	 * step one level up. If we don't know enough to say either exceeded
  	 * or ok, then don't do anything.
  	 */
  	switch (status) {
  	case LAT_EXCEEDED:
  		scale_down(rwb, true);
  		break;
  	case LAT_OK:
  		scale_up(rwb);
  		break;
  	case LAT_UNKNOWN_WRITES:
  		/*
  		 * We started a the center step, but don't have a valid
  		 * read/write sample, but we do have writes going on.
  		 * Allow step to go negative, to increase write perf.
  		 */
  		scale_up(rwb);
  		break;
  	case LAT_UNKNOWN:
  		if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP)
  			break;
  		/*
  		 * We get here when previously scaled reduced depth, and we
  		 * currently don't have a valid read/write sample. For that
  		 * case, slowly return to center state (step == 0).
  		 */
a79050434   Josef Bacik   blk-rq-qos: refac...
369
  		if (rqd->scale_step > 0)
e34cbd307   Jens Axboe   blk-wbt: add gene...
370
  			scale_up(rwb);
a79050434   Josef Bacik   blk-rq-qos: refac...
371
  		else if (rqd->scale_step < 0)
e34cbd307   Jens Axboe   blk-wbt: add gene...
372
373
374
375
376
377
378
379
380
  			scale_down(rwb, false);
  		break;
  	default:
  		break;
  	}
  
  	/*
  	 * Re-arm timer, if we have IO in flight
  	 */
a79050434   Josef Bacik   blk-rq-qos: refac...
381
  	if (rqd->scale_step || inflight)
e34cbd307   Jens Axboe   blk-wbt: add gene...
382
383
  		rwb_arm_timer(rwb);
  }
4d89e1d11   Guoqing Jiang   blk-wbt: rename _...
384
  static void wbt_update_limits(struct rq_wb *rwb)
e34cbd307   Jens Axboe   blk-wbt: add gene...
385
  {
a79050434   Josef Bacik   blk-rq-qos: refac...
386
387
388
389
390
391
  	struct rq_depth *rqd = &rwb->rq_depth;
  
  	rqd->scale_step = 0;
  	rqd->scaled_max = false;
  
  	rq_depth_calc_max_depth(rqd);
e34cbd307   Jens Axboe   blk-wbt: add gene...
392
393
394
395
  	calc_wb_limits(rwb);
  
  	rwb_wake_all(rwb);
  }
a79050434   Josef Bacik   blk-rq-qos: refac...
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
  /* Return the min latency target of @q, or 0 if @q has no wbt rq_qos. */
  u64 wbt_get_min_lat(struct request_queue *q)
  {
  	struct rq_qos *rqos = wbt_rq_qos(q);

  	return rqos ? RQWB(rqos)->min_lat_nsec : 0;
  }
  
  /*
   * Set the min latency target of @q to @val, mark wbt as manually enabled
   * and refresh the limits. Does nothing if @q has no wbt rq_qos.
   */
  void wbt_set_min_lat(struct request_queue *q, u64 val)
  {
  	struct rq_qos *rqos = wbt_rq_qos(q);
  	struct rq_wb *rwb;

  	if (!rqos)
  		return;

  	rwb = RQWB(rqos);
  	rwb->min_lat_nsec = val;
  	rwb->enable_state = WBT_STATE_ON_MANUAL;
  	wbt_update_limits(rwb);
  }
e34cbd307   Jens Axboe   blk-wbt: add gene...
413
414
415
416
417
418
419
420
421
422
423
424
425
  static bool close_io(struct rq_wb *rwb)
  {
  	const unsigned long now = jiffies;
  
  	return time_before(now, rwb->last_issue + HZ / 10) ||
  		time_before(now, rwb->last_comp + HZ / 10);
  }
  
  #define REQ_HIPRIO	(REQ_SYNC | REQ_META | REQ_PRIO)
  
  static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
  {
  	unsigned int limit;
ffa358dca   Jens Axboe   blk-wbt: move dis...
426
427
428
429
430
431
  	/*
  	 * If we got disabled, just return UINT_MAX. This ensures that
  	 * we'll properly inc a new IO, and dec+wakeup at the end.
  	 */
  	if (!rwb_enabled(rwb))
  		return UINT_MAX;
782f56977   Jens Axboe   blk-wbt: throttle...
432
433
  	if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD)
  		return rwb->wb_background;
e34cbd307   Jens Axboe   blk-wbt: add gene...
434
435
  	/*
  	 * At this point we know it's a buffered write. If this is
3dfbdc44d   weiping zhang   blk-wbt: fix comm...
436
  	 * kswapd trying to free memory, or REQ_SYNC is set, then
e34cbd307   Jens Axboe   blk-wbt: add gene...
437
438
439
440
441
442
  	 * it's WB_SYNC_ALL writeback, and we'll use the max limit for
  	 * that. If the write is marked as a background write, then use
  	 * the idle limit, or go to normal if we haven't had competing
  	 * IO for a bit.
  	 */
  	if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
a79050434   Josef Bacik   blk-rq-qos: refac...
443
  		limit = rwb->rq_depth.max_depth;
e34cbd307   Jens Axboe   blk-wbt: add gene...
444
445
446
447
448
449
450
451
452
453
454
  	else if ((rw & REQ_BACKGROUND) || close_io(rwb)) {
  		/*
  		 * If less than 100ms since we completed unrelated IO,
  		 * limit us to half the depth for background writeback.
  		 */
  		limit = rwb->wb_background;
  	} else
  		limit = rwb->wb_normal;
  
  	return limit;
  }
38cfb5a45   Jens Axboe   blk-wbt: improve ...
455
  struct wbt_wait_data {
38cfb5a45   Jens Axboe   blk-wbt: improve ...
456
  	struct rq_wb *rwb;
b6c7b58f5   Josef Bacik   block: convert wb...
457
  	enum wbt_flags wb_acct;
38cfb5a45   Jens Axboe   blk-wbt: improve ...
458
  	unsigned long rw;
38cfb5a45   Jens Axboe   blk-wbt: improve ...
459
  };
b6c7b58f5   Josef Bacik   block: convert wb...
460
  static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data)
38cfb5a45   Jens Axboe   blk-wbt: improve ...
461
  {
b6c7b58f5   Josef Bacik   block: convert wb...
462
463
464
  	struct wbt_wait_data *data = private_data;
  	return rq_wait_inc_below(rqw, get_limit(data->rwb, data->rw));
  }
38cfb5a45   Jens Axboe   blk-wbt: improve ...
465

b6c7b58f5   Josef Bacik   block: convert wb...
466
467
468
469
  /* rq_qos_wait() cleanup callback: run completion accounting on @rqw. */
  static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
  {
  	struct wbt_wait_data *wait = private_data;

  	wbt_rqw_done(wait->rwb, rqw, wait->wb_acct);
  }
e34cbd307   Jens Axboe   blk-wbt: add gene...
471
472
473
474
  /*
   * Block if we will exceed our limit, or if we are currently waiting for
   * the timer to kick off queuing again.
   */
8bea60901   Jens Axboe   blk-wbt: pass in ...
475
  static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
d53375608   Christoph Hellwig   block: remove the...
476
  		       unsigned long rw)
e34cbd307   Jens Axboe   blk-wbt: add gene...
477
  {
8bea60901   Jens Axboe   blk-wbt: pass in ...
478
  	struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
38cfb5a45   Jens Axboe   blk-wbt: improve ...
479
  	struct wbt_wait_data data = {
38cfb5a45   Jens Axboe   blk-wbt: improve ...
480
  		.rwb = rwb,
b6c7b58f5   Josef Bacik   block: convert wb...
481
  		.wb_acct = wb_acct,
38cfb5a45   Jens Axboe   blk-wbt: improve ...
482
483
  		.rw = rw,
  	};
e34cbd307   Jens Axboe   blk-wbt: add gene...
484

b6c7b58f5   Josef Bacik   block: convert wb...
485
  	rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb);
e34cbd307   Jens Axboe   blk-wbt: add gene...
486
487
488
489
  }
  
  static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
  {
782f56977   Jens Axboe   blk-wbt: throttle...
490
491
492
493
494
495
496
497
  	switch (bio_op(bio)) {
  	case REQ_OP_WRITE:
  		/*
  		 * Don't throttle WRITE_ODIRECT
  		 */
  		if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) ==
  		    (REQ_SYNC | REQ_IDLE))
  			return false;
df561f668   Gustavo A. R. Silva   treewide: Use fal...
498
  		fallthrough;
782f56977   Jens Axboe   blk-wbt: throttle...
499
500
501
  	case REQ_OP_DISCARD:
  		return true;
  	default:
e34cbd307   Jens Axboe   blk-wbt: add gene...
502
  		return false;
782f56977   Jens Axboe   blk-wbt: throttle...
503
  	}
e34cbd307   Jens Axboe   blk-wbt: add gene...
504
  }
c1c80384c   Josef Bacik   block: remove ext...
505
506
507
  static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
  {
  	enum wbt_flags flags = 0;
c125311d9   Jens Axboe   blk-wbt: don't ma...
508
509
  	if (!rwb_enabled(rwb))
  		return 0;
c1c80384c   Josef Bacik   block: remove ext...
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
  	if (bio_op(bio) == REQ_OP_READ) {
  		flags = WBT_READ;
  	} else if (wbt_should_throttle(rwb, bio)) {
  		if (current_is_kswapd())
  			flags |= WBT_KSWAPD;
  		if (bio_op(bio) == REQ_OP_DISCARD)
  			flags |= WBT_DISCARD;
  		flags |= WBT_TRACKED;
  	}
  	return flags;
  }
  
  /* Run completion accounting for the flags that @bio maps to. */
  static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
  {
  	__wbt_done(rqos, bio_to_wbt_flags(RQWB(rqos), bio));
  }
e34cbd307   Jens Axboe   blk-wbt: add gene...
528
529
530
531
532
533
  /*
   * Throttle a bio against the writeback limits. Bios that are not tracked
   * pass straight through (reads only update the last-issue timestamp).
   * May sleep, if we have exceeded the writeback limits.
   */
d53375608   Christoph Hellwig   block: remove the...
534
  static void wbt_wait(struct rq_qos *rqos, struct bio *bio)
e34cbd307   Jens Axboe   blk-wbt: add gene...
535
  {
a79050434   Josef Bacik   blk-rq-qos: refac...
536
  	struct rq_wb *rwb = RQWB(rqos);
c1c80384c   Josef Bacik   block: remove ext...
537
  	enum wbt_flags flags;
e34cbd307   Jens Axboe   blk-wbt: add gene...
538

c1c80384c   Josef Bacik   block: remove ext...
539
  	flags = bio_to_wbt_flags(rwb, bio);
df60f6e83   Ming Lei   blk-wbt: fix IO h...
540
  	if (!(flags & WBT_TRACKED)) {
c1c80384c   Josef Bacik   block: remove ext...
541
  		if (flags & WBT_READ)
e34cbd307   Jens Axboe   blk-wbt: add gene...
542
  			wb_timestamp(rwb, &rwb->last_issue);
c1c80384c   Josef Bacik   block: remove ext...
543
  		return;
e34cbd307   Jens Axboe   blk-wbt: add gene...
544
  	}
d53375608   Christoph Hellwig   block: remove the...
545
  	__wbt_wait(rwb, flags, bio->bi_opf);
e34cbd307   Jens Axboe   blk-wbt: add gene...
546

34dbad5d2   Omar Sandoval   blk-stat: convert...
547
  	if (!blk_stat_is_active(rwb->cb))
e34cbd307   Jens Axboe   blk-wbt: add gene...
548
  		rwb_arm_timer(rwb);
c1c80384c   Josef Bacik   block: remove ext...
549
  }
e34cbd307   Jens Axboe   blk-wbt: add gene...
550

c1c80384c   Josef Bacik   block: remove ext...
551
552
553
554
  /* OR the wbt flags that @bio maps to into @rq's flags. */
  static void wbt_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
  {
  	rq->wbt_flags |= bio_to_wbt_flags(RQWB(rqos), bio);
  }
c83f536a8   Bart Van Assche   blk-wbt: Declare ...
556
  static void wbt_issue(struct rq_qos *rqos, struct request *rq)
e34cbd307   Jens Axboe   blk-wbt: add gene...
557
  {
a79050434   Josef Bacik   blk-rq-qos: refac...
558
  	struct rq_wb *rwb = RQWB(rqos);
e34cbd307   Jens Axboe   blk-wbt: add gene...
559
560
561
562
  	if (!rwb_enabled(rwb))
  		return;
  
  	/*
a8a459417   Omar Sandoval   block: pass struc...
563
564
565
566
567
  	 * Track sync issue, in case it takes a long time to complete. Allows us
  	 * to react quicker, if a sync IO takes a long time to complete. Note
  	 * that this is just a hint. The request can go away when it completes,
  	 * so it's important we never dereference it. We only use the address to
  	 * compare with, which is why we store the sync_issue time locally.
e34cbd307   Jens Axboe   blk-wbt: add gene...
568
  	 */
a8a459417   Omar Sandoval   block: pass struc...
569
570
  	if (wbt_is_read(rq) && !rwb->sync_issue) {
  		rwb->sync_cookie = rq;
544ccc8dc   Omar Sandoval   block: get rid of...
571
  		rwb->sync_issue = rq->io_start_time_ns;
e34cbd307   Jens Axboe   blk-wbt: add gene...
572
573
  	}
  }
c83f536a8   Bart Van Assche   blk-wbt: Declare ...
574
  static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
e34cbd307   Jens Axboe   blk-wbt: add gene...
575
  {
a79050434   Josef Bacik   blk-rq-qos: refac...
576
  	struct rq_wb *rwb = RQWB(rqos);
e34cbd307   Jens Axboe   blk-wbt: add gene...
577
578
  	if (!rwb_enabled(rwb))
  		return;
a8a459417   Omar Sandoval   block: pass struc...
579
  	if (rq == rwb->sync_cookie) {
e34cbd307   Jens Axboe   blk-wbt: add gene...
580
581
582
583
  		rwb->sync_issue = 0;
  		rwb->sync_cookie = NULL;
  	}
  }
a79050434   Josef Bacik   blk-rq-qos: refac...
584
  /* Record @write_cache_on in the wbt state of @q, if @q has a wbt rq_qos. */
  void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
  {
  	struct rq_qos *rqos = wbt_rq_qos(q);

  	if (!rqos)
  		return;

  	RQWB(rqos)->wc = write_cache_on;
  }
e34cbd307   Jens Axboe   blk-wbt: add gene...
590

8330cdb0f   Jan Kara   block: Make write...
591
592
593
594
595
  /*
   * Enable wbt if defaults are configured that way
   */
  void wbt_enable_default(struct request_queue *q)
  {
a79050434   Josef Bacik   blk-rq-qos: refac...
596
  	struct rq_qos *rqos = wbt_rq_qos(q);
8330cdb0f   Jan Kara   block: Make write...
597
  	/* Throttling already enabled? */
a79050434   Josef Bacik   blk-rq-qos: refac...
598
  	if (rqos)
8330cdb0f   Jan Kara   block: Make write...
599
600
601
  		return;
  
  	/* Queue not registered? Maybe shutting down... */
58c898ba3   Ming Lei   block: add helper...
602
  	if (!blk_queue_registered(q))
8330cdb0f   Jan Kara   block: Make write...
603
  		return;
344e9ffcb   Jens Axboe   block: add queue_...
604
  	if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ))
8330cdb0f   Jan Kara   block: Make write...
605
606
607
  		wbt_init(q);
  }
  EXPORT_SYMBOL_GPL(wbt_enable_default);
80e091d10   Jens Axboe   blk-wbt: allow re...
608
609
610
611
612
613
614
615
616
617
618
  /*
   * Default latency target in nsec: 2msec for non-rotational storage,
   * 75msec for rotational storage.
   */
  u64 wbt_default_latency_nsec(struct request_queue *q)
  {
  	return blk_queue_nonrot(q) ? 2000000ULL : 75000000ULL;
  }
99c749a4c   Jens Axboe   blk-stat: kill bl...
619
620
  static int wbt_data_dir(const struct request *rq)
  {
5235553d8   Jens Axboe   blk-wbt: account ...
621
622
623
624
  	const int op = req_op(rq);
  
  	if (op == REQ_OP_READ)
  		return READ;
825843b0a   Jens Axboe   blk-wbt: account ...
625
  	else if (op_is_write(op))
5235553d8   Jens Axboe   blk-wbt: account ...
626
627
628
629
  		return WRITE;
  
  	/* don't account */
  	return -1;
99c749a4c   Jens Axboe   blk-stat: kill bl...
630
  }
9677a3e01   Tejun Heo   block/rq_qos: imp...
631
632
633
  static void wbt_queue_depth_changed(struct rq_qos *rqos)
  {
  	RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
4d89e1d11   Guoqing Jiang   blk-wbt: rename _...
634
  	wbt_update_limits(RQWB(rqos));
9677a3e01   Tejun Heo   block/rq_qos: imp...
635
  }
a79050434   Josef Bacik   blk-rq-qos: refac...
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
  /* Tear down: detach and free the stat callback, then free the wbt state. */
  static void wbt_exit(struct rq_qos *rqos)
  {
  	struct rq_wb *rwb = RQWB(rqos);

  	blk_stat_remove_callback(rqos->q, rwb->cb);
  	blk_stat_free_callback(rwb->cb);
  	kfree(rwb);
  }
  
  /*
   * Disable wbt, if enabled by default.
   */
  void wbt_disable_default(struct request_queue *q)
  {
  	struct rq_qos *rqos = wbt_rq_qos(q);
  	struct rq_wb *rwb;
  	if (!rqos)
  		return;
  	rwb = RQWB(rqos);
544fbd16a   Ming Lei   block: deactivate...
656
657
  	if (rwb->enable_state == WBT_STATE_ON_DEFAULT) {
  		blk_stat_deactivate(rwb->cb);
a79050434   Josef Bacik   blk-rq-qos: refac...
658
  		rwb->wb_normal = 0;
544fbd16a   Ming Lei   block: deactivate...
659
  	}
a79050434   Josef Bacik   blk-rq-qos: refac...
660
  }
e815f404a   Jens Axboe   block: add wbt_di...
661
  EXPORT_SYMBOL_GPL(wbt_disable_default);
a79050434   Josef Bacik   blk-rq-qos: refac...
662

d19afebca   Ming Lei   blk-wbt: export i...
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
  #ifdef CONFIG_BLK_DEBUG_FS
  static int wbt_curr_win_nsec_show(void *data, struct seq_file *m)
  {
  	struct rq_qos *rqos = data;
  	struct rq_wb *rwb = RQWB(rqos);
  
  	seq_printf(m, "%llu
  ", rwb->cur_win_nsec);
  	return 0;
  }
  
  static int wbt_enabled_show(void *data, struct seq_file *m)
  {
  	struct rq_qos *rqos = data;
  	struct rq_wb *rwb = RQWB(rqos);
  
  	seq_printf(m, "%d
  ", rwb->enable_state);
  	return 0;
  }
  
  static int wbt_id_show(void *data, struct seq_file *m)
  {
  	struct rq_qos *rqos = data;
  
  	seq_printf(m, "%u
  ", rqos->id);
  	return 0;
  }
  
  /*
   * Show handler for the "inflight" debugfs attribute.
   * @data is the struct rq_qos of the wbt instance; prints one
   * "<index>: inflight <count>" line for each of the WBT_NUM_RWQ entries in
   * rwb->rq_wait[]. Always returns 0.
   */
  static int wbt_inflight_show(void *data, struct seq_file *m)
  {
  	struct rq_qos *rqos = data;
  	struct rq_wb *rwb = RQWB(rqos);
  	int i;
  
  	for (i = 0; i < WBT_NUM_RWQ; i++)
  		seq_printf(m, "%d: inflight %d\n", i,
  			   atomic_read(&rwb->rq_wait[i].inflight));
  	return 0;
  }
  
  static int wbt_min_lat_nsec_show(void *data, struct seq_file *m)
  {
  	struct rq_qos *rqos = data;
  	struct rq_wb *rwb = RQWB(rqos);
  
  	seq_printf(m, "%lu
  ", rwb->min_lat_nsec);
  	return 0;
  }
  
  static int wbt_unknown_cnt_show(void *data, struct seq_file *m)
  {
  	struct rq_qos *rqos = data;
  	struct rq_wb *rwb = RQWB(rqos);
  
  	seq_printf(m, "%u
  ", rwb->unknown_cnt);
  	return 0;
  }
  
  static int wbt_normal_show(void *data, struct seq_file *m)
  {
  	struct rq_qos *rqos = data;
  	struct rq_wb *rwb = RQWB(rqos);
  
  	seq_printf(m, "%u
  ", rwb->wb_normal);
  	return 0;
  }
  
  static int wbt_background_show(void *data, struct seq_file *m)
  {
  	struct rq_qos *rqos = data;
  	struct rq_wb *rwb = RQWB(rqos);
  
  	seq_printf(m, "%u
  ", rwb->wb_background);
  	return 0;
  }
  
  /*
   * debugfs attributes exposed for a wbt instance. Each entry pairs an
   * attribute name and file mode (0400 for all of them) with its show
   * handler. The empty last entry presumably terminates the table.
   */
  static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
  	{ "curr_win_nsec", 0400, wbt_curr_win_nsec_show },
  	{ "enabled", 0400, wbt_enabled_show },
  	{ "id", 0400, wbt_id_show },
  	{ "inflight", 0400, wbt_inflight_show },
  	{ "min_lat_nsec", 0400, wbt_min_lat_nsec_show },
  	{ "unknown_cnt", 0400, wbt_unknown_cnt_show },
  	{ "wb_normal", 0400, wbt_normal_show },
  	{ "wb_background", 0400, wbt_background_show },
  	{},
  };
  #endif
a79050434   Josef Bacik   blk-rq-qos: refac...
758
759
760
  static struct rq_qos_ops wbt_rqos_ops = {
  	.throttle = wbt_wait,
  	.issue = wbt_issue,
c1c80384c   Josef Bacik   block: remove ext...
761
  	.track = wbt_track,
a79050434   Josef Bacik   blk-rq-qos: refac...
762
763
  	.requeue = wbt_requeue,
  	.done = wbt_done,
c1c80384c   Josef Bacik   block: remove ext...
764
  	.cleanup = wbt_cleanup,
9677a3e01   Tejun Heo   block/rq_qos: imp...
765
  	.queue_depth_changed = wbt_queue_depth_changed,
a79050434   Josef Bacik   blk-rq-qos: refac...
766
  	.exit = wbt_exit,
d19afebca   Ming Lei   blk-wbt: export i...
767
768
769
  #ifdef CONFIG_BLK_DEBUG_FS
  	.debugfs_attrs = wbt_debugfs_attrs,
  #endif
a79050434   Josef Bacik   blk-rq-qos: refac...
770
  };
8054b89f8   Jens Axboe   blk-wbt: remove s...
771
  int wbt_init(struct request_queue *q)
e34cbd307   Jens Axboe   blk-wbt: add gene...
772
773
774
  {
  	struct rq_wb *rwb;
  	int i;
e34cbd307   Jens Axboe   blk-wbt: add gene...
775
776
777
  	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
  	if (!rwb)
  		return -ENOMEM;
99c749a4c   Jens Axboe   blk-stat: kill bl...
778
  	rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb);
34dbad5d2   Omar Sandoval   blk-stat: convert...
779
780
781
782
  	if (!rwb->cb) {
  		kfree(rwb);
  		return -ENOMEM;
  	}
a79050434   Josef Bacik   blk-rq-qos: refac...
783
784
  	for (i = 0; i < WBT_NUM_RWQ; i++)
  		rq_wait_init(&rwb->rq_wait[i]);
e34cbd307   Jens Axboe   blk-wbt: add gene...
785

a79050434   Josef Bacik   blk-rq-qos: refac...
786
787
788
  	rwb->rqos.id = RQ_QOS_WBT;
  	rwb->rqos.ops = &wbt_rqos_ops;
  	rwb->rqos.q = q;
e34cbd307   Jens Axboe   blk-wbt: add gene...
789
  	rwb->last_comp = rwb->last_issue = jiffies;
e34cbd307   Jens Axboe   blk-wbt: add gene...
790
  	rwb->win_nsec = RWB_WINDOW_NSEC;
d62118b6d   Jens Axboe   blk-wbt: allow wb...
791
  	rwb->enable_state = WBT_STATE_ON_DEFAULT;
a79050434   Josef Bacik   blk-rq-qos: refac...
792
793
  	rwb->wc = 1;
  	rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
4d89e1d11   Guoqing Jiang   blk-wbt: rename _...
794
  	wbt_update_limits(rwb);
e34cbd307   Jens Axboe   blk-wbt: add gene...
795
796
  
  	/*
34dbad5d2   Omar Sandoval   blk-stat: convert...
797
  	 * Assign rwb and add the stats callback.
e34cbd307   Jens Axboe   blk-wbt: add gene...
798
  	 */
a79050434   Josef Bacik   blk-rq-qos: refac...
799
  	rq_qos_add(q, &rwb->rqos);
34dbad5d2   Omar Sandoval   blk-stat: convert...
800
  	blk_stat_add_callback(q, rwb->cb);
e34cbd307   Jens Axboe   blk-wbt: add gene...
801

80e091d10   Jens Axboe   blk-wbt: allow re...
802
  	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
e34cbd307   Jens Axboe   blk-wbt: add gene...
803

9677a3e01   Tejun Heo   block/rq_qos: imp...
804
  	wbt_queue_depth_changed(&rwb->rqos);
a79050434   Josef Bacik   blk-rq-qos: refac...
805
  	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
e34cbd307   Jens Axboe   blk-wbt: add gene...
806
807
808
  
  	return 0;
  }