block/blk-throttle.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Interface for controlling IO bandwidth on a request queue
 *
 * Copyright (C) 2010 Vivek Goyal <vgoyal@redhat.com>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/blktrace_api.h>
#include <linux/blk-cgroup.h>
#include "blk.h"
#include "blk-cgroup-rwstat.h"
  
/* Max dispatch from a group in 1 round */
#define THROTL_GRP_QUANTUM 8

/* Total max dispatch from all groups in one round */
#define THROTL_QUANTUM 32

/* Throttling is performed over a slice and after that slice is renewed */
#define DFL_THROTL_SLICE_HD (HZ / 10)
#define DFL_THROTL_SLICE_SSD (HZ / 50)
#define MAX_THROTL_SLICE (HZ)
#define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
#define MIN_THROTL_BPS (320 * 1024)
#define MIN_THROTL_IOPS (10)
#define DFL_LATENCY_TARGET (-1L)
#define DFL_IDLE_THRESHOLD (0)
#define DFL_HD_BASELINE_LATENCY (4000L) /* 4ms */
#define LATENCY_FILTERED_SSD (0)
/*
 * For HD, very small latency comes from sequential IO.  Such IO is of no
 * help in deciding whether the IO is being impacted by others, so it is
 * ignored.
 */
#define LATENCY_FILTERED_HD (1000L) /* 1ms */

static struct blkcg_policy blkcg_policy_throtl;

/* A workqueue to queue throttle related work */
static struct workqueue_struct *kthrotld_workqueue;

  /*
   * To implement hierarchical throttling, throtl_grps form a tree and bios
   * are dispatched upwards level by level until they reach the top and get
   * issued.  When dispatching bios from the children and local group at each
 * level, if the bios are dispatched into a single bio_list, there's a risk
 * that a local or child group which can queue many bios at once fills up
 * the list, starving others.
   *
   * To avoid such starvation, dispatched bios are queued separately
   * according to where they came from.  When they are again dispatched to
   * the parent, they're popped in round-robin order so that no single source
   * hogs the dispatch window.
   *
   * throtl_qnode is used to keep the queued bios separated by their sources.
   * Bios are queued to throtl_qnode which in turn is queued to
   * throtl_service_queue and then dispatched in round-robin order.
   *
   * It's also used to track the reference counts on blkg's.  A qnode always
   * belongs to a throtl_grp and gets queued on itself or the parent, so
   * incrementing the reference of the associated throtl_grp when a qnode is
   * queued and decrementing when dequeued is enough to keep the whole blkg
   * tree pinned while bios are in flight.
   */
  struct throtl_qnode {
  	struct list_head	node;		/* service_queue->queued[] */
  	struct bio_list		bios;		/* queued bios */
  	struct throtl_grp	*tg;		/* tg this qnode belongs to */
  };
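
/*
 * Example: if one child queues 1000 bios while a sibling queues a single
 * bio, a shared bio_list would dispatch all 1000 before the sibling's bio;
 * with one qnode per source, pops alternate between the two sources.
 */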

struct throtl_service_queue {
	struct throtl_service_queue *parent_sq;	/* the parent service_queue */

	/*
	 * Bios queued directly to this service_queue or dispatched from
	 * children throtl_grp's.
	 */
	struct list_head	queued[2];	/* throtl_qnode [READ/WRITE] */
	unsigned int		nr_queued[2];	/* number of queued bios */

	/*
	 * RB tree of active children throtl_grp's, which are sorted by
	 * their ->disptime.
	 */
	struct rb_root_cached	pending_tree;	/* RB tree of active tgs */
	unsigned int		nr_pending;	/* # queued in the tree */
	unsigned long		first_pending_disptime;	/* disptime of the first tg */
	struct timer_list	pending_timer;	/* fires on first_pending_disptime */
};

enum tg_state_flags {
	THROTL_TG_PENDING	= 1 << 0,	/* on parent's pending tree */
	THROTL_TG_WAS_EMPTY	= 1 << 1,	/* bio_lists[] became non-empty */
};

#define rb_entry_tg(node)	rb_entry((node), struct throtl_grp, rb_node)

enum {
	LIMIT_LOW,
	LIMIT_MAX,
	LIMIT_CNT,
};

struct throtl_grp {
	/* must be the first member */
	struct blkg_policy_data pd;

	/* active throtl group service_queue member */
	struct rb_node rb_node;

	/* throtl_data this group belongs to */
	struct throtl_data *td;

	/* this group's service queue */
	struct throtl_service_queue service_queue;

	/*
	 * qnode_on_self is used when bios are directly queued to this
	 * throtl_grp so that local bios compete fairly with bios
	 * dispatched from children.  qnode_on_parent is used when bios are
	 * dispatched from this throtl_grp into its parent and will compete
	 * with the sibling qnode_on_parents and the parent's
	 * qnode_on_self.
	 */
	struct throtl_qnode qnode_on_self[2];
	struct throtl_qnode qnode_on_parent[2];

	/*
	 * Dispatch time in jiffies. This is the estimated time when the
	 * group will unthrottle and be ready to dispatch more bios. It is
	 * used as the key to sort active groups in the service tree.
	 */
	unsigned long disptime;

	unsigned int flags;

	/* are there any throtl rules between this group and td? */
	bool has_rules[2];

	/* internally used bytes per second rate limits */
	uint64_t bps[2][LIMIT_CNT];

	/* user configured bps limits */
	uint64_t bps_conf[2][LIMIT_CNT];

	/* internally used IOPS limits */
	unsigned int iops[2][LIMIT_CNT];

	/* user configured IOPS limits */
	unsigned int iops_conf[2][LIMIT_CNT];

	/* Number of bytes dispatched in current slice */
	uint64_t bytes_disp[2];

	/* Number of bios dispatched in current slice */
	unsigned int io_disp[2];

	unsigned long last_low_overflow_time[2];

	uint64_t last_bytes_disp[2];
	unsigned int last_io_disp[2];

	unsigned long last_check_time;

	unsigned long latency_target; /* us */
	unsigned long latency_target_conf; /* us */

	/* When did we start a new slice */
	unsigned long slice_start[2];
	unsigned long slice_end[2];

	unsigned long last_finish_time; /* ns / 1024 */
	unsigned long checked_last_finish_time; /* ns / 1024 */
	unsigned long avg_idletime; /* ns / 1024 */
	unsigned long idletime_threshold; /* us */
	unsigned long idletime_threshold_conf; /* us */

	unsigned int bio_cnt; /* total bios */
	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
	unsigned long bio_cnt_reset_time;

	atomic_t io_split_cnt[2];
	atomic_t last_io_split_cnt[2];

	struct blkg_rwstat stat_bytes;
	struct blkg_rwstat stat_ios;
};

/* We measure latency for request size from <= 4k to >= 1M */
#define LATENCY_BUCKET_SIZE 9

struct latency_bucket {
	unsigned long total_latency; /* ns / 1024 */
	int samples;
};

struct avg_latency_bucket {
	unsigned long latency; /* ns / 1024 */
	bool valid;
};

struct throtl_data
{
	/* service tree for active throtl groups */
	struct throtl_service_queue service_queue;

	struct request_queue *queue;

	/* Total number of queued bios on READ and WRITE lists */
	unsigned int nr_queued[2];

	unsigned int throtl_slice;

	/* Work for dispatching throttled bios */
	struct work_struct dispatch_work;

	unsigned int limit_index;
	bool limit_valid[LIMIT_CNT];

	unsigned long low_upgrade_time;
	unsigned long low_downgrade_time;

	unsigned int scale;

	struct latency_bucket tmp_buckets[2][LATENCY_BUCKET_SIZE];
	struct avg_latency_bucket avg_buckets[2][LATENCY_BUCKET_SIZE];
	struct latency_bucket __percpu *latency_buckets[2];
	unsigned long last_calculate_time;
	unsigned long filtered_latency;

	bool track_bio_latency;
};

static void throtl_pending_timer_fn(struct timer_list *t);

  static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
  {
  	return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
  }

static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
{
	return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
}

static inline struct blkcg_gq *tg_to_blkg(struct throtl_grp *tg)
{
	return pd_to_blkg(&tg->pd);
}

  /**
 * sq_to_tg - return the throtl_grp the specified service queue belongs to
   * @sq: the throtl_service_queue of interest
   *
   * Return the throtl_grp @sq belongs to.  If @sq is the top-level one
   * embedded in throtl_data, %NULL is returned.
   */
  static struct throtl_grp *sq_to_tg(struct throtl_service_queue *sq)
  {
  	if (sq && sq->parent_sq)
  		return container_of(sq, struct throtl_grp, service_queue);
  	else
  		return NULL;
  }
  
  /**
   * sq_to_td - return throtl_data the specified service queue belongs to
   * @sq: the throtl_service_queue of interest
   *
 *
 * A service_queue can be embedded in either a throtl_grp or throtl_data.
   * Determine the associated throtl_data accordingly and return it.
   */
  static struct throtl_data *sq_to_td(struct throtl_service_queue *sq)
  {
  	struct throtl_grp *tg = sq_to_tg(sq);
  
  	if (tg)
  		return tg->td;
  	else
  		return container_of(sq, struct throtl_data, service_queue);
  }

/*
 * A cgroup's limit in LIMIT_MAX is scaled if a low limit is set. The
 * scaling is there to make IO dispatch smoother.
 * Scale up: scale up linearly according to the time elapsed since upgrade.
 *           Every throtl_slice, the limit scales up by 1/2 of the .low
 *           limit until it hits the .max limit.
 * Scale down: scale down exponentially if a cgroup doesn't hit its .low
 *             limit.
 */
  static uint64_t throtl_adjusted_limit(uint64_t low, struct throtl_data *td)
  {
  	/* arbitrary value to avoid too big scale */
  	if (td->scale < 4096 && time_after_eq(jiffies,
  	    td->low_upgrade_time + td->scale * td->throtl_slice))
  		td->scale = (jiffies - td->low_upgrade_time) / td->throtl_slice;
  
  	return low + (low >> 1) * td->scale;
  }
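
/*
 * Worked example: with a .low limit of 10 MB/s, one throtl_slice after an
 * upgrade td->scale is 1 and the adjusted limit is 10 + 5 * 1 = 15 MB/s;
 * after ten slices it is 10 + 5 * 10 = 60 MB/s, growing linearly until it
 * is capped by the .max limit (see tg_bps_limit() below).
 */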

static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw)
{
	struct blkcg_gq *blkg = tg_to_blkg(tg);
	struct throtl_data *td;
	uint64_t ret;

	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
		return U64_MAX;

	td = tg->td;
	ret = tg->bps[rw][td->limit_index];
	if (ret == 0 && td->limit_index == LIMIT_LOW) {
		/* intermediate node or iops isn't 0 */
		if (!list_empty(&blkg->blkcg->css.children) ||
		    tg->iops[rw][td->limit_index])
			return U64_MAX;
		else
			return MIN_THROTL_BPS;
	}

	if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] &&
	    tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) {
		uint64_t adjusted;

		adjusted = throtl_adjusted_limit(tg->bps[rw][LIMIT_LOW], td);
		ret = min(tg->bps[rw][LIMIT_MAX], adjusted);
	}
	return ret;
}
  
  static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
  {
	struct blkcg_gq *blkg = tg_to_blkg(tg);
	struct throtl_data *td;
	unsigned int ret;

	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
		return UINT_MAX;

	td = tg->td;
	ret = tg->iops[rw][td->limit_index];
	if (ret == 0 && tg->td->limit_index == LIMIT_LOW) {
		/* intermediate node or bps isn't 0 */
		if (!list_empty(&blkg->blkcg->css.children) ||
		    tg->bps[rw][td->limit_index])
			return UINT_MAX;
		else
			return MIN_THROTL_IOPS;
	}

	if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] &&
	    tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) {
		uint64_t adjusted;

		adjusted = throtl_adjusted_limit(tg->iops[rw][LIMIT_LOW], td);
		if (adjusted > UINT_MAX)
			adjusted = UINT_MAX;
		ret = min_t(unsigned int, tg->iops[rw][LIMIT_MAX], adjusted);
	}
	return ret;
}
  #define request_bucket_index(sectors) \
  	clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1)
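
/*
 * e.g. a 4k request is 8 sectors: order_base_2(8) - 3 = 0, the first
 * bucket; a 1M request is 2048 sectors: order_base_2(2048) - 3 = 8, the
 * last bucket.  Larger requests are clamped into the last bucket too.
 */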
  /**
   * throtl_log - log debug message via blktrace
   * @sq: the service_queue being reported
   * @fmt: printf format string
   * @args: printf args
   *
   * The messages are prefixed with "throtl BLKG_NAME" if @sq belongs to a
   * throtl_grp; otherwise, just "throtl".
 */
#define throtl_log(sq, fmt, args...)	do {				\
	struct throtl_grp *__tg = sq_to_tg((sq));			\
	struct throtl_data *__td = sq_to_td((sq));			\
									\
	(void)__td;							\
	if (likely(!blk_trace_note_message_enabled(__td->queue)))	\
		break;							\
	if ((__tg)) {							\
		blk_add_cgroup_trace_msg(__td->queue,			\
			tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\
	} else {							\
		blk_add_trace_msg(__td->queue, "throtl " fmt, ##args);	\
	}								\
} while (0)

  static inline unsigned int throtl_bio_data_size(struct bio *bio)
  {
  	/* assume it's one sector */
  	if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
  		return 512;
  	return bio->bi_iter.bi_size;
  }
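
/*
 * Note the consequence: a discard, however large, is charged as a single
 * 512-byte IO, so discards are effectively governed by the iops limit
 * rather than the bps limit.
 */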
  static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
  {
  	INIT_LIST_HEAD(&qn->node);
  	bio_list_init(&qn->bios);
  	qn->tg = tg;
  }
  
  /**
   * throtl_qnode_add_bio - add a bio to a throtl_qnode and activate it
   * @bio: bio being added
   * @qn: qnode to add bio to
   * @queued: the service_queue->queued[] list @qn belongs to
   *
   * Add @bio to @qn and put @qn on @queued if it's not already on.
   * @qn->tg's reference count is bumped when @qn is activated.  See the
   * comment on top of throtl_qnode definition for details.
   */
  static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
  				 struct list_head *queued)
  {
  	bio_list_add(&qn->bios, bio);
  	if (list_empty(&qn->node)) {
  		list_add_tail(&qn->node, queued);
  		blkg_get(tg_to_blkg(qn->tg));
  	}
  }
  
  /**
   * throtl_peek_queued - peek the first bio on a qnode list
   * @queued: the qnode list to peek
   */
  static struct bio *throtl_peek_queued(struct list_head *queued)
  {
	struct throtl_qnode *qn;
	struct bio *bio;

	if (list_empty(queued))
		return NULL;

	qn = list_first_entry(queued, struct throtl_qnode, node);
  	bio = bio_list_peek(&qn->bios);
  	WARN_ON_ONCE(!bio);
  	return bio;
  }
  
  /**
 * throtl_pop_queued - pop the first bio from a qnode list
   * @queued: the qnode list to pop a bio from
   * @tg_to_put: optional out argument for throtl_grp to put
   *
   * Pop the first bio from the qnode list @queued.  After popping, the first
   * qnode is removed from @queued if empty or moved to the end of @queued so
   * that the popping order is round-robin.
   *
   * When the first qnode is removed, its associated throtl_grp should be put
   * too.  If @tg_to_put is NULL, this function automatically puts it;
   * otherwise, *@tg_to_put is set to the throtl_grp to put and the caller is
   * responsible for putting it.
   */
  static struct bio *throtl_pop_queued(struct list_head *queued,
  				     struct throtl_grp **tg_to_put)
  {
	struct throtl_qnode *qn;
	struct bio *bio;

	if (list_empty(queued))
		return NULL;

	qn = list_first_entry(queued, struct throtl_qnode, node);
  	bio = bio_list_pop(&qn->bios);
  	WARN_ON_ONCE(!bio);
  
  	if (bio_list_empty(&qn->bios)) {
  		list_del_init(&qn->node);
  		if (tg_to_put)
  			*tg_to_put = qn->tg;
  		else
  			blkg_put(tg_to_blkg(qn->tg));
  	} else {
  		list_move_tail(&qn->node, queued);
  	}
  
  	return bio;
  }
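
/*
 * Example of the round-robin order: with qnodes A and B on @queued, each
 * holding two bios, successive calls return A1, B1, A2, B2; a qnode that
 * still has bios left is moved to the tail after each pop.
 */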

/* init a service_queue, assumes the caller zeroed it */
static void throtl_service_queue_init(struct throtl_service_queue *sq)
{
	INIT_LIST_HEAD(&sq->queued[0]);
	INIT_LIST_HEAD(&sq->queued[1]);
	sq->pending_tree = RB_ROOT_CACHED;
	timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0);
}

static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
						struct request_queue *q,
						struct blkcg *blkcg)
{
	struct throtl_grp *tg;
	int rw;

	tg = kzalloc_node(sizeof(*tg), gfp, q->node);
	if (!tg)
		return NULL;

	if (blkg_rwstat_init(&tg->stat_bytes, gfp))
		goto err_free_tg;

	if (blkg_rwstat_init(&tg->stat_ios, gfp))
		goto err_exit_stat_bytes;

  	throtl_service_queue_init(&tg->service_queue);
  
  	for (rw = READ; rw <= WRITE; rw++) {
  		throtl_qnode_init(&tg->qnode_on_self[rw], tg);
  		throtl_qnode_init(&tg->qnode_on_parent[rw], tg);
  	}
  
  	RB_CLEAR_NODE(&tg->rb_node);
	tg->bps[READ][LIMIT_MAX] = U64_MAX;
	tg->bps[WRITE][LIMIT_MAX] = U64_MAX;
	tg->iops[READ][LIMIT_MAX] = UINT_MAX;
	tg->iops[WRITE][LIMIT_MAX] = UINT_MAX;
	tg->bps_conf[READ][LIMIT_MAX] = U64_MAX;
	tg->bps_conf[WRITE][LIMIT_MAX] = U64_MAX;
	tg->iops_conf[READ][LIMIT_MAX] = UINT_MAX;
	tg->iops_conf[WRITE][LIMIT_MAX] = UINT_MAX;
	/* LIMIT_LOW will have default value 0 */

	tg->latency_target = DFL_LATENCY_TARGET;
	tg->latency_target_conf = DFL_LATENCY_TARGET;
	tg->idletime_threshold = DFL_IDLE_THRESHOLD;
	tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;

	return &tg->pd;

err_exit_stat_bytes:
	blkg_rwstat_exit(&tg->stat_bytes);
err_free_tg:
	kfree(tg);
	return NULL;
  }

static void throtl_pd_init(struct blkg_policy_data *pd)
{
	struct throtl_grp *tg = pd_to_tg(pd);
	struct blkcg_gq *blkg = tg_to_blkg(tg);
	struct throtl_data *td = blkg->q->td;
	struct throtl_service_queue *sq = &tg->service_queue;

	/*
	 * If on the default hierarchy, we switch to properly hierarchical
	 * behavior where limits on a given throtl_grp are applied to the
	 * whole subtree rather than just the group itself.  e.g. If 16M
	 * read_bps limit is set on the root group, the whole system can't
	 * exceed 16M for the device.
	 *
	 * If not on the default hierarchy, the broken flat hierarchy
	 * behavior is retained where all throtl_grps are treated as if
	 * they're all separate root groups right below throtl_data.
	 * Limits of a group don't interact with limits of other groups
	 * regardless of the position of the group in the hierarchy.
	 */
	sq->parent_sq = &td->service_queue;
	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
		sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
	tg->td = td;
}

/*
 * Set has_rules[] if @tg or any of its parents have limits configured.
 * This doesn't require walking up to the top of the hierarchy as the
 * parent's has_rules[] is guaranteed to be correct.
 */
static void tg_update_has_rules(struct throtl_grp *tg)
{
	struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq);
	struct throtl_data *td = tg->td;
	int rw;

	for (rw = READ; rw <= WRITE; rw++)
		tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
			(td->limit_valid[td->limit_index] &&
			 (tg_bps_limit(tg, rw) != U64_MAX ||
			  tg_iops_limit(tg, rw) != UINT_MAX));
}
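
/*
 * Example: if a parent group has a read bps limit, has_rules[READ] becomes
 * true for every descendant as it comes online, even when the descendant
 * itself has no limit configured, so its bios still enter the throttling
 * machinery.
 */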

static void throtl_pd_online(struct blkg_policy_data *pd)
{
	struct throtl_grp *tg = pd_to_tg(pd);

	/*
	 * We don't want new groups to escape the limits of their ancestors.
	 * Update has_rules[] after a new group is brought online.
	 */
	tg_update_has_rules(tg);
}

#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
static void blk_throtl_update_limit_valid(struct throtl_data *td)
{
	struct cgroup_subsys_state *pos_css;
	struct blkcg_gq *blkg;
	bool low_valid = false;

	rcu_read_lock();
	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
		struct throtl_grp *tg = blkg_to_tg(blkg);

		if (tg->bps[READ][LIMIT_LOW] || tg->bps[WRITE][LIMIT_LOW] ||
		    tg->iops[READ][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) {
			low_valid = true;
			break;
		}
	}
	rcu_read_unlock();

	td->limit_valid[LIMIT_LOW] = low_valid;
}
#else
static inline void blk_throtl_update_limit_valid(struct throtl_data *td)
{
}
#endif

static void throtl_upgrade_state(struct throtl_data *td);

static void throtl_pd_offline(struct blkg_policy_data *pd)
{
	struct throtl_grp *tg = pd_to_tg(pd);

	tg->bps[READ][LIMIT_LOW] = 0;
	tg->bps[WRITE][LIMIT_LOW] = 0;
	tg->iops[READ][LIMIT_LOW] = 0;
	tg->iops[WRITE][LIMIT_LOW] = 0;

	blk_throtl_update_limit_valid(tg->td);

	if (!tg->td->limit_valid[tg->td->limit_index])
		throtl_upgrade_state(tg->td);
}

static void throtl_pd_free(struct blkg_policy_data *pd)
{
	struct throtl_grp *tg = pd_to_tg(pd);

	del_timer_sync(&tg->service_queue.pending_timer);
	blkg_rwstat_exit(&tg->stat_bytes);
	blkg_rwstat_exit(&tg->stat_ios);
	kfree(tg);
}

static struct throtl_grp *
throtl_rb_first(struct throtl_service_queue *parent_sq)
{
	struct rb_node *n;

	n = rb_first_cached(&parent_sq->pending_tree);
	WARN_ON_ONCE(!n);
	if (!n)
		return NULL;
	return rb_entry_tg(n);
}

static void throtl_rb_erase(struct rb_node *n,
			    struct throtl_service_queue *parent_sq)
{
	rb_erase_cached(n, &parent_sq->pending_tree);
	RB_CLEAR_NODE(n);
	--parent_sq->nr_pending;
}

static void update_min_dispatch_time(struct throtl_service_queue *parent_sq)
{
	struct throtl_grp *tg;

	tg = throtl_rb_first(parent_sq);
	if (!tg)
		return;

	parent_sq->first_pending_disptime = tg->disptime;
}

static void tg_service_queue_add(struct throtl_grp *tg)
{
	struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq;
	struct rb_node **node = &parent_sq->pending_tree.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct throtl_grp *__tg;
	unsigned long key = tg->disptime;
	bool leftmost = true;

	while (*node != NULL) {
		parent = *node;
		__tg = rb_entry_tg(parent);

		if (time_before(key, __tg->disptime))
			node = &parent->rb_left;
		else {
			node = &parent->rb_right;
			leftmost = false;
		}
	}

	rb_link_node(&tg->rb_node, parent, node);
	rb_insert_color_cached(&tg->rb_node, &parent_sq->pending_tree,
			       leftmost);
}

static void throtl_enqueue_tg(struct throtl_grp *tg)
{
	if (!(tg->flags & THROTL_TG_PENDING)) {
		tg_service_queue_add(tg);
		tg->flags |= THROTL_TG_PENDING;
		tg->service_queue.parent_sq->nr_pending++;
	}
}

static void throtl_dequeue_tg(struct throtl_grp *tg)
{
	if (tg->flags & THROTL_TG_PENDING) {
		throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
		tg->flags &= ~THROTL_TG_PENDING;
	}
}

/* Call with queue lock held */
static void throtl_schedule_pending_timer(struct throtl_service_queue *sq,
					  unsigned long expires)
{
	unsigned long max_expire = jiffies + 8 * sq_to_td(sq)->throtl_slice;

	/*
	 * Since we are adjusting the throttle limit dynamically, the sleep
	 * time calculated according to previous limit might be invalid. It's
	 * possible the cgroup sleep time is very long and no other cgroups
	 * have IO running so notify the limit changes. Make sure the cgroup
	 * doesn't sleep too long to avoid the missed notification.
	 */
	if (time_after(expires, max_expire))
		expires = max_expire;
	mod_timer(&sq->pending_timer, expires);
	throtl_log(sq, "schedule timer. delay=%lu jiffies=%lu",
		   expires - jiffies, jiffies);
}
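
/*
 * e.g. with throtl_slice = HZ / 10, max_expire is 0.8s out; an expiry
 * computed from a stale, much lower limit is clamped to that bound, so a
 * limit change is noticed within at most eight slices.
 */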

/**
 * throtl_schedule_next_dispatch - schedule the next dispatch cycle
 * @sq: the service_queue to schedule dispatch for
 * @force: force scheduling
 *
 * Arm @sq->pending_timer so that the next dispatch cycle starts on the
 * dispatch time of the first pending child.  Returns %true if either timer
 * is armed or there's no pending child left.  %false if the current
 * dispatch window is still open and the caller should continue
 * dispatching.
 *
 * If @force is %true, the dispatch timer is always scheduled and this
 * function is guaranteed to return %true.  This is to be used when the
 * caller can't dispatch itself and needs to invoke pending_timer
 * unconditionally.  Note that forced scheduling is likely to induce short
 * delay before dispatch starts even if @sq->first_pending_disptime is not
 * in the future and thus shouldn't be used in hot paths.
 */
static bool throtl_schedule_next_dispatch(struct throtl_service_queue *sq,
					  bool force)
{
	/* any pending children left? */
	if (!sq->nr_pending)
		return true;

	update_min_dispatch_time(sq);

	/* is the next dispatch time in the future? */
	if (force || time_after(sq->first_pending_disptime, jiffies)) {
		throtl_schedule_pending_timer(sq, sq->first_pending_disptime);
		return true;
	}

	/* tell the caller to continue dispatching */
	return false;
}
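
/*
 * Typical usage is a dispatch loop of the form (a sketch):
 *
 *	while (true) {
 *		throtl_select_dispatch(sq);
 *		if (throtl_schedule_next_dispatch(sq, false))
 *			break;
 *	}
 *
 * i.e. keep dispatching while the window is open; once the timer is armed
 * (or nothing is pending) the loop stops and the pending timer takes over.
 */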

static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
		bool rw, unsigned long start)
{
	tg->bytes_disp[rw] = 0;
	tg->io_disp[rw] = 0;
	atomic_set(&tg->io_split_cnt[rw], 0);

	/*
	 * Previous slice has expired. We must have trimmed it after last
	 * bio dispatch. That means since start of last slice, we never used
	 * that bandwidth. Do try to make use of that bandwidth while giving
	 * credit.
	 */
	if (time_after_eq(start, tg->slice_start[rw]))
		tg->slice_start[rw] = start;

	tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
	throtl_log(&tg->service_queue,
		   "[%c] new slice with credit start=%lu end=%lu jiffies=%lu",
		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
		   tg->slice_end[rw], jiffies);
}

static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
{
	tg->bytes_disp[rw] = 0;
	tg->io_disp[rw] = 0;
	tg->slice_start[rw] = jiffies;
	tg->slice_end[rw] = jiffies + tg->td->throtl_slice;

	atomic_set(&tg->io_split_cnt[rw], 0);

	throtl_log(&tg->service_queue,
		   "[%c] new slice start=%lu end=%lu jiffies=%lu",
		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
		   tg->slice_end[rw], jiffies);
}

static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
					unsigned long jiffy_end)
{
	tg->slice_end[rw] = roundup(jiffy_end, tg->td->throtl_slice);
}

static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw,
				       unsigned long jiffy_end)
{
	throtl_set_slice_end(tg, rw, jiffy_end);
	throtl_log(&tg->service_queue,
		   "[%c] extend slice start=%lu end=%lu jiffies=%lu",
		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
		   tg->slice_end[rw], jiffies);
}
  
  /* Determine if previously allocated or extended slice is complete or not */
static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
{
	if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw]))
		return false;

	return true;
}
  
  /* Trim the used slices and adjust slice start accordingly */
static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
{
	unsigned long nr_slices, time_elapsed, io_trim;
	u64 bytes_trim, tmp;

	BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw]));

	/*
	 * If bps are unlimited (-1), then the time slice doesn't get
	 * renewed. Don't try to trim the slice if the slice is used. A new
	 * slice will start when appropriate.
	 */
	if (throtl_slice_used(tg, rw))
		return;

	/*
	 * A bio has been dispatched. Also adjust slice_end. It might happen
	 * that initially cgroup limit was very low resulting in high
	 * slice_end, but later limit was bumped up and bio was dispatched
	 * sooner, then we need to reduce slice_end. A high bogus slice_end
	 * is bad because it does not allow new slice to start.
	 */
	throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice);

	time_elapsed = jiffies - tg->slice_start[rw];
	nr_slices = time_elapsed / tg->td->throtl_slice;

	if (!nr_slices)
		return;
	tmp = tg_bps_limit(tg, rw) * tg->td->throtl_slice * nr_slices;
	do_div(tmp, HZ);
	bytes_trim = tmp;

	io_trim = (tg_iops_limit(tg, rw) * tg->td->throtl_slice * nr_slices) /
		HZ;

	if (!bytes_trim && !io_trim)
		return;

	if (tg->bytes_disp[rw] >= bytes_trim)
		tg->bytes_disp[rw] -= bytes_trim;
	else
		tg->bytes_disp[rw] = 0;

	if (tg->io_disp[rw] >= io_trim)
		tg->io_disp[rw] -= io_trim;
	else
		tg->io_disp[rw] = 0;

	tg->slice_start[rw] += nr_slices * tg->td->throtl_slice;

	throtl_log(&tg->service_queue,
		   "[%c] trim slice nr=%lu bytes=%llu io=%lu start=%lu end=%lu jiffies=%lu",
		   rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim,
		   tg->slice_start[rw], tg->slice_end[rw], jiffies);
}
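
/*
 * Worked example (assuming HZ = 1000 and throtl_slice = 100 jiffies): with
 * an 8 MB/s bps limit and 250 jiffies elapsed, nr_slices = 2 and
 * bytes_trim = 8 MB * 100 / 1000 * 2 = 1.6 MB, so up to 1.6 MB is forgiven
 * from bytes_disp and slice_start advances by 200 jiffies.
 */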

static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
				  u32 iops_limit, unsigned long *wait)
{
	bool rw = bio_data_dir(bio);
	unsigned int io_allowed;
	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
	u64 tmp;

	if (iops_limit == UINT_MAX) {
		if (wait)
			*wait = 0;
		return true;
	}

	jiffy_elapsed = jiffies - tg->slice_start[rw];

	/* Round up to the next throttle slice, wait time must be nonzero */
	jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);

	/*
	 * jiffy_elapsed_rnd should not be a big value: since the minimum
	 * iops can be 1, jiffy_elapsed_rnd is at most the equivalent of 1
	 * second, as we allow dispatch after 1 second and by then the
	 * slice should have been trimmed.
	 */
	tmp = (u64)iops_limit * jiffy_elapsed_rnd;
	do_div(tmp, HZ);

	if (tmp > UINT_MAX)
		io_allowed = UINT_MAX;
	else
		io_allowed = tmp;

	if (tg->io_disp[rw] + 1 <= io_allowed) {
		if (wait)
			*wait = 0;
		return true;
	}

	/* Calc approx time to dispatch */
	jiffy_wait = jiffy_elapsed_rnd - jiffy_elapsed;

	if (wait)
		*wait = jiffy_wait;
	return false;
}
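
/*
 * Worked example (assuming HZ = 1000 and throtl_slice = 100 jiffies): with
 * iops_limit = 100 and a bio arriving 50 jiffies into the slice,
 * jiffy_elapsed_rnd rounds up to 100 and io_allowed = 100 * 100 / 1000 =
 * 10; an 11th bio in the slice waits the remaining 50 jiffies.
 */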

static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
				 u64 bps_limit, unsigned long *wait)
{
	bool rw = bio_data_dir(bio);
	u64 bytes_allowed, extra_bytes, tmp;
	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
	unsigned int bio_size = throtl_bio_data_size(bio);

	if (bps_limit == U64_MAX) {
		if (wait)
			*wait = 0;
		return true;
	}

	jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];

	/* Slice has just started. Consider one slice interval */
	if (!jiffy_elapsed)
		jiffy_elapsed_rnd = tg->td->throtl_slice;

	jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);

	tmp = bps_limit * jiffy_elapsed_rnd;
	do_div(tmp, HZ);
	bytes_allowed = tmp;

	if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
		if (wait)
			*wait = 0;
		return true;
	}

	/* Calc approx time to dispatch */
	extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
	jiffy_wait = div64_u64(extra_bytes * HZ, bps_limit);

	if (!jiffy_wait)
		jiffy_wait = 1;

	/*
	 * This wait time is without taking into consideration the rounding
	 * up we did. Add that time also.
	 */
	jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed);
	if (wait)
		*wait = jiffy_wait;
	return false;
}
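
/*
 * Worked example (assuming HZ = 1000 and throtl_slice = 100 jiffies): with
 * bps_limit = 1 MB/s and a 256k bio arriving 100 jiffies into a fresh
 * slice, bytes_allowed = 1 MB / 10 = ~102k, extra_bytes = ~154k and
 * jiffy_wait = ~150 jiffies before the bio fits within the budget.
 */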
  
  /*
   * Returns whether one can dispatch a bio or not. Also returns approx number
 * of jiffies to wait before this bio is within the IO rate and can be dispatched
   */
static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
			    unsigned long *wait)
{
	bool rw = bio_data_dir(bio);
	unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0;
	u64 bps_limit = tg_bps_limit(tg, rw);
	u32 iops_limit = tg_iops_limit(tg, rw);

	/*
	 * Currently the whole state machine of the group depends on the
	 * first bio queued in the group's bio list. So one should not be
	 * calling this function with a different bio if there are other
	 * bios queued.
	 */
	BUG_ON(tg->service_queue.nr_queued[rw] &&
	       bio != throtl_peek_queued(&tg->service_queue.queued[rw]));

	/* If tg->bps = -1, then BW is unlimited */
	if (bps_limit == U64_MAX && iops_limit == UINT_MAX) {
		if (wait)
			*wait = 0;
		return true;
	}

	/*
	 * If previous slice expired, start a new one otherwise renew/extend
	 * existing slice to make sure it is at least throtl_slice interval
	 * long since now. New slice is started only for empty throttle group.
	 * If there is queued bio, that means there should be an active
	 * slice and it should be extended instead.
	 */
	if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
		throtl_start_new_slice(tg, rw);
	else {
		if (time_before(tg->slice_end[rw],
		    jiffies + tg->td->throtl_slice))
			throtl_extend_slice(tg, rw,
				jiffies + tg->td->throtl_slice);
	}

	if (iops_limit != UINT_MAX)
		tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0);

	if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
	    tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
		if (wait)
			*wait = 0;
		return true;
	}

	max_wait = max(bps_wait, iops_wait);

	if (wait)
		*wait = max_wait;

	if (time_before(tg->slice_end[rw], jiffies + max_wait))
		throtl_extend_slice(tg, rw, jiffies + max_wait);

	return false;
  }
  
  static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
  {
  	bool rw = bio_data_dir(bio);
	unsigned int bio_size = throtl_bio_data_size(bio);

	/* Charge the bio to the group */
	tg->bytes_disp[rw] += bio_size;
	tg->io_disp[rw]++;
	tg->last_bytes_disp[rw] += bio_size;
	tg->last_io_disp[rw]++;

	/*
	 * BIO_THROTTLED is used to prevent the same bio from being throttled
	 * more than once as a throttled bio will go through blk-throtl the
	 * second time when it eventually gets issued.  Set it when a bio
	 * is being charged to a tg.
	 */
	if (!bio_flagged(bio, BIO_THROTTLED))
		bio_set_flag(bio, BIO_THROTTLED);
}
  /**
   * throtl_add_bio_tg - add a bio to the specified throtl_grp
   * @bio: bio to add
   * @qn: qnode to use
   * @tg: the target throtl_grp
   *
   * Add @bio to @tg's service_queue using @qn.  If @qn is not specified,
   * tg->qnode_on_self[] is used.
   */
  static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
  			      struct throtl_grp *tg)
  {
  	struct throtl_service_queue *sq = &tg->service_queue;
  	bool rw = bio_data_dir(bio);
  
  	if (!qn)
  		qn = &tg->qnode_on_self[rw];
  
  	/*
  	 * If @tg doesn't currently have any bios queued in the same
  	 * direction, queueing @bio can change when @tg should be
  	 * dispatched.  Mark that @tg was empty.  This is automatically
  	 * cleared on the next tg_update_disptime().
  	 */
  	if (!sq->nr_queued[rw])
  		tg->flags |= THROTL_TG_WAS_EMPTY;
  
  	throtl_qnode_add_bio(bio, qn, &sq->queued[rw]);
  
  	sq->nr_queued[rw]++;
  	throtl_enqueue_tg(tg);
  }
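  
  /*
   * Recompute when @tg may dispatch next from the wait times of the head
   * bios in both directions and requeue it at that time.
   */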
  static void tg_update_disptime(struct throtl_grp *tg)
  {
  	struct throtl_service_queue *sq = &tg->service_queue;
  	unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
  	struct bio *bio;
  
  	bio = throtl_peek_queued(&sq->queued[READ]);
  	if (bio)
  		tg_may_dispatch(tg, bio, &read_wait);
  
  	bio = throtl_peek_queued(&sq->queued[WRITE]);
  	if (bio)
  		tg_may_dispatch(tg, bio, &write_wait);
  
  	min_wait = min(read_wait, write_wait);
  	disptime = jiffies + min_wait;
  
  	/* Update dispatch time */
  	throtl_dequeue_tg(tg);
  	tg->disptime = disptime;
  	throtl_enqueue_tg(tg);
  
  	/* see throtl_add_bio_tg() */
  	tg->flags &= ~THROTL_TG_WAS_EMPTY;
  }
  static void start_parent_slice_with_credit(struct throtl_grp *child_tg,
  					struct throtl_grp *parent_tg, bool rw)
  {
  	if (throtl_slice_used(parent_tg, rw)) {
  		throtl_start_new_slice_with_credit(parent_tg, rw,
  				child_tg->slice_start[rw]);
  	}
  
  }
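  
  /*
   * Move the head bio in direction @rw one level up: pop it from @tg,
   * charge it, and either queue it on the parent tg or, at the top level,
   * put it on td->service_queue's bio list for issuing.
   */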
  static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
  {
  	struct throtl_service_queue *sq = &tg->service_queue;
  	struct throtl_service_queue *parent_sq = sq->parent_sq;
  	struct throtl_grp *parent_tg = sq_to_tg(parent_sq);
  	struct throtl_grp *tg_to_put = NULL;
  	struct bio *bio;
  
  	/*
  	 * @bio is being transferred from @tg to @parent_sq.  Popping a bio
  	 * from @tg may put its reference and @parent_sq might end up
  	 * getting released prematurely.  Remember the tg to put and put it
  	 * after @bio is transferred to @parent_sq.
  	 */
  	bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put);
  	sq->nr_queued[rw]--;
  
  	throtl_charge_bio(tg, bio);
  
  	/*
  	 * If our parent is another tg, we just need to transfer @bio to
  	 * the parent using throtl_add_bio_tg().  If our parent is
  	 * @td->service_queue, @bio is ready to be issued.  Put it on its
  	 * bio_lists[] and decrease total number queued.  The caller is
  	 * responsible for issuing these bios.
  	 */
  	if (parent_tg) {
  		throtl_add_bio_tg(bio, &tg->qnode_on_parent[rw], parent_tg);
  		start_parent_slice_with_credit(tg, parent_tg, rw);
  	} else {
  		throtl_qnode_add_bio(bio, &tg->qnode_on_parent[rw],
  				     &parent_sq->queued[rw]);
  		BUG_ON(tg->td->nr_queued[rw] <= 0);
  		tg->td->nr_queued[rw]--;
  	}
  
  	throtl_trim_slice(tg, rw);
  
  	if (tg_to_put)
  		blkg_put(tg_to_blkg(tg_to_put));
  }
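  
  /*
   * Dispatch up to THROTL_GRP_QUANTUM bios from @tg, split roughly 75%/25%
   * between reads and writes; returns the number of bios moved.
   */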
  static int throtl_dispatch_tg(struct throtl_grp *tg)
  {
  	struct throtl_service_queue *sq = &tg->service_queue;
  	unsigned int nr_reads = 0, nr_writes = 0;
  	unsigned int max_nr_reads = THROTL_GRP_QUANTUM * 3 / 4;
  	unsigned int max_nr_writes = THROTL_GRP_QUANTUM - max_nr_reads;
  	struct bio *bio;
  
  	/* Try to dispatch 75% READS and 25% WRITES */
  	while ((bio = throtl_peek_queued(&sq->queued[READ])) &&
  	       tg_may_dispatch(tg, bio, NULL)) {
  
  		tg_dispatch_one_bio(tg, bio_data_dir(bio));
  		nr_reads++;
  
  		if (nr_reads >= max_nr_reads)
  			break;
  	}
  
  	while ((bio = throtl_peek_queued(&sq->queued[WRITE])) &&
  	       tg_may_dispatch(tg, bio, NULL)) {
  
  		tg_dispatch_one_bio(tg, bio_data_dir(bio));
  		nr_writes++;
  
  		if (nr_writes >= max_nr_writes)
  			break;
  	}
  
  	return nr_reads + nr_writes;
  }
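  
  /*
   * Dispatch from the children of @parent_sq whose disptime has passed,
   * capping one run at THROTL_QUANTUM bios.
   */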
  static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
  {
  	unsigned int nr_disp = 0;
  
  	while (1) {
  		struct throtl_grp *tg;
  		struct throtl_service_queue *sq;
  
  		if (!parent_sq->nr_pending)
  			break;
  
  		tg = throtl_rb_first(parent_sq);
  		if (!tg)
  			break;
  
  		if (time_before(jiffies, tg->disptime))
  			break;
  
  		throtl_dequeue_tg(tg);
  
  		nr_disp += throtl_dispatch_tg(tg);
  
  		sq = &tg->service_queue;
  		if (sq->nr_queued[READ] || sq->nr_queued[WRITE])
  			tg_update_disptime(tg);
  
  		if (nr_disp >= THROTL_QUANTUM)
  			break;
  	}
  
  	return nr_disp;
  }
  static bool throtl_can_upgrade(struct throtl_data *td,
  	struct throtl_grp *this_tg);
  /**
   * throtl_pending_timer_fn - timer function for service_queue->pending_timer
   * @t: the pending_timer member of the throtl_service_queue being serviced
   *
   * This timer is armed when a child throtl_grp with active bios becomes
   * pending and queued on the service_queue's pending_tree and expires when
   * the first child throtl_grp should be dispatched.  This function
   * dispatches bios from the children throtl_grps to the parent
   * service_queue.
   *
   * If the parent's parent is another throtl_grp, dispatching is propagated
   * by either arming its pending_timer or repeating dispatch directly.  If
   * the top-level service_tree is reached, throtl_data->dispatch_work is
   * kicked so that the ready bios are issued.
   */
  static void throtl_pending_timer_fn(struct timer_list *t)
  {
  	struct throtl_service_queue *sq = from_timer(sq, t, pending_timer);
  	struct throtl_grp *tg = sq_to_tg(sq);
  	struct throtl_data *td = sq_to_td(sq);
  	struct request_queue *q = td->queue;
  	struct throtl_service_queue *parent_sq;
  	bool dispatched;
  	int ret;
  
  	spin_lock_irq(&q->queue_lock);
  	if (throtl_can_upgrade(td, NULL))
  		throtl_upgrade_state(td);
  
  again:
  	parent_sq = sq->parent_sq;
  	dispatched = false;
  
  	while (true) {
  		throtl_log(sq, "dispatch nr_queued=%u read=%u write=%u",
  			   sq->nr_queued[READ] + sq->nr_queued[WRITE],
  			   sq->nr_queued[READ], sq->nr_queued[WRITE]);
  
  		ret = throtl_select_dispatch(sq);
  		if (ret) {
  			throtl_log(sq, "bios disp=%u", ret);
  			dispatched = true;
  		}
  
  		if (throtl_schedule_next_dispatch(sq, false))
  			break;
  
  		/* this dispatch window is still open, relax and repeat */
  		spin_unlock_irq(&q->queue_lock);
  		cpu_relax();
  		spin_lock_irq(&q->queue_lock);
  	}
  
  	if (!dispatched)
  		goto out_unlock;
  
  	if (parent_sq) {
  		/* @parent_sq is another throtl_grp, propagate dispatch */
  		if (tg->flags & THROTL_TG_WAS_EMPTY) {
  			tg_update_disptime(tg);
  			if (!throtl_schedule_next_dispatch(parent_sq, false)) {
  				/* window is already open, repeat dispatching */
  				sq = parent_sq;
  				tg = sq_to_tg(sq);
  				goto again;
  			}
  		}
  	} else {
  		/* reached the top-level, queue issuing */
  		queue_work(kthrotld_workqueue, &td->dispatch_work);
  	}
  out_unlock:
  	spin_unlock_irq(&q->queue_lock);
  }

  /**
   * blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work
   * @work: work item being executed
   *
   * This function is queued for execution when bios reach the bio_lists[]
   * of throtl_data->service_queue.  Those bios are ready and issued by this
   * function.
   */
  static void blk_throtl_dispatch_work_fn(struct work_struct *work)
  {
  	struct throtl_data *td = container_of(work, struct throtl_data,
  					      dispatch_work);
  	struct throtl_service_queue *td_sq = &td->service_queue;
  	struct request_queue *q = td->queue;
  	struct bio_list bio_list_on_stack;
  	struct bio *bio;
  	struct blk_plug plug;
  	int rw;
  
  	bio_list_init(&bio_list_on_stack);
  	spin_lock_irq(&q->queue_lock);
  	for (rw = READ; rw <= WRITE; rw++)
  		while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL)))
  			bio_list_add(&bio_list_on_stack, bio);
  	spin_unlock_irq(&q->queue_lock);
  
  	if (!bio_list_empty(&bio_list_on_stack)) {
  		blk_start_plug(&plug);
  		while ((bio = bio_list_pop(&bio_list_on_stack)))
  			submit_bio_noacct(bio);
  		blk_finish_plug(&plug);
  	}
  }
  static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd,
  			      int off)
  {
  	struct throtl_grp *tg = pd_to_tg(pd);
  	u64 v = *(u64 *)((void *)tg + off);
  
  	if (v == U64_MAX)
  		return 0;
  	return __blkg_prfill_u64(sf, pd, v);
  }
  static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd,
  			       int off)
  {
  	struct throtl_grp *tg = pd_to_tg(pd);
  	unsigned int v = *(unsigned int *)((void *)tg + off);
  
  	if (v == UINT_MAX)
  		return 0;
  	return __blkg_prfill_u64(sf, pd, v);
  }
  static int tg_print_conf_u64(struct seq_file *sf, void *v)
  {
  	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_u64,
  			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
  	return 0;
  }
  static int tg_print_conf_uint(struct seq_file *sf, void *v)
  {
  	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_uint,
  			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
  	return 0;
  }
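  
  /*
   * Apply a limit change on @tg (or, if @global, on the whole hierarchy):
   * refresh has_rules[] and the idle/latency thresholds of the subtree,
   * restart the slices and reschedule dispatch if @tg is pending.
   */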
  static void tg_conf_updated(struct throtl_grp *tg, bool global)
  {
  	struct throtl_service_queue *sq = &tg->service_queue;
  	struct cgroup_subsys_state *pos_css;
  	struct blkcg_gq *blkg;
  
  	throtl_log(&tg->service_queue,
  		   "limit change rbps=%llu wbps=%llu riops=%u wiops=%u",
  		   tg_bps_limit(tg, READ), tg_bps_limit(tg, WRITE),
  		   tg_iops_limit(tg, READ), tg_iops_limit(tg, WRITE));
  
  	/*
  	 * Update has_rules[] flags for the updated tg's subtree.  A tg is
  	 * considered to have rules if either the tg itself or any of its
  	 * ancestors has rules.  This identifies groups without any
  	 * restrictions in the whole hierarchy and allows them to bypass
  	 * blk-throttle.
  	 */
  	blkg_for_each_descendant_pre(blkg, pos_css,
  			global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) {
  		struct throtl_grp *this_tg = blkg_to_tg(blkg);
  		struct throtl_grp *parent_tg;
  
  		tg_update_has_rules(this_tg);
  		/* ignore root/second level */
  		if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent ||
  		    !blkg->parent->parent)
  			continue;
  		parent_tg = blkg_to_tg(blkg->parent);
  		/*
  		 * make sure all children have a lower idle time threshold
  		 * and a higher latency target
  		 */
  		this_tg->idletime_threshold = min(this_tg->idletime_threshold,
  				parent_tg->idletime_threshold);
  		this_tg->latency_target = max(this_tg->latency_target,
  				parent_tg->latency_target);
  	}
  
  	/*
  	 * We're already holding queue_lock and know @tg is valid.  Let's
  	 * apply the new config directly.
  	 *
  	 * Restart the slices for both READ and WRITE.  It might happen
  	 * that a group's limits are dropped suddenly and we don't want to
  	 * account recently dispatched IO with the new low rate.
  	 */
  	throtl_start_new_slice(tg, READ);
  	throtl_start_new_slice(tg, WRITE);
  
  	if (tg->flags & THROTL_TG_PENDING) {
  		tg_update_disptime(tg);
  		throtl_schedule_next_dispatch(sq->parent_sq, true);
  	}
  }
  
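  /*
   * Common write handler for the legacy per-device knobs: parse a single
   * number (0 means "no limit") and store it at the cftype-private offset
   * inside struct throtl_grp before applying the change.
   */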
  static ssize_t tg_set_conf(struct kernfs_open_file *of,
  			   char *buf, size_t nbytes, loff_t off, bool is_u64)
  {
  	struct blkcg *blkcg = css_to_blkcg(of_css(of));
  	struct blkg_conf_ctx ctx;
  	struct throtl_grp *tg;
  	int ret;
  	u64 v;
  
  	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
  	if (ret)
  		return ret;
  
  	ret = -EINVAL;
  	if (sscanf(ctx.body, "%llu", &v) != 1)
  		goto out_finish;
  	if (!v)
  		v = U64_MAX;
  
  	tg = blkg_to_tg(ctx.blkg);
  
  	if (is_u64)
  		*(u64 *)((void *)tg + of_cft(of)->private) = v;
  	else
  		*(unsigned int *)((void *)tg + of_cft(of)->private) = v;
  
  	tg_conf_updated(tg, false);
  	ret = 0;
  out_finish:
  	blkg_conf_finish(&ctx);
  	return ret ?: nbytes;
  }
  static ssize_t tg_set_conf_u64(struct kernfs_open_file *of,
  			       char *buf, size_t nbytes, loff_t off)
  {
  	return tg_set_conf(of, buf, nbytes, off, true);
  }
  static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
  				char *buf, size_t nbytes, loff_t off)
  {
  	return tg_set_conf(of, buf, nbytes, off, false);
  }
  static int tg_print_rwstat(struct seq_file *sf, void *v)
  {
  	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
  			  blkg_prfill_rwstat, &blkcg_policy_throtl,
  			  seq_cft(sf)->private, true);
  	return 0;
  }
  
  static u64 tg_prfill_rwstat_recursive(struct seq_file *sf,
  				      struct blkg_policy_data *pd, int off)
  {
  	struct blkg_rwstat_sample sum;
  
  	blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_throtl, off,
  				  &sum);
  	return __blkg_prfill_rwstat(sf, pd, &sum);
  }
  
  static int tg_print_rwstat_recursive(struct seq_file *sf, void *v)
  {
  	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
  			  tg_prfill_rwstat_recursive, &blkcg_policy_throtl,
  			  seq_cft(sf)->private, true);
  	return 0;
  }
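  
  /*
   * Legacy (cgroup v1) interface exposed via the blkio controller.  Each
   * knob takes "<major>:<minor> <value>" per device; e.g. with an
   * illustrative 8:16 (sdb) device number,
   *
   *   echo "8:16 1048576" > blkio.throttle.read_bps_device
   *
   * limits that cgroup's reads on sdb to 1MB/s.
   */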
  static struct cftype throtl_legacy_files[] = {
  	{
  		.name = "throttle.read_bps_device",
  		.private = offsetof(struct throtl_grp, bps[READ][LIMIT_MAX]),
  		.seq_show = tg_print_conf_u64,
  		.write = tg_set_conf_u64,
  	},
  	{
  		.name = "throttle.write_bps_device",
  		.private = offsetof(struct throtl_grp, bps[WRITE][LIMIT_MAX]),
  		.seq_show = tg_print_conf_u64,
  		.write = tg_set_conf_u64,
  	},
  	{
  		.name = "throttle.read_iops_device",
  		.private = offsetof(struct throtl_grp, iops[READ][LIMIT_MAX]),
  		.seq_show = tg_print_conf_uint,
  		.write = tg_set_conf_uint,
  	},
  	{
  		.name = "throttle.write_iops_device",
  		.private = offsetof(struct throtl_grp, iops[WRITE][LIMIT_MAX]),
  		.seq_show = tg_print_conf_uint,
  		.write = tg_set_conf_uint,
  	},
  	{
  		.name = "throttle.io_service_bytes",
  		.private = offsetof(struct throtl_grp, stat_bytes),
  		.seq_show = tg_print_rwstat,
  	},
  	{
  		.name = "throttle.io_service_bytes_recursive",
  		.private = offsetof(struct throtl_grp, stat_bytes),
  		.seq_show = tg_print_rwstat_recursive,
  	},
  	{
  		.name = "throttle.io_serviced",
  		.private = offsetof(struct throtl_grp, stat_ios),
  		.seq_show = tg_print_rwstat,
  	},
  	{
  		.name = "throttle.io_serviced_recursive",
  		.private = offsetof(struct throtl_grp, stat_ios),
  		.seq_show = tg_print_rwstat_recursive,
  	},
  	{ }	/* terminate */
  };
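  
  /*
   * Print one device line for "low"/"max", skipping devices whose settings
   * are all still at the defaults for that limit.
   */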
  static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
  			 int off)
  {
  	struct throtl_grp *tg = pd_to_tg(pd);
  	const char *dname = blkg_dev_name(pd->blkg);
  	char bufs[4][21] = { "max", "max", "max", "max" };
  	u64 bps_dft;
  	unsigned int iops_dft;
  	char idle_time[26] = "";
  	char latency_time[26] = "";
  
  	if (!dname)
  		return 0;
  
  	if (off == LIMIT_LOW) {
  		bps_dft = 0;
  		iops_dft = 0;
  	} else {
  		bps_dft = U64_MAX;
  		iops_dft = UINT_MAX;
  	}
  
  	if (tg->bps_conf[READ][off] == bps_dft &&
  	    tg->bps_conf[WRITE][off] == bps_dft &&
  	    tg->iops_conf[READ][off] == iops_dft &&
  	    tg->iops_conf[WRITE][off] == iops_dft &&
  	    (off != LIMIT_LOW ||
  	     (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD &&
  	      tg->latency_target_conf == DFL_LATENCY_TARGET)))
  		return 0;
  
  	if (tg->bps_conf[READ][off] != U64_MAX)
  		snprintf(bufs[0], sizeof(bufs[0]), "%llu",
  			tg->bps_conf[READ][off]);
  	if (tg->bps_conf[WRITE][off] != U64_MAX)
  		snprintf(bufs[1], sizeof(bufs[1]), "%llu",
  			tg->bps_conf[WRITE][off]);
  	if (tg->iops_conf[READ][off] != UINT_MAX)
  		snprintf(bufs[2], sizeof(bufs[2]), "%u",
  			tg->iops_conf[READ][off]);
  	if (tg->iops_conf[WRITE][off] != UINT_MAX)
  		snprintf(bufs[3], sizeof(bufs[3]), "%u",
  			tg->iops_conf[WRITE][off]);
  	if (off == LIMIT_LOW) {
  		if (tg->idletime_threshold_conf == ULONG_MAX)
  			strcpy(idle_time, " idle=max");
  		else
  			snprintf(idle_time, sizeof(idle_time), " idle=%lu",
  				tg->idletime_threshold_conf);
  
  		if (tg->latency_target_conf == ULONG_MAX)
  			strcpy(latency_time, " latency=max");
  		else
  			snprintf(latency_time, sizeof(latency_time),
  				" latency=%lu", tg->latency_target_conf);
  	}
  
  	seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n",
  		   dname, bufs[0], bufs[1], bufs[2], bufs[3], idle_time,
  		   latency_time);
  	return 0;
  }
  static int tg_print_limit(struct seq_file *sf, void *v)
  {
  	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_limit,
  			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
  	return 0;
  }
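  
  /*
   * Write handler shared by "io.low" and "io.max": parse "MAJ:MIN" followed
   * by rbps=/wbps=/riops=/wiops= tokens (plus idle= and latency= for the
   * low limit) and apply the resulting configuration.
   */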
  static ssize_t tg_set_limit(struct kernfs_open_file *of,
  			  char *buf, size_t nbytes, loff_t off)
  {
  	struct blkcg *blkcg = css_to_blkcg(of_css(of));
  	struct blkg_conf_ctx ctx;
  	struct throtl_grp *tg;
  	u64 v[4];
  	unsigned long idle_time;
  	unsigned long latency_time;
  	int ret;
  	int index = of_cft(of)->private;
  
  	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
  	if (ret)
  		return ret;
  
  	tg = blkg_to_tg(ctx.blkg);
  	v[0] = tg->bps_conf[READ][index];
  	v[1] = tg->bps_conf[WRITE][index];
  	v[2] = tg->iops_conf[READ][index];
  	v[3] = tg->iops_conf[WRITE][index];
  
  	idle_time = tg->idletime_threshold_conf;
  	latency_time = tg->latency_target_conf;
  	while (true) {
  		char tok[27];	/* wiops=18446744073709551616 */
  		char *p;
  		u64 val = U64_MAX;
  		int len;
  
  		if (sscanf(ctx.body, "%26s%n", tok, &len) != 1)
  			break;
  		if (tok[0] == '\0')
  			break;
  		ctx.body += len;
  
  		ret = -EINVAL;
  		p = tok;
  		strsep(&p, "=");
  		if (!p || (sscanf(p, "%llu", &val) != 1 && strcmp(p, "max")))
  			goto out_finish;
  
  		ret = -ERANGE;
  		if (!val)
  			goto out_finish;
  
  		ret = -EINVAL;
  		if (!strcmp(tok, "rbps") && val > 1)
  			v[0] = val;
  		else if (!strcmp(tok, "wbps") && val > 1)
  			v[1] = val;
  		else if (!strcmp(tok, "riops") && val > 1)
  			v[2] = min_t(u64, val, UINT_MAX);
  		else if (!strcmp(tok, "wiops") && val > 1)
  			v[3] = min_t(u64, val, UINT_MAX);
  		else if (off == LIMIT_LOW && !strcmp(tok, "idle"))
  			idle_time = val;
  		else if (off == LIMIT_LOW && !strcmp(tok, "latency"))
  			latency_time = val;
  		else
  			goto out_finish;
  	}
  
  	tg->bps_conf[READ][index] = v[0];
  	tg->bps_conf[WRITE][index] = v[1];
  	tg->iops_conf[READ][index] = v[2];
  	tg->iops_conf[WRITE][index] = v[3];
  
  	if (index == LIMIT_MAX) {
  		tg->bps[READ][index] = v[0];
  		tg->bps[WRITE][index] = v[1];
  		tg->iops[READ][index] = v[2];
  		tg->iops[WRITE][index] = v[3];
  	}
  	tg->bps[READ][LIMIT_LOW] = min(tg->bps_conf[READ][LIMIT_LOW],
  		tg->bps_conf[READ][LIMIT_MAX]);
  	tg->bps[WRITE][LIMIT_LOW] = min(tg->bps_conf[WRITE][LIMIT_LOW],
  		tg->bps_conf[WRITE][LIMIT_MAX]);
  	tg->iops[READ][LIMIT_LOW] = min(tg->iops_conf[READ][LIMIT_LOW],
  		tg->iops_conf[READ][LIMIT_MAX]);
  	tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],
  		tg->iops_conf[WRITE][LIMIT_MAX]);
  	tg->idletime_threshold_conf = idle_time;
  	tg->latency_target_conf = latency_time;
  
  	/* force user to configure all settings for low limit  */
  	if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] ||
  	      tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) ||
  	    tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD ||
  	    tg->latency_target_conf == DFL_LATENCY_TARGET) {
  		tg->bps[READ][LIMIT_LOW] = 0;
  		tg->bps[WRITE][LIMIT_LOW] = 0;
  		tg->iops[READ][LIMIT_LOW] = 0;
  		tg->iops[WRITE][LIMIT_LOW] = 0;
  		tg->idletime_threshold = DFL_IDLE_THRESHOLD;
  		tg->latency_target = DFL_LATENCY_TARGET;
  	} else if (index == LIMIT_LOW) {
  		tg->idletime_threshold = tg->idletime_threshold_conf;
  		tg->latency_target = tg->latency_target_conf;
  	}
  
  	blk_throtl_update_limit_valid(tg->td);
  	if (tg->td->limit_valid[LIMIT_LOW]) {
  		if (index == LIMIT_LOW)
  			tg->td->limit_index = LIMIT_LOW;
  	} else
  		tg->td->limit_index = LIMIT_MAX;
  	tg_conf_updated(tg, index == LIMIT_LOW &&
  		tg->td->limit_valid[LIMIT_LOW]);
  	ret = 0;
  out_finish:
  	blkg_conf_finish(&ctx);
  	return ret ?: nbytes;
  }
  
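  /*
   * cgroup v2 interface: "io.low" (with CONFIG_BLK_DEV_THROTTLING_LOW) and
   * "io.max".  For example, with an illustrative 8:16 device number,
   *
   *   echo "8:16 rbps=2097152 wiops=120" > io.max
   *
   * caps the cgroup at 2MB/s reads and 120 write IOs per second on that
   * device; writing "max" for a field removes that limit.
   */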
  static struct cftype throtl_files[] = {
  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
  	{
  		.name = "low",
  		.flags = CFTYPE_NOT_ON_ROOT,
  		.seq_show = tg_print_limit,
  		.write = tg_set_limit,
  		.private = LIMIT_LOW,
  	},
  #endif
  	{
  		.name = "max",
  		.flags = CFTYPE_NOT_ON_ROOT,
  		.seq_show = tg_print_limit,
  		.write = tg_set_limit,
  		.private = LIMIT_MAX,
  	},
  	{ }	/* terminate */
  };
  
  static void throtl_shutdown_wq(struct request_queue *q)
  {
  	struct throtl_data *td = q->td;
  
  	cancel_work_sync(&td->dispatch_work);
  }
  static struct blkcg_policy blkcg_policy_throtl = {
  	.dfl_cftypes		= throtl_files,
  	.legacy_cftypes		= throtl_legacy_files,
  
  	.pd_alloc_fn		= throtl_pd_alloc,
  	.pd_init_fn		= throtl_pd_init,
  	.pd_online_fn		= throtl_pd_online,
  	.pd_offline_fn		= throtl_pd_offline,
  	.pd_free_fn		= throtl_pd_free,
  };
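  
  /*
   * Return the last time @tg exceeded a configured low limit, looking only
   * at the directions that actually have one.
   */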
  static unsigned long __tg_last_low_overflow_time(struct throtl_grp *tg)
  {
  	unsigned long rtime = jiffies, wtime = jiffies;
  
  	if (tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW])
  		rtime = tg->last_low_overflow_time[READ];
  	if (tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW])
  		wtime = tg->last_low_overflow_time[WRITE];
  	return min(rtime, wtime);
  }
  
  /* tg should not be an intermediate node */
  static unsigned long tg_last_low_overflow_time(struct throtl_grp *tg)
  {
  	struct throtl_service_queue *parent_sq;
  	struct throtl_grp *parent = tg;
  	unsigned long ret = __tg_last_low_overflow_time(tg);
  
  	while (true) {
  		parent_sq = parent->service_queue.parent_sq;
  		parent = sq_to_tg(parent_sq);
  		if (!parent)
  			break;
  
  		/*
  		 * The parent doesn't have a low limit; it always reaches the
  		 * low limit, so its overflow time is useless for its children.
  		 */
  		if (!parent->bps[READ][LIMIT_LOW] &&
  		    !parent->iops[READ][LIMIT_LOW] &&
  		    !parent->bps[WRITE][LIMIT_LOW] &&
  		    !parent->iops[WRITE][LIMIT_LOW])
  			continue;
  		if (time_after(__tg_last_low_overflow_time(parent), ret))
  			ret = __tg_last_low_overflow_time(parent);
  	}
  	return ret;
  }
  static bool throtl_tg_is_idle(struct throtl_grp *tg)
  {
  	/*
  	 * cgroup is idle if:
  	 * - single idle is too long, longer than a fixed value (in case the
  	 *   user configures a too-big threshold) or 4 times the idletime
  	 *   threshold
  	 * - average think time is more than threshold
  	 * - IO latency is largely below threshold
  	 */
  	unsigned long time;
  	bool ret;
  
  	time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);
  	ret = tg->latency_target == DFL_LATENCY_TARGET ||
  	      tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
  	      (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
  	      tg->avg_idletime > tg->idletime_threshold ||
  	      (tg->latency_target && tg->bio_cnt &&
  		tg->bad_bio_cnt * 5 < tg->bio_cnt);
  	throtl_log(&tg->service_queue,
  		"avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d",
  		tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt,
  		tg->bio_cnt, ret, tg->td->scale);
  	return ret;
  }
  static bool throtl_tg_can_upgrade(struct throtl_grp *tg)
  {
  	struct throtl_service_queue *sq = &tg->service_queue;
  	bool read_limit, write_limit;
  
  	/*
  	 * if cgroup reaches low limit (if low limit is 0, the cgroup always
  	 * reaches), it's ok to upgrade to next limit
  	 */
  	read_limit = tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW];
  	write_limit = tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW];
  	if (!read_limit && !write_limit)
  		return true;
  	if (read_limit && sq->nr_queued[READ] &&
  	    (!write_limit || sq->nr_queued[WRITE]))
  		return true;
  	if (write_limit && sq->nr_queued[WRITE] &&
  	    (!read_limit || sq->nr_queued[READ]))
  		return true;
  
  	if (time_after_eq(jiffies,
  		tg_last_low_overflow_time(tg) + tg->td->throtl_slice) &&
  	    throtl_tg_is_idle(tg))
  		return true;
  	return false;
  }
  
  static bool throtl_hierarchy_can_upgrade(struct throtl_grp *tg)
  {
  	while (true) {
  		if (throtl_tg_can_upgrade(tg))
  			return true;
  		tg = sq_to_tg(tg->service_queue.parent_sq);
  		if (!tg || !tg_to_blkg(tg)->parent)
  			return false;
  	}
  	return false;
  }
  
  static bool throtl_can_upgrade(struct throtl_data *td,
  	struct throtl_grp *this_tg)
  {
  	struct cgroup_subsys_state *pos_css;
  	struct blkcg_gq *blkg;
  
  	if (td->limit_index != LIMIT_LOW)
  		return false;
  
  	if (time_before(jiffies, td->low_downgrade_time + td->throtl_slice))
  		return false;
  
  	rcu_read_lock();
  	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
  		struct throtl_grp *tg = blkg_to_tg(blkg);
  
  		if (tg == this_tg)
  			continue;
  		if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children))
  			continue;
  		if (!throtl_hierarchy_can_upgrade(tg)) {
  			rcu_read_unlock();
  			return false;
  		}
  	}
  	rcu_read_unlock();
  	return true;
  }
  static void throtl_upgrade_check(struct throtl_grp *tg)
  {
  	unsigned long now = jiffies;
  
  	if (tg->td->limit_index != LIMIT_LOW)
  		return;
  
  	if (time_after(tg->last_check_time + tg->td->throtl_slice, now))
  		return;
  
  	tg->last_check_time = now;
  
  	if (!time_after_eq(now,
  	     __tg_last_low_overflow_time(tg) + tg->td->throtl_slice))
  		return;
  
  	if (throtl_can_upgrade(tg->td, NULL))
  		throtl_upgrade_state(tg->td);
  }
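  
  /*
   * Switch the device to LIMIT_MAX and kick every group so bios queued
   * under the low limit are re-evaluated against the max limit.
   */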
  static void throtl_upgrade_state(struct throtl_data *td)
  {
  	struct cgroup_subsys_state *pos_css;
  	struct blkcg_gq *blkg;
  
  	throtl_log(&td->service_queue, "upgrade to max");
  	td->limit_index = LIMIT_MAX;
  	td->low_upgrade_time = jiffies;
  	td->scale = 0;
  	rcu_read_lock();
  	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
  		struct throtl_grp *tg = blkg_to_tg(blkg);
  		struct throtl_service_queue *sq = &tg->service_queue;
  
  		tg->disptime = jiffies - 1;
  		throtl_select_dispatch(sq);
  		throtl_schedule_next_dispatch(sq, true);
  	}
  	rcu_read_unlock();
  	throtl_select_dispatch(&td->service_queue);
  	throtl_schedule_next_dispatch(&td->service_queue, true);
  	queue_work(kthrotld_workqueue, &td->dispatch_work);
  }
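  
  /*
   * Step back toward LIMIT_LOW.  The upgrade scale is halved first; the
   * device only returns to the low limit once the scale reaches zero.
   */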
  static void throtl_downgrade_state(struct throtl_data *td)
  {
  	td->scale /= 2;
  
  	throtl_log(&td->service_queue, "downgrade, scale %d", td->scale);
  	if (td->scale) {
  		td->low_upgrade_time = jiffies - td->scale * td->throtl_slice;
  		return;
  	}
  
  	td->limit_index = LIMIT_LOW;
  	td->low_downgrade_time = jiffies;
  }
  
  static bool throtl_tg_can_downgrade(struct throtl_grp *tg)
  {
  	struct throtl_data *td = tg->td;
  	unsigned long now = jiffies;
  
  	/*
  	 * If cgroup is below low limit, consider downgrade and throttle other
  	 * cgroups
  	 */
  	if (time_after_eq(now, td->low_upgrade_time + td->throtl_slice) &&
  	    time_after_eq(now, tg_last_low_overflow_time(tg) +
  					td->throtl_slice) &&
  	    (!throtl_tg_is_idle(tg) ||
  	     !list_empty(&tg_to_blkg(tg)->blkcg->css.children)))
  		return true;
  	return false;
  }
  
  static bool throtl_hierarchy_can_downgrade(struct throtl_grp *tg)
  {
  	while (true) {
  		if (!throtl_tg_can_downgrade(tg))
  			return false;
  		tg = sq_to_tg(tg->service_queue.parent_sq);
  		if (!tg || !tg_to_blkg(tg)->parent)
  			break;
  	}
  	return true;
  }
  
  static void throtl_downgrade_check(struct throtl_grp *tg)
  {
  	uint64_t bps;
  	unsigned int iops;
  	unsigned long elapsed_time;
  	unsigned long now = jiffies;
  
  	if (tg->td->limit_index != LIMIT_MAX ||
  	    !tg->td->limit_valid[LIMIT_LOW])
  		return;
  	if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children))
  		return;
  	if (time_after(tg->last_check_time + tg->td->throtl_slice, now))
  		return;
  
  	elapsed_time = now - tg->last_check_time;
  	tg->last_check_time = now;
  
  	if (time_before(now, tg_last_low_overflow_time(tg) +
  			tg->td->throtl_slice))
  		return;
  
  	if (tg->bps[READ][LIMIT_LOW]) {
  		bps = tg->last_bytes_disp[READ] * HZ;
  		do_div(bps, elapsed_time);
  		if (bps >= tg->bps[READ][LIMIT_LOW])
  			tg->last_low_overflow_time[READ] = now;
  	}
  
  	if (tg->bps[WRITE][LIMIT_LOW]) {
  		bps = tg->last_bytes_disp[WRITE] * HZ;
  		do_div(bps, elapsed_time);
  		if (bps >= tg->bps[WRITE][LIMIT_LOW])
  			tg->last_low_overflow_time[WRITE] = now;
  	}
  
  	if (tg->iops[READ][LIMIT_LOW]) {
  		tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0);
  		iops = tg->last_io_disp[READ] * HZ / elapsed_time;
  		if (iops >= tg->iops[READ][LIMIT_LOW])
  			tg->last_low_overflow_time[READ] = now;
  	}
  
  	if (tg->iops[WRITE][LIMIT_LOW]) {
  		tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0);
  		iops = tg->last_io_disp[WRITE] * HZ / elapsed_time;
  		if (iops >= tg->iops[WRITE][LIMIT_LOW])
  			tg->last_low_overflow_time[WRITE] = now;
  	}
  
  	/*
  	 * If cgroup is below low limit, consider downgrade and throttle other
  	 * cgroups
  	 */
  	if (throtl_hierarchy_can_downgrade(tg))
  		throtl_downgrade_state(tg->td);
  
  	tg->last_bytes_disp[READ] = 0;
  	tg->last_bytes_disp[WRITE] = 0;
  	tg->last_io_disp[READ] = 0;
  	tg->last_io_disp[WRITE] = 0;
  }
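  
  /*
   * Refresh the EWMA of @tg's idle time from the completion time of its
   * last IO; consumed by throtl_tg_is_idle().
   */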
  static void blk_throtl_update_idletime(struct throtl_grp *tg)
  {
  	unsigned long now;
  	unsigned long last_finish_time = tg->last_finish_time;
  
  	if (last_finish_time == 0)
  		return;
  
  	now = ktime_get_ns() >> 10;
  	if (now <= last_finish_time ||
  	    last_finish_time == tg->checked_last_finish_time)
  		return;
  
  	tg->avg_idletime = (tg->avg_idletime * 7 + now - last_finish_time) >> 3;
  	tg->checked_last_finish_time = last_finish_time;
  }
  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
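  /*
   * Fold the per-CPU latency samples into td->avg_buckets[]: an EWMA per
   * request-size bucket, recomputed at most once per second and kept
   * monotonically non-decreasing across buckets.
   */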
  static void throtl_update_latency_buckets(struct throtl_data *td)
  {
  	struct avg_latency_bucket avg_latency[2][LATENCY_BUCKET_SIZE];
  	int i, cpu, rw;
  	unsigned long last_latency[2] = { 0 };
  	unsigned long latency[2];
  
  	if (!blk_queue_nonrot(td->queue) || !td->limit_valid[LIMIT_LOW])
  		return;
  	if (time_before(jiffies, td->last_calculate_time + HZ))
  		return;
  	td->last_calculate_time = jiffies;
  
  	memset(avg_latency, 0, sizeof(avg_latency));
  	for (rw = READ; rw <= WRITE; rw++) {
  		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
  			struct latency_bucket *tmp = &td->tmp_buckets[rw][i];
  
  			for_each_possible_cpu(cpu) {
  				struct latency_bucket *bucket;
  
  				/* this isn't race free, but ok in practice */
  				bucket = per_cpu_ptr(td->latency_buckets[rw],
  					cpu);
  				tmp->total_latency += bucket[i].total_latency;
  				tmp->samples += bucket[i].samples;
  				bucket[i].total_latency = 0;
  				bucket[i].samples = 0;
  			}
  
  			if (tmp->samples >= 32) {
  				int samples = tmp->samples;
  
  				latency[rw] = tmp->total_latency;
  
  				tmp->total_latency = 0;
  				tmp->samples = 0;
  				latency[rw] /= samples;
  				if (latency[rw] == 0)
  					continue;
  				avg_latency[rw][i].latency = latency[rw];
  			}
  		}
  	}
  
  	for (rw = READ; rw <= WRITE; rw++) {
  		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
  			if (!avg_latency[rw][i].latency) {
  				if (td->avg_buckets[rw][i].latency < last_latency[rw])
  					td->avg_buckets[rw][i].latency =
  						last_latency[rw];
  				continue;
  			}
  
  			if (!td->avg_buckets[rw][i].valid)
  				latency[rw] = avg_latency[rw][i].latency;
  			else
  				latency[rw] = (td->avg_buckets[rw][i].latency * 7 +
  					avg_latency[rw][i].latency) >> 3;
  
  			td->avg_buckets[rw][i].latency = max(latency[rw],
  				last_latency[rw]);
  			td->avg_buckets[rw][i].valid = true;
  			last_latency[rw] = td->avg_buckets[rw][i].latency;
  		}
  	}
  
  	for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
  		throtl_log(&td->service_queue,
  			"Latency bucket %d: read latency=%ld, read valid=%d, "
  			"write latency=%ld, write valid=%d", i,
  			td->avg_buckets[READ][i].latency,
  			td->avg_buckets[READ][i].valid,
  			td->avg_buckets[WRITE][i].latency,
  			td->avg_buckets[WRITE][i].valid);
  }
  #else
  static inline void throtl_update_latency_buckets(struct throtl_data *td)
  {
  }
  #endif
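  
  /*
   * A split bio re-enters submission as multiple bios; credit the extra
   * splits to every ancestor with rules so iops accounting stays accurate.
   */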
  void blk_throtl_charge_bio_split(struct bio *bio)
  {
  	struct blkcg_gq *blkg = bio->bi_blkg;
  	struct throtl_grp *parent = blkg_to_tg(blkg);
  	struct throtl_service_queue *parent_sq;
  	bool rw = bio_data_dir(bio);
  
  	do {
  		if (!parent->has_rules[rw])
  			break;
  
  		atomic_inc(&parent->io_split_cnt[rw]);
  		atomic_inc(&parent->last_io_split_cnt[rw]);
  
  		parent_sq = parent->service_queue.parent_sq;
  		parent = sq_to_tg(parent_sq);
  	} while (parent);
  }
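  
  /*
   * Main entry point from bio submission.  Returns true if @bio was queued
   * for delayed dispatch; false means the caller may issue it now.
   */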
  bool blk_throtl_bio(struct bio *bio)
e43473b7f   Vivek Goyal   blkio: Core imple...
2085
  {
309dca309   Christoph Hellwig   block: store a bl...
2086
  	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
db18a53e5   Christoph Hellwig   blk-cgroup: remov...
2087
  	struct blkcg_gq *blkg = bio->bi_blkg;
c5cc2070b   Tejun Heo   blk-throttle: add...
2088
  	struct throtl_qnode *qn = NULL;
a2e83ef9c   Christoph Hellwig   blk-cgroup: remov...
2089
  	struct throtl_grp *tg = blkg_to_tg(blkg);
73f0d49a9   Tejun Heo   blk-throttle: mov...
2090
  	struct throtl_service_queue *sq;
0e9f4164b   Tejun Heo   blk-throttle: gen...
2091
  	bool rw = bio_data_dir(bio);
  	bool throttled = false;
  	struct throtl_data *td = tg->td;

  	rcu_read_lock();

  	/* see throtl_charge_bio() */
  	if (bio_flagged(bio, BIO_THROTTLED))
  		goto out;
  
  	if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
  		blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
  				bio->bi_iter.bi_size);
  		blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
  	}
  
  	if (!tg->has_rules[rw])
  		goto out;

  	spin_lock_irq(&q->queue_lock);

  	throtl_update_latency_buckets(td);
  	blk_throtl_update_idletime(tg);
  	sq = &tg->service_queue;
  again:
  	while (true) {
  		if (tg->last_low_overflow_time[rw] == 0)
  			tg->last_low_overflow_time[rw] = jiffies;
  		throtl_downgrade_check(tg);
  		throtl_upgrade_check(tg);
  		/* throtl is FIFO - if bios are already queued, this one must queue too */
  		if (sq->nr_queued[rw])
  			break;

  		/* if above limits, break to queue */
  		if (!tg_may_dispatch(tg, bio, NULL)) {
  			tg->last_low_overflow_time[rw] = jiffies;
  			if (throtl_can_upgrade(td, tg)) {
  				throtl_upgrade_state(td);
  				goto again;
  			}
  			break;
  		}
  
  		/* within limits, let's charge and dispatch directly */
  		throtl_charge_bio(tg, bio);
  
  		/*
  		 * We need to trim slice even when bios are not being queued
  		 * otherwise it might happen that a bio is not queued for
  		 * a long time and slice keeps on extending and trim is not
  		 * called for a long time. Now if limits are reduced suddenly
  		 * we take into account all the IO dispatched so far at the new
  		 * low rate and newly queued IO gets a really long dispatch
  		 * time.
  		 *
  		 * So keep on trimming slice even if bio is not queued.
  		 */
  		throtl_trim_slice(tg, rw);
  
  		/*
  		 * @bio passed through this layer without being throttled.
  		 * Climb up the ladder.  If we're already at the top, it
  		 * can be executed directly.
  		 */
  		qn = &tg->qnode_on_parent[rw];
  		sq = sq->parent_sq;
  		tg = sq_to_tg(sq);
  		if (!tg)
  			goto out_unlock;
  	}
  	/* out-of-limit, queue to @tg */
  	throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d",
  		   rw == READ ? 'R' : 'W',
  		   tg->bytes_disp[rw], bio->bi_iter.bi_size,
  		   tg_bps_limit(tg, rw),
  		   tg->io_disp[rw], tg_iops_limit(tg, rw),
  		   sq->nr_queued[READ], sq->nr_queued[WRITE]);

  	tg->last_low_overflow_time[rw] = jiffies;
  	td->nr_queued[rw]++;
  	throtl_add_bio_tg(bio, qn, tg);
  	throttled = true;

  	/*
  	 * Update @tg's dispatch time and force schedule dispatch if @tg
  	 * was empty before @bio.  The forced scheduling isn't likely to
  	 * cause undue delay as @bio is likely to be dispatched directly if
  	 * its @tg's disptime is not in the future.
  	 */
  	if (tg->flags & THROTL_TG_WAS_EMPTY) {
  		tg_update_disptime(tg);
  		throtl_schedule_next_dispatch(tg->service_queue.parent_sq, true);
  	}
  out_unlock:
  	spin_unlock_irq(&q->queue_lock);
  out:
  	bio_set_flag(bio, BIO_THROTTLED);
  
  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
  	if (throttled || !td->track_bio_latency)
  		bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
  #endif
  	rcu_read_unlock();
  	return throttled;
  }
  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
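  /*
   * Record one latency sample.  Samples are binned by IO size (see
   * request_bucket_index()) so small and large IOs are averaged
   * separately, and are only collected on nonrot queues while the queue
   * is running at the low limit.
   */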
  static void throtl_track_latency(struct throtl_data *td, sector_t size,
  	int op, unsigned long time)
  {
  	struct latency_bucket *latency;
  	int index;
  	if (!td || td->limit_index != LIMIT_LOW ||
  	    !(op == REQ_OP_READ || op == REQ_OP_WRITE) ||
  	    !blk_queue_nonrot(td->queue))
  		return;
  
  	index = request_bucket_index(size);
  	latency = get_cpu_ptr(td->latency_buckets[op]);
  	latency[index].total_latency += time;
  	latency[index].samples++;
  	put_cpu_ptr(td->latency_buckets[op]);
  }
  
  void blk_throtl_stat_add(struct request *rq, u64 time_ns)
  {
  	struct request_queue *q = rq->q;
  	struct throtl_data *td = q->td;
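  	/* completion time is fed in at ns >> 10 resolution, roughly usecs */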
  	throtl_track_latency(td, blk_rq_stats_sectors(rq), req_op(rq),
  			     time_ns >> 10);
  }
  void blk_throtl_bio_endio(struct bio *bio)
  {
  	struct blkcg_gq *blkg;
  	struct throtl_grp *tg;
  	u64 finish_time_ns;
  	unsigned long finish_time;
  	unsigned long start_time;
  	unsigned long lat;
  	int rw = bio_data_dir(bio);

  	blkg = bio->bi_blkg;
  	if (!blkg)
  		return;
  	tg = blkg_to_tg(blkg);
  	if (!tg->td->limit_valid[LIMIT_LOW])
  		return;

  	finish_time_ns = ktime_get_ns();
  	tg->last_finish_time = finish_time_ns >> 10;
  	start_time = bio_issue_time(&bio->bi_issue) >> 10;
  	finish_time = __bio_issue_time(finish_time_ns) >> 10;
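  	/*
  	 * @bio was stamped with its issue time at submission; bail out if
  	 * the stamp is missing or the clock appears to have gone backwards.
  	 */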
  	if (!start_time || finish_time <= start_time)
  		return;
  
  	lat = finish_time - start_time;
  	/* this latency bookkeeping is only for bio-based drivers */
  	if (!(bio->bi_issue.value & BIO_ISSUE_THROTL_SKIP_LATENCY))
  		throtl_track_latency(tg->td, bio_issue_size(&bio->bi_issue),
  				     bio_op(bio), lat);

  	if (tg->latency_target && lat >= tg->td->filtered_latency) {
  		int bucket;
  		unsigned int threshold;
  		bucket = request_bucket_index(bio_issue_size(&bio->bi_issue));
  		threshold = tg->td->avg_buckets[rw][bucket].latency +
  			tg->latency_target;
  		if (lat > threshold)
  			tg->bad_bio_cnt++;
  		/*
  		 * Not race free - the count can come out slightly wrong,
  		 * which means a cgroup may be throttled when it shouldn't be.
  		 */
  		tg->bio_cnt++;
  	}
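
  	/*
  	 * Periodically halve both counters so the bad/total ratio tracks
  	 * recent behaviour rather than the whole history.
  	 */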
  
  	if (time_after(jiffies, tg->bio_cnt_reset_time) || tg->bio_cnt > 1024) {
  		tg->bio_cnt_reset_time = tg->td->throtl_slice + jiffies;
  		tg->bio_cnt /= 2;
  		tg->bad_bio_cnt /= 2;
  	}
  }
  #endif
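  /*
   * Allocate the per-queue throttle data plus the per-cpu latency buckets
   * and activate the throttle policy; called from the blkcg queue
   * initialization path.  Every allocation is unwound on failure.
   */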
  int blk_throtl_init(struct request_queue *q)
  {
  	struct throtl_data *td;
  	int ret;
  
  	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
  	if (!td)
  		return -ENOMEM;
  	td->latency_buckets[READ] = __alloc_percpu(sizeof(struct latency_bucket) *
  		LATENCY_BUCKET_SIZE, __alignof__(u64));
  	if (!td->latency_buckets[READ]) {
  		kfree(td);
  		return -ENOMEM;
  	}
  	td->latency_buckets[WRITE] = __alloc_percpu(sizeof(struct latency_bucket) *
  		LATENCY_BUCKET_SIZE, __alignof__(u64));
  	if (!td->latency_buckets[WRITE]) {
  		free_percpu(td->latency_buckets[READ]);
  		kfree(td);
  		return -ENOMEM;
  	}

  	INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
  	throtl_service_queue_init(&td->service_queue);

  	q->td = td;
  	td->queue = q;

  	td->limit_valid[LIMIT_MAX] = true;
  	td->limit_index = LIMIT_MAX;
  	td->low_upgrade_time = jiffies;
  	td->low_downgrade_time = jiffies;

  	/* activate policy */
  	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
  	if (ret) {
  		free_percpu(td->latency_buckets[READ]);
  		free_percpu(td->latency_buckets[WRITE]);
  		kfree(td);
  	}
  	return ret;
  }
  
  void blk_throtl_exit(struct request_queue *q)
  {
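  	/*
  	 * Tear down in dependency order: stop the pending timer so it cannot
  	 * re-arm dispatch work, flush the dispatch work, then detach the
  	 * policy and free the per-cpu buckets.
  	 */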
  	BUG_ON(!q->td);
  	del_timer_sync(&q->td->service_queue.pending_timer);
  	throtl_shutdown_wq(q);
  	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
  	free_percpu(q->td->latency_buckets[READ]);
  	free_percpu(q->td->latency_buckets[WRITE]);
  	kfree(q->td);
  }
  void blk_throtl_register_queue(struct request_queue *q)
  {
  	struct throtl_data *td;
  	int i;
  
  	td = q->td;
  	BUG_ON(!td);
  	if (blk_queue_nonrot(q)) {
  		td->throtl_slice = DFL_THROTL_SLICE_SSD;
  		td->filtered_latency = LATENCY_FILTERED_SSD;
  	} else {
  		td->throtl_slice = DFL_THROTL_SLICE_HD;
  		td->filtered_latency = LATENCY_FILTERED_HD;
  		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
  			td->avg_buckets[READ][i].latency = DFL_HD_BASELINE_LATENCY;
  			td->avg_buckets[WRITE][i].latency = DFL_HD_BASELINE_LATENCY;
  		}
  	}
  #ifndef CONFIG_BLK_DEV_THROTTLING_LOW
  	/* low limits not compiled in - keep the previous default slice */
  	td->throtl_slice = DFL_THROTL_SLICE_HD;
  #endif

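  	/*
  	 * Only bio-based queues need bio latency tracked here; blk-mq
  	 * queues report completion latency through blk-stat instead (see
  	 * blk_throtl_stat_add()).
  	 */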
  	td->track_bio_latency = !queue_is_mq(q);
  	if (!td->track_bio_latency)
  		blk_stat_enable_accounting(q);
  }
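  /*
   * sysfs knobs backing the queue's throttle_sample_time attribute (wired
   * up in blk-sysfs.c); the value is the throttle slice in milliseconds,
   * e.g. "echo 50 > /sys/block/<dev>/queue/throttle_sample_time".
   */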
  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
  ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page)
  {
  	if (!q->td)
  		return -EINVAL;
  	return sprintf(page, "%u\n", jiffies_to_msecs(q->td->throtl_slice));
  }
  
  ssize_t blk_throtl_sample_time_store(struct request_queue *q,
  	const char *page, size_t count)
  {
  	unsigned long v;
  	unsigned long t;
  
  	if (!q->td)
  		return -EINVAL;
  	if (kstrtoul(page, 10, &v))
  		return -EINVAL;
  	t = msecs_to_jiffies(v);
  	if (t == 0 || t > MAX_THROTL_SLICE)
  		return -EINVAL;
  	q->td->throtl_slice = t;
  	return count;
  }
  #endif
  static int __init throtl_init(void)
  {
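  	/*
  	 * Dispatching throttled bios must make forward progress even under
  	 * memory pressure (writeback may be waiting on them), hence
  	 * WQ_MEM_RECLAIM.
  	 */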
  	kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0);
  	if (!kthrotld_workqueue)
  		panic("Failed to create kthrotld\n");
  	return blkcg_policy_register(&blkcg_policy_throtl);
  }
  
  module_init(throtl_init);