block/blk-throttle.c
  // SPDX-License-Identifier: GPL-2.0
  /*
   * Interface for controlling IO bandwidth on a request queue
   *
   * Copyright (C) 2010 Vivek Goyal <vgoyal@redhat.com>
   */
  
  #include <linux/module.h>
  #include <linux/slab.h>
  #include <linux/blkdev.h>
  #include <linux/bio.h>
  #include <linux/blktrace_api.h>
#include <linux/blk-cgroup.h>
#include "blk.h"
#include "blk-cgroup-rwstat.h"
  
  /* Max dispatch from a group in 1 round */
#define THROTL_GRP_QUANTUM 8

/* Total max dispatch from all groups in one round */
#define THROTL_QUANTUM 32

/* Throttling is performed over a slice and after that slice is renewed */
#define DFL_THROTL_SLICE_HD (HZ / 10)
#define DFL_THROTL_SLICE_SSD (HZ / 50)
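/* A jiffy is 1/HZ seconds, so the default slice is 100ms on HD, 20ms on SSD. */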
#define MAX_THROTL_SLICE (HZ)
#define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
#define MIN_THROTL_BPS (320 * 1024)
#define MIN_THROTL_IOPS (10)
#define DFL_LATENCY_TARGET (-1L)
#define DFL_IDLE_THRESHOLD (0)
#define DFL_HD_BASELINE_LATENCY (4000L) /* 4ms */
#define LATENCY_FILTERED_SSD (0)
/*
 * For HD, very small latencies come from sequential IO. Such IO doesn't
 * help determine whether the IO is being impacted by other IO, hence we
 * ignore it.
 */
#define LATENCY_FILTERED_HD (1000L) /* 1ms */

static struct blkcg_policy blkcg_policy_throtl;

/* A workqueue to queue throttle related work */
static struct workqueue_struct *kthrotld_workqueue;

  /*
   * To implement hierarchical throttling, throtl_grps form a tree and bios
   * are dispatched upwards level by level until they reach the top and get
   * issued.  When dispatching bios from the children and local group at each
 * level, if the bios are dispatched into a single bio_list, there's a risk
 * that a local or child group which can queue many bios at once fills up
 * the list and starves the others.
   *
   * To avoid such starvation, dispatched bios are queued separately
   * according to where they came from.  When they are again dispatched to
   * the parent, they're popped in round-robin order so that no single source
   * hogs the dispatch window.
   *
   * throtl_qnode is used to keep the queued bios separated by their sources.
   * Bios are queued to throtl_qnode which in turn is queued to
   * throtl_service_queue and then dispatched in round-robin order.
   *
   * It's also used to track the reference counts on blkg's.  A qnode always
   * belongs to a throtl_grp and gets queued on itself or the parent, so
   * incrementing the reference of the associated throtl_grp when a qnode is
   * queued and decrementing when dequeued is enough to keep the whole blkg
   * tree pinned while bios are in flight.
   */
  struct throtl_qnode {
  	struct list_head	node;		/* service_queue->queued[] */
  	struct bio_list		bios;		/* queued bios */
  	struct throtl_grp	*tg;		/* tg this qnode belongs to */
  };
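
/*
 * Example of the round-robin behavior: if a parent's queued[] list holds
 * qnodes A and B where A has many bios queued and B has one, dispatch pops
 * one bio from A, rotates A to the tail of the list, then pops B's bio, so
 * B is not starved behind A's backlog (see throtl_pop_queued() below).
 */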

struct throtl_service_queue {
	struct throtl_service_queue *parent_sq;	/* the parent service_queue */

	/*
	 * Bios queued directly to this service_queue or dispatched from
	 * children throtl_grp's.
	 */
	struct list_head	queued[2];	/* throtl_qnode [READ/WRITE] */
	unsigned int		nr_queued[2];	/* number of queued bios */

	/*
	 * RB tree of active children throtl_grp's, which are sorted by
	 * their ->disptime.
	 */
	struct rb_root_cached	pending_tree;	/* RB tree of active tgs */
	unsigned int		nr_pending;	/* # queued in the tree */
	unsigned long		first_pending_disptime;	/* disptime of the first tg */
	struct timer_list	pending_timer;	/* fires on first_pending_disptime */
};

enum tg_state_flags {
	THROTL_TG_PENDING	= 1 << 0,	/* on parent's pending tree */
	THROTL_TG_WAS_EMPTY	= 1 << 1,	/* bio_lists[] became non-empty */
};

#define rb_entry_tg(node)	rb_entry((node), struct throtl_grp, rb_node)

enum {
	LIMIT_LOW,
	LIMIT_MAX,
	LIMIT_CNT,
};

struct throtl_grp {
	/* must be the first member */
	struct blkg_policy_data pd;

	/* active throtl group service_queue member */
	struct rb_node rb_node;

	/* throtl_data this group belongs to */
	struct throtl_data *td;

	/* this group's service queue */
	struct throtl_service_queue service_queue;

	/*
	 * qnode_on_self is used when bios are directly queued to this
	 * throtl_grp so that local bios compete fairly with bios
	 * dispatched from children.  qnode_on_parent is used when bios are
	 * dispatched from this throtl_grp into its parent and will compete
	 * with the sibling qnode_on_parents and the parent's
	 * qnode_on_self.
	 */
	struct throtl_qnode qnode_on_self[2];
	struct throtl_qnode qnode_on_parent[2];

	/*
	 * Dispatch time in jiffies. This is the estimated time when the
	 * group will unthrottle and is ready to dispatch more bios. It is
	 * used as the key to sort active groups in the service tree.
	 */
	unsigned long disptime;

	unsigned int flags;

	/* are there any throtl rules between this group and td? */
	bool has_rules[2];

	/* internally used bytes per second rate limits */
	uint64_t bps[2][LIMIT_CNT];

	/* user configured bps limits */
	uint64_t bps_conf[2][LIMIT_CNT];

	/* internally used IOPS limits */
	unsigned int iops[2][LIMIT_CNT];

	/* user configured IOPS limits */
	unsigned int iops_conf[2][LIMIT_CNT];

	/* Number of bytes dispatched in current slice */
	uint64_t bytes_disp[2];
	/* Number of bio's dispatched in current slice */
	unsigned int io_disp[2];

	unsigned long last_low_overflow_time[2];

	uint64_t last_bytes_disp[2];
	unsigned int last_io_disp[2];

	unsigned long last_check_time;

	unsigned long latency_target; /* us */
	unsigned long latency_target_conf; /* us */

	/* When did we start a new slice */
	unsigned long slice_start[2];
	unsigned long slice_end[2];

	unsigned long last_finish_time; /* ns / 1024 */
	unsigned long checked_last_finish_time; /* ns / 1024 */
	unsigned long avg_idletime; /* ns / 1024 */
	unsigned long idletime_threshold; /* us */
	unsigned long idletime_threshold_conf; /* us */

	unsigned int bio_cnt; /* total bios */
	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
	unsigned long bio_cnt_reset_time;

	struct blkg_rwstat stat_bytes;
	struct blkg_rwstat stat_ios;
};

/* We measure latency for request size from <= 4k to >= 1M */
#define LATENCY_BUCKET_SIZE 9

struct latency_bucket {
	unsigned long total_latency; /* ns / 1024 */
	int samples;
};

struct avg_latency_bucket {
	unsigned long latency; /* ns / 1024 */
	bool valid;
};

struct throtl_data
{
	/* service tree for active throtl groups */
	struct throtl_service_queue service_queue;

	struct request_queue *queue;

	/* Total Number of queued bios on READ and WRITE lists */
	unsigned int nr_queued[2];

	unsigned int throtl_slice;

	/* Work for dispatching throttled bios */
	struct work_struct dispatch_work;
	unsigned int limit_index;
	bool limit_valid[LIMIT_CNT];

	unsigned long low_upgrade_time;
	unsigned long low_downgrade_time;

	unsigned int scale;

	struct latency_bucket tmp_buckets[2][LATENCY_BUCKET_SIZE];
	struct avg_latency_bucket avg_buckets[2][LATENCY_BUCKET_SIZE];
	struct latency_bucket __percpu *latency_buckets[2];
	unsigned long last_calculate_time;
	unsigned long filtered_latency;

	bool track_bio_latency;
};

static void throtl_pending_timer_fn(struct timer_list *t);

static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
}

static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
{
	return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
}

static inline struct blkcg_gq *tg_to_blkg(struct throtl_grp *tg)
{
	return pd_to_blkg(&tg->pd);
}

/**
 * sq_to_tg - return the throtl_grp the specified service queue belongs to
 * @sq: the throtl_service_queue of interest
 *
 * Return the throtl_grp @sq belongs to.  If @sq is the top-level one
 * embedded in throtl_data, %NULL is returned.
 */
static struct throtl_grp *sq_to_tg(struct throtl_service_queue *sq)
{
	if (sq && sq->parent_sq)
		return container_of(sq, struct throtl_grp, service_queue);
	else
		return NULL;
}

/**
 * sq_to_td - return throtl_data the specified service queue belongs to
 * @sq: the throtl_service_queue of interest
 *
 * A service_queue can be embedded in either a throtl_grp or throtl_data.
 * Determine the associated throtl_data accordingly and return it.
 */
static struct throtl_data *sq_to_td(struct throtl_service_queue *sq)
{
	struct throtl_grp *tg = sq_to_tg(sq);

	if (tg)
		return tg->td;
	else
		return container_of(sq, struct throtl_data, service_queue);
}

/*
 * A cgroup's limit in LIMIT_MAX is scaled if a low limit is set. The scaling
 * makes IO dispatch smoother.
 * Scale up: linearly scale up according to elapsed time since upgrade. For
 *           every throtl_slice, the limit scales up by 1/2 of the .low limit
 *           till the limit hits the .max limit.
 * Scale down: exponentially scale down if a cgroup doesn't hit its .low limit
 */
static uint64_t throtl_adjusted_limit(uint64_t low, struct throtl_data *td)
{
	/* arbitrary value to avoid too big scale */
	if (td->scale < 4096 && time_after_eq(jiffies,
	    td->low_upgrade_time + td->scale * td->throtl_slice))
		td->scale = (jiffies - td->low_upgrade_time) / td->throtl_slice;

	return low + (low >> 1) * td->scale;
}
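
/*
 * Worked example: with a .low bps limit of 10MB/s, one throtl_slice after
 * an upgrade td->scale is 1 and this returns 10 + 5 = 15MB/s, after two
 * slices 20MB/s, and so on, adding half of the .low limit per slice; the
 * tg_*_limit() callers below cap the result at the .max limit.
 */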

static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw)
{
	struct blkcg_gq *blkg = tg_to_blkg(tg);
	struct throtl_data *td;
	uint64_t ret;

	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
		return U64_MAX;

	td = tg->td;
	ret = tg->bps[rw][td->limit_index];
	if (ret == 0 && td->limit_index == LIMIT_LOW) {
		/* intermediate node or iops isn't 0 */
		if (!list_empty(&blkg->blkcg->css.children) ||
		    tg->iops[rw][td->limit_index])
			return U64_MAX;
		else
			return MIN_THROTL_BPS;
	}

	if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] &&
	    tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) {
		uint64_t adjusted;

		adjusted = throtl_adjusted_limit(tg->bps[rw][LIMIT_LOW], td);
		ret = min(tg->bps[rw][LIMIT_MAX], adjusted);
	}
	return ret;
}
  
  static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
  {
	struct blkcg_gq *blkg = tg_to_blkg(tg);
	struct throtl_data *td;
	unsigned int ret;

	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
		return UINT_MAX;

	td = tg->td;
	ret = tg->iops[rw][td->limit_index];
	if (ret == 0 && tg->td->limit_index == LIMIT_LOW) {
		/* intermediate node or bps isn't 0 */
		if (!list_empty(&blkg->blkcg->css.children) ||
		    tg->bps[rw][td->limit_index])
			return UINT_MAX;
		else
			return MIN_THROTL_IOPS;
	}

	if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] &&
	    tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) {
		uint64_t adjusted;

		adjusted = throtl_adjusted_limit(tg->iops[rw][LIMIT_LOW], td);
		if (adjusted > UINT_MAX)
			adjusted = UINT_MAX;
		ret = min_t(unsigned int, tg->iops[rw][LIMIT_MAX], adjusted);
	}
	return ret;
}
  #define request_bucket_index(sectors) \
  	clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1)
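
/*
 * For example, a 4k request (8 sectors) maps to bucket 0, a 64k request
 * (128 sectors) to bucket 4, and requests of 1M (2048 sectors) and up
 * clamp to the last bucket, 8.
 */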

/**
 * throtl_log - log debug message via blktrace
 * @sq: the service_queue being reported
 * @fmt: printf format string
 * @args: printf args
 *
 * The messages are prefixed with "throtl BLKG_NAME" if @sq belongs to a
 * throtl_grp; otherwise, just "throtl".
 */
#define throtl_log(sq, fmt, args...)	do {				\
	struct throtl_grp *__tg = sq_to_tg((sq));			\
	struct throtl_data *__td = sq_to_td((sq));			\
									\
	(void)__td;							\
	if (likely(!blk_trace_note_message_enabled(__td->queue)))	\
		break;							\
	if ((__tg)) {							\
		blk_add_cgroup_trace_msg(__td->queue,			\
			tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\
	} else {							\
		blk_add_trace_msg(__td->queue, "throtl " fmt, ##args);	\
	}								\
} while (0)
  static inline unsigned int throtl_bio_data_size(struct bio *bio)
  {
  	/* assume it's one sector */
  	if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
  		return 512;
  	return bio->bi_iter.bi_size;
  }
  static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
  {
  	INIT_LIST_HEAD(&qn->node);
  	bio_list_init(&qn->bios);
  	qn->tg = tg;
  }
  
  /**
   * throtl_qnode_add_bio - add a bio to a throtl_qnode and activate it
   * @bio: bio being added
   * @qn: qnode to add bio to
   * @queued: the service_queue->queued[] list @qn belongs to
   *
   * Add @bio to @qn and put @qn on @queued if it's not already on.
   * @qn->tg's reference count is bumped when @qn is activated.  See the
   * comment on top of throtl_qnode definition for details.
   */
  static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
  				 struct list_head *queued)
  {
  	bio_list_add(&qn->bios, bio);
  	if (list_empty(&qn->node)) {
  		list_add_tail(&qn->node, queued);
  		blkg_get(tg_to_blkg(qn->tg));
  	}
  }
  
  /**
   * throtl_peek_queued - peek the first bio on a qnode list
   * @queued: the qnode list to peek
   */
  static struct bio *throtl_peek_queued(struct list_head *queued)
  {
	struct throtl_qnode *qn;
	struct bio *bio;

	if (list_empty(queued))
		return NULL;

	qn = list_first_entry(queued, struct throtl_qnode, node);
  	bio = bio_list_peek(&qn->bios);
  	WARN_ON_ONCE(!bio);
  	return bio;
  }
  
  /**
 * throtl_pop_queued - pop the first bio from a qnode list
   * @queued: the qnode list to pop a bio from
   * @tg_to_put: optional out argument for throtl_grp to put
   *
   * Pop the first bio from the qnode list @queued.  After popping, the first
   * qnode is removed from @queued if empty or moved to the end of @queued so
   * that the popping order is round-robin.
   *
   * When the first qnode is removed, its associated throtl_grp should be put
   * too.  If @tg_to_put is NULL, this function automatically puts it;
   * otherwise, *@tg_to_put is set to the throtl_grp to put and the caller is
   * responsible for putting it.
   */
  static struct bio *throtl_pop_queued(struct list_head *queued,
  				     struct throtl_grp **tg_to_put)
  {
	struct throtl_qnode *qn;
	struct bio *bio;

	if (list_empty(queued))
		return NULL;

	qn = list_first_entry(queued, struct throtl_qnode, node);
  	bio = bio_list_pop(&qn->bios);
  	WARN_ON_ONCE(!bio);
  
  	if (bio_list_empty(&qn->bios)) {
  		list_del_init(&qn->node);
  		if (tg_to_put)
  			*tg_to_put = qn->tg;
  		else
  			blkg_put(tg_to_blkg(qn->tg));
  	} else {
  		list_move_tail(&qn->node, queued);
  	}
  
  	return bio;
  }

/* init a service_queue, assumes the caller zeroed it */
static void throtl_service_queue_init(struct throtl_service_queue *sq)
{
	INIT_LIST_HEAD(&sq->queued[0]);
	INIT_LIST_HEAD(&sq->queued[1]);
	sq->pending_tree = RB_ROOT_CACHED;
	timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0);
}

static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
						struct request_queue *q,
						struct blkcg *blkcg)
{
	struct throtl_grp *tg;
	int rw;

	tg = kzalloc_node(sizeof(*tg), gfp, q->node);
	if (!tg)
		return NULL;

	if (blkg_rwstat_init(&tg->stat_bytes, gfp))
		goto err_free_tg;

	if (blkg_rwstat_init(&tg->stat_ios, gfp))
		goto err_exit_stat_bytes;

	throtl_service_queue_init(&tg->service_queue);

	for (rw = READ; rw <= WRITE; rw++) {
		throtl_qnode_init(&tg->qnode_on_self[rw], tg);
		throtl_qnode_init(&tg->qnode_on_parent[rw], tg);
	}

	RB_CLEAR_NODE(&tg->rb_node);
	tg->bps[READ][LIMIT_MAX] = U64_MAX;
	tg->bps[WRITE][LIMIT_MAX] = U64_MAX;
	tg->iops[READ][LIMIT_MAX] = UINT_MAX;
	tg->iops[WRITE][LIMIT_MAX] = UINT_MAX;
	tg->bps_conf[READ][LIMIT_MAX] = U64_MAX;
	tg->bps_conf[WRITE][LIMIT_MAX] = U64_MAX;
	tg->iops_conf[READ][LIMIT_MAX] = UINT_MAX;
	tg->iops_conf[WRITE][LIMIT_MAX] = UINT_MAX;
	/* LIMIT_LOW will have default value 0 */

	tg->latency_target = DFL_LATENCY_TARGET;
	tg->latency_target_conf = DFL_LATENCY_TARGET;
	tg->idletime_threshold = DFL_IDLE_THRESHOLD;
	tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;

	return &tg->pd;

err_exit_stat_bytes:
	blkg_rwstat_exit(&tg->stat_bytes);
err_free_tg:
	kfree(tg);
	return NULL;
}

static void throtl_pd_init(struct blkg_policy_data *pd)
{
	struct throtl_grp *tg = pd_to_tg(pd);
	struct blkcg_gq *blkg = tg_to_blkg(tg);
	struct throtl_data *td = blkg->q->td;
	struct throtl_service_queue *sq = &tg->service_queue;

	/*
	 * If on the default hierarchy, we switch to properly hierarchical
	 * behavior where limits on a given throtl_grp are applied to the
	 * whole subtree rather than just the group itself.  e.g. If 16M
	 * read_bps limit is set on the root group, the whole system can't
	 * exceed 16M for the device.
	 *
	 * If not on the default hierarchy, the broken flat hierarchy
	 * behavior is retained where all throtl_grps are treated as if
	 * they're all separate root groups right below throtl_data.
	 * Limits of a group don't interact with limits of other groups
	 * regardless of the position of the group in the hierarchy.
	 */
	sq->parent_sq = &td->service_queue;
	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
		sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
	tg->td = td;
}

/*
 * Set has_rules[] if @tg or any of its parents have limits configured.
 * This doesn't require walking up to the top of the hierarchy as the
 * parent's has_rules[] is guaranteed to be correct.
 */
static void tg_update_has_rules(struct throtl_grp *tg)
{
	struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq);
	struct throtl_data *td = tg->td;
	int rw;

	for (rw = READ; rw <= WRITE; rw++)
		tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
			(td->limit_valid[td->limit_index] &&
			 (tg_bps_limit(tg, rw) != U64_MAX ||
			  tg_iops_limit(tg, rw) != UINT_MAX));
}
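
/*
 * For example, if a bps limit is set on a parent cgroup, every child
 * brought online afterwards sees parent_tg->has_rules[] set and inherits
 * it, so bios in the whole subtree keep taking the throttling path even
 * when the children have no limits of their own.
 */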

static void throtl_pd_online(struct blkg_policy_data *pd)
{
	struct throtl_grp *tg = pd_to_tg(pd);

	/*
	 * We don't want new groups to escape the limits of their ancestors.
	 * Update has_rules[] after a new group is brought online.
	 */
	tg_update_has_rules(tg);
}

static void blk_throtl_update_limit_valid(struct throtl_data *td)
{
	struct cgroup_subsys_state *pos_css;
	struct blkcg_gq *blkg;
	bool low_valid = false;

	rcu_read_lock();
	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
		struct throtl_grp *tg = blkg_to_tg(blkg);

		if (tg->bps[READ][LIMIT_LOW] || tg->bps[WRITE][LIMIT_LOW] ||
		    tg->iops[READ][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) {
			low_valid = true;
			break;
		}
	}
	rcu_read_unlock();

	td->limit_valid[LIMIT_LOW] = low_valid;
}

static void throtl_upgrade_state(struct throtl_data *td);

static void throtl_pd_offline(struct blkg_policy_data *pd)
{
	struct throtl_grp *tg = pd_to_tg(pd);

	tg->bps[READ][LIMIT_LOW] = 0;
	tg->bps[WRITE][LIMIT_LOW] = 0;
	tg->iops[READ][LIMIT_LOW] = 0;
	tg->iops[WRITE][LIMIT_LOW] = 0;

	blk_throtl_update_limit_valid(tg->td);

	if (!tg->td->limit_valid[tg->td->limit_index])
		throtl_upgrade_state(tg->td);
}

static void throtl_pd_free(struct blkg_policy_data *pd)
{
	struct throtl_grp *tg = pd_to_tg(pd);

	del_timer_sync(&tg->service_queue.pending_timer);
	blkg_rwstat_exit(&tg->stat_bytes);
	blkg_rwstat_exit(&tg->stat_ios);
	kfree(tg);
}

static struct throtl_grp *
throtl_rb_first(struct throtl_service_queue *parent_sq)
{
	struct rb_node *n;

	n = rb_first_cached(&parent_sq->pending_tree);
	WARN_ON_ONCE(!n);
	if (!n)
		return NULL;
	return rb_entry_tg(n);
}

static void throtl_rb_erase(struct rb_node *n,
			    struct throtl_service_queue *parent_sq)
{
	rb_erase_cached(n, &parent_sq->pending_tree);
	RB_CLEAR_NODE(n);
	--parent_sq->nr_pending;
}

static void update_min_dispatch_time(struct throtl_service_queue *parent_sq)
{
	struct throtl_grp *tg;

	tg = throtl_rb_first(parent_sq);
	if (!tg)
		return;

	parent_sq->first_pending_disptime = tg->disptime;
}

static void tg_service_queue_add(struct throtl_grp *tg)
{
	struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq;
	struct rb_node **node = &parent_sq->pending_tree.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct throtl_grp *__tg;
	unsigned long key = tg->disptime;
	bool leftmost = true;

	while (*node != NULL) {
		parent = *node;
		__tg = rb_entry_tg(parent);

		if (time_before(key, __tg->disptime))
			node = &parent->rb_left;
		else {
			node = &parent->rb_right;
			leftmost = false;
		}
	}

	rb_link_node(&tg->rb_node, parent, node);
	rb_insert_color_cached(&tg->rb_node, &parent_sq->pending_tree,
			       leftmost);
}

static void throtl_enqueue_tg(struct throtl_grp *tg)
{
	if (!(tg->flags & THROTL_TG_PENDING)) {
		tg_service_queue_add(tg);
		tg->flags |= THROTL_TG_PENDING;
		tg->service_queue.parent_sq->nr_pending++;
	}
}

static void throtl_dequeue_tg(struct throtl_grp *tg)
{
	if (tg->flags & THROTL_TG_PENDING) {
		throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
		tg->flags &= ~THROTL_TG_PENDING;
	}
}

/* Call with queue lock held */
static void throtl_schedule_pending_timer(struct throtl_service_queue *sq,
					  unsigned long expires)
{
	unsigned long max_expire = jiffies + 8 * sq_to_td(sq)->throtl_slice;

	/*
	 * Since we are adjusting the throttle limit dynamically, the sleep
	 * time calculated according to the previous limit might be invalid.
	 * It's possible the cgroup sleep time is very long and no other
	 * cgroups have IO running, so a limit change would go unnoticed.
	 * Make sure the cgroup doesn't sleep too long so that the change
	 * isn't missed.
	 */
	if (time_after(expires, max_expire))
		expires = max_expire;
	mod_timer(&sq->pending_timer, expires);
	throtl_log(sq, "schedule timer. delay=%lu jiffies=%lu",
		   expires - jiffies, jiffies);
}
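
/*
 * For example, with a 100ms throtl_slice a computed dispatch time 10
 * seconds away is still clamped to jiffies + 800ms, which bounds how long
 * a limit change can go unnoticed.
 */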

/**
 * throtl_schedule_next_dispatch - schedule the next dispatch cycle
 * @sq: the service_queue to schedule dispatch for
 * @force: force scheduling
 *
 * Arm @sq->pending_timer so that the next dispatch cycle starts on the
 * dispatch time of the first pending child.  Returns %true if either timer
 * is armed or there's no pending child left.  %false if the current
 * dispatch window is still open and the caller should continue
 * dispatching.
 *
 * If @force is %true, the dispatch timer is always scheduled and this
 * function is guaranteed to return %true.  This is to be used when the
 * caller can't dispatch itself and needs to invoke pending_timer
 * unconditionally.  Note that forced scheduling is likely to induce a
 * short delay before dispatch starts even if @sq->first_pending_disptime
 * is not in the future and thus shouldn't be used in hot paths.
 */
static bool throtl_schedule_next_dispatch(struct throtl_service_queue *sq,
					  bool force)
{
	/* any pending children left? */
	if (!sq->nr_pending)
		return true;

	update_min_dispatch_time(sq);

	/* is the next dispatch time in the future? */
	if (force || time_after(sq->first_pending_disptime, jiffies)) {
		throtl_schedule_pending_timer(sq, sq->first_pending_disptime);
		return true;
	}

	/* tell the caller to continue dispatching */
	return false;
}

static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
		bool rw, unsigned long start)
{
	tg->bytes_disp[rw] = 0;
	tg->io_disp[rw] = 0;

	/*
	 * Previous slice has expired. We must have trimmed it after last
	 * bio dispatch. That means since start of last slice, we never used
	 * that bandwidth. Do try to make use of that bandwidth while giving
	 * credit.
	 */
	if (time_after_eq(start, tg->slice_start[rw]))
		tg->slice_start[rw] = start;

	tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
	throtl_log(&tg->service_queue,
		   "[%c] new slice with credit start=%lu end=%lu jiffies=%lu",
		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
		   tg->slice_end[rw], jiffies);
}

static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
{
	tg->bytes_disp[rw] = 0;
	tg->io_disp[rw] = 0;
	tg->slice_start[rw] = jiffies;
	tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
	throtl_log(&tg->service_queue,
		   "[%c] new slice start=%lu end=%lu jiffies=%lu",
		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
		   tg->slice_end[rw], jiffies);
}

static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
					unsigned long jiffy_end)
{
	tg->slice_end[rw] = roundup(jiffy_end, tg->td->throtl_slice);
}

static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw,
				       unsigned long jiffy_end)
{
	throtl_set_slice_end(tg, rw, jiffy_end);
	throtl_log(&tg->service_queue,
		   "[%c] extend slice start=%lu end=%lu jiffies=%lu",
		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
		   tg->slice_end[rw], jiffies);
}
  
  /* Determine if previously allocated or extended slice is complete or not */
static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
{
	if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw]))
		return false;

	return true;
}
  
  /* Trim the used slices and adjust slice start accordingly */
static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
{
	unsigned long nr_slices, time_elapsed, io_trim;
	u64 bytes_trim, tmp;

	BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw]));

	/*
	 * If bps is unlimited (-1), then the time slice doesn't get
	 * renewed. Don't try to trim the slice if the slice is used. A new
	 * slice will start when appropriate.
	 */
	if (throtl_slice_used(tg, rw))
		return;

	/*
	 * A bio has been dispatched. Also adjust slice_end. It might happen
	 * that initially the cgroup limit was very low, resulting in a high
	 * slice_end, but later the limit was bumped up and the bio was
	 * dispatched sooner; then we need to reduce slice_end. A bogus high
	 * slice_end is bad because it does not allow a new slice to start.
	 */
	throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice);

	time_elapsed = jiffies - tg->slice_start[rw];
	nr_slices = time_elapsed / tg->td->throtl_slice;

	if (!nr_slices)
		return;
	tmp = tg_bps_limit(tg, rw) * tg->td->throtl_slice * nr_slices;
	do_div(tmp, HZ);
	bytes_trim = tmp;

	io_trim = (tg_iops_limit(tg, rw) * tg->td->throtl_slice * nr_slices) /
		HZ;

	if (!bytes_trim && !io_trim)
		return;

	if (tg->bytes_disp[rw] >= bytes_trim)
		tg->bytes_disp[rw] -= bytes_trim;
	else
		tg->bytes_disp[rw] = 0;

	if (tg->io_disp[rw] >= io_trim)
		tg->io_disp[rw] -= io_trim;
	else
		tg->io_disp[rw] = 0;

	tg->slice_start[rw] += nr_slices * tg->td->throtl_slice;

	throtl_log(&tg->service_queue,
		   "[%c] trim slice nr=%lu bytes=%llu io=%lu start=%lu end=%lu jiffies=%lu",
		   rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim,
		   tg->slice_start[rw], tg->slice_end[rw], jiffies);
}
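
/*
 * Worked example (assuming HZ=1000 and a 100-jiffy throtl_slice): if two
 * full slices have elapsed under a 1MB/s bps limit and a 100 iops limit,
 * nr_slices = 2, bytes_trim = 1048576 * 100 * 2 / 1000 = 209715 and
 * io_trim = 100 * 100 * 2 / 1000 = 20, so that much budget is deducted
 * from bytes_disp/io_disp and slice_start advances by 200 jiffies.
 */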

static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
				  u32 iops_limit, unsigned long *wait)
{
	bool rw = bio_data_dir(bio);
	unsigned int io_allowed;
	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
	u64 tmp;

	if (iops_limit == UINT_MAX) {
		if (wait)
			*wait = 0;
		return true;
	}

	jiffy_elapsed = jiffies - tg->slice_start[rw];

	/* Round up to the next throttle slice, wait time must be nonzero */
	jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);

	/*
	 * jiffy_elapsed_rnd should not be a big value: the minimum iops can
	 * be 1, so at most the elapsed jiffies should be equivalent to 1
	 * second, as we will allow dispatch after 1 second and after that
	 * the slice should have been trimmed.
	 */
	tmp = (u64)iops_limit * jiffy_elapsed_rnd;
	do_div(tmp, HZ);

	if (tmp > UINT_MAX)
		io_allowed = UINT_MAX;
	else
		io_allowed = tmp;

	if (tg->io_disp[rw] + 1 <= io_allowed) {
		if (wait)
			*wait = 0;
		return true;
	}

	/* Calc approx time to dispatch */
	jiffy_wait = jiffy_elapsed_rnd - jiffy_elapsed;

	if (wait)
		*wait = jiffy_wait;
	return false;
}
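
/*
 * Worked example (assuming HZ=1000 and a 100-jiffy throtl_slice): with
 * iops_limit = 10 and a bio arriving 30 jiffies into the slice,
 * jiffy_elapsed_rnd = roundup(31, 100) = 100 and io_allowed =
 * 10 * 100 / 1000 = 1, so a second bio in the same slice is told to wait
 * 100 - 30 = 70 jiffies.
 */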

static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
				 u64 bps_limit, unsigned long *wait)
{
	bool rw = bio_data_dir(bio);
	u64 bytes_allowed, extra_bytes, tmp;
	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
	unsigned int bio_size = throtl_bio_data_size(bio);

	if (bps_limit == U64_MAX) {
		if (wait)
			*wait = 0;
		return true;
	}

	jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];

	/* Slice has just started. Consider one slice interval */
	if (!jiffy_elapsed)
		jiffy_elapsed_rnd = tg->td->throtl_slice;

	jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);

	tmp = bps_limit * jiffy_elapsed_rnd;
	do_div(tmp, HZ);
	bytes_allowed = tmp;

	if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
		if (wait)
			*wait = 0;
		return true;
	}

	/* Calc approx time to dispatch */
	extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
	jiffy_wait = div64_u64(extra_bytes * HZ, bps_limit);

	if (!jiffy_wait)
		jiffy_wait = 1;

	/*
	 * This wait time does not take into consideration the rounding
	 * up we did. Add that time as well.
	 */
	jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed);
	if (wait)
		*wait = jiffy_wait;
	return false;
}
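
/*
 * Worked example (assuming HZ=1000, a 100-jiffy throtl_slice and
 * bytes_disp still 0): with bps_limit = 1MB/s and 50 jiffies elapsed,
 * bytes_allowed = 1048576 * 100 / 1000 = 104857.  A 256k bio then gives
 * extra_bytes = 262144 - 104857 = 157287 and a base wait of
 * 157287 * 1000 / 1048576 = 150 jiffies, plus the 50 jiffies of round-up
 * slack the function adds on top.
 */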
  
/*
 * Returns whether one can dispatch a bio or not. Also returns the approx
 * number of jiffies to wait before this bio is within the IO rate and can
 * be dispatched.
 */
static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
			    unsigned long *wait)
{
	bool rw = bio_data_dir(bio);
	unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0;
	u64 bps_limit = tg_bps_limit(tg, rw);
	u32 iops_limit = tg_iops_limit(tg, rw);

	/*
	 * Currently the whole state machine of the group depends on the
	 * first bio queued in the group's bio list. So one should not be
	 * calling this function with a different bio if there are other
	 * bios queued.
	 */
	BUG_ON(tg->service_queue.nr_queued[rw] &&
	       bio != throtl_peek_queued(&tg->service_queue.queued[rw]));

	/* If tg->bps = -1, then BW is unlimited */
	if (bps_limit == U64_MAX && iops_limit == UINT_MAX) {
		if (wait)
			*wait = 0;
		return true;
	}

	/*
	 * If the previous slice expired, start a new one; otherwise
	 * renew/extend the existing slice to make sure it is at least
	 * throtl_slice interval long since now. A new slice is started only
	 * for an empty throttle group. If there is a queued bio, that means
	 * there should be an active slice and it should be extended instead.
	 */
	if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
		throtl_start_new_slice(tg, rw);
	else {
		if (time_before(tg->slice_end[rw],
		    jiffies + tg->td->throtl_slice))
			throtl_extend_slice(tg, rw,
				jiffies + tg->td->throtl_slice);
	}

	if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
	    tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
		if (wait)
			*wait = 0;
		return true;
	}

	max_wait = max(bps_wait, iops_wait);

	if (wait)
		*wait = max_wait;

	if (time_before(tg->slice_end[rw], jiffies + max_wait))
		throtl_extend_slice(tg, rw, jiffies + max_wait);

	return false;
}
  
  static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
  {
  	bool rw = bio_data_dir(bio);
	unsigned int bio_size = throtl_bio_data_size(bio);

	/* Charge the bio to the group */
	tg->bytes_disp[rw] += bio_size;
	tg->io_disp[rw]++;
	tg->last_bytes_disp[rw] += bio_size;
	tg->last_io_disp[rw]++;

	/*
	 * BIO_THROTTLED is used to prevent the same bio from being throttled
	 * more than once, as a throttled bio will go through blk-throtl a
	 * second time when it eventually gets issued.  Set it when a bio
	 * is being charged to a tg.
	 */
	if (!bio_flagged(bio, BIO_THROTTLED))
		bio_set_flag(bio, BIO_THROTTLED);
}

/**
 * throtl_add_bio_tg - add a bio to the specified throtl_grp
 * @bio: bio to add
 * @qn: qnode to use
 * @tg: the target throtl_grp
 *
 * Add @bio to @tg's service_queue using @qn.  If @qn is not specified,
 * tg->qnode_on_self[] is used.
 */
static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
			      struct throtl_grp *tg)
{
	struct throtl_service_queue *sq = &tg->service_queue;
	bool rw = bio_data_dir(bio);

	if (!qn)
		qn = &tg->qnode_on_self[rw];

	/*
	 * If @tg doesn't currently have any bios queued in the same
	 * direction, queueing @bio can change when @tg should be
	 * dispatched.  Mark that @tg was empty.  This is automatically
	 * cleared on the next tg_update_disptime().
	 */
	if (!sq->nr_queued[rw])
		tg->flags |= THROTL_TG_WAS_EMPTY;

	throtl_qnode_add_bio(bio, qn, &sq->queued[rw]);
	sq->nr_queued[rw]++;
	throtl_enqueue_tg(tg);
}

static void tg_update_disptime(struct throtl_grp *tg)
{
	struct throtl_service_queue *sq = &tg->service_queue;
	unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
	struct bio *bio;
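
	/*
	 * read_wait/write_wait are unsigned longs, so the -1 initializers
	 * above are really ULONG_MAX; a direction with no bio queued can
	 * never win the min() taken below.
	 */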
	bio = throtl_peek_queued(&sq->queued[READ]);
	if (bio)
		tg_may_dispatch(tg, bio, &read_wait);

	bio = throtl_peek_queued(&sq->queued[WRITE]);
	if (bio)
		tg_may_dispatch(tg, bio, &write_wait);

	min_wait = min(read_wait, write_wait);
	disptime = jiffies + min_wait;

	/* Update dispatch time */
	throtl_dequeue_tg(tg);
	tg->disptime = disptime;
	throtl_enqueue_tg(tg);

	/* see throtl_add_bio_tg() */
	tg->flags &= ~THROTL_TG_WAS_EMPTY;
}

static void start_parent_slice_with_credit(struct throtl_grp *child_tg,
					struct throtl_grp *parent_tg, bool rw)
{
	if (throtl_slice_used(parent_tg, rw)) {
		throtl_start_new_slice_with_credit(parent_tg, rw,
				child_tg->slice_start[rw]);
	}
}

static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
{
	struct throtl_service_queue *sq = &tg->service_queue;
	struct throtl_service_queue *parent_sq = sq->parent_sq;
	struct throtl_grp *parent_tg = sq_to_tg(parent_sq);
	struct throtl_grp *tg_to_put = NULL;
	struct bio *bio;

	/*
	 * @bio is being transferred from @tg to @parent_sq.  Popping a bio
	 * from @tg may put its reference and @parent_sq might end up
	 * getting released prematurely.  Remember the tg to put and put it
	 * after @bio is transferred to @parent_sq.
	 */
	bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put);
	sq->nr_queued[rw]--;

	throtl_charge_bio(tg, bio);

	/*
	 * If our parent is another tg, we just need to transfer @bio to
	 * the parent using throtl_add_bio_tg().  If our parent is
	 * @td->service_queue, @bio is ready to be issued.  Put it on its
	 * bio_lists[] and decrease total number queued.  The caller is
	 * responsible for issuing these bios.
	 */
	if (parent_tg) {
		throtl_add_bio_tg(bio, &tg->qnode_on_parent[rw], parent_tg);
		start_parent_slice_with_credit(tg, parent_tg, rw);
	} else {
		throtl_qnode_add_bio(bio, &tg->qnode_on_parent[rw],
				     &parent_sq->queued[rw]);
		BUG_ON(tg->td->nr_queued[rw] <= 0);
		tg->td->nr_queued[rw]--;
	}

	throtl_trim_slice(tg, rw);

	if (tg_to_put)
		blkg_put(tg_to_blkg(tg_to_put));
}

static int throtl_dispatch_tg(struct throtl_grp *tg)
{
	struct throtl_service_queue *sq = &tg->service_queue;
	unsigned int nr_reads = 0, nr_writes = 0;
	unsigned int max_nr_reads = THROTL_GRP_QUANTUM * 3 / 4;
	unsigned int max_nr_writes = THROTL_GRP_QUANTUM - max_nr_reads;
	struct bio *bio;

	/* Try to dispatch 75% READS and 25% WRITES */
	while ((bio = throtl_peek_queued(&sq->queued[READ])) &&
	       tg_may_dispatch(tg, bio, NULL)) {

		tg_dispatch_one_bio(tg, bio_data_dir(bio));
		nr_reads++;

		if (nr_reads >= max_nr_reads)
			break;
	}

	while ((bio = throtl_peek_queued(&sq->queued[WRITE])) &&
	       tg_may_dispatch(tg, bio, NULL)) {

		tg_dispatch_one_bio(tg, bio_data_dir(bio));
		nr_writes++;

		if (nr_writes >= max_nr_writes)
			break;
	}

	return nr_reads + nr_writes;
}
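
/*
 * With the defaults near the top of this file, THROTL_GRP_QUANTUM == 8, so
 * one call above moves at most 6 reads and 2 writes (8 * 3 / 4 and the
 * remainder), while throtl_select_dispatch() below stops a round once
 * THROTL_QUANTUM (32) bios have been dispatched in total.
 */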
static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
{
	unsigned int nr_disp = 0;

	while (1) {
		struct throtl_grp *tg;
		struct throtl_service_queue *sq;

		if (!parent_sq->nr_pending)
			break;

		tg = throtl_rb_first(parent_sq);
		if (!tg)
			break;

		if (time_before(jiffies, tg->disptime))
			break;

		throtl_dequeue_tg(tg);

		nr_disp += throtl_dispatch_tg(tg);

		sq = &tg->service_queue;
		if (sq->nr_queued[0] || sq->nr_queued[1])
			tg_update_disptime(tg);

		if (nr_disp >= THROTL_QUANTUM)
			break;
	}

	return nr_disp;
}

static bool throtl_can_upgrade(struct throtl_data *td,
	struct throtl_grp *this_tg);

/**
 * throtl_pending_timer_fn - timer function for service_queue->pending_timer
 * @t: the pending_timer member of the throtl_service_queue being serviced
 *
 * This timer is armed when a child throtl_grp with active bios becomes
 * pending and queued on the service_queue's pending_tree and expires when
 * the first child throtl_grp should be dispatched.  This function
 * dispatches bios from the children throtl_grps to the parent
 * service_queue.
 *
 * If the parent's parent is another throtl_grp, dispatching is propagated
 * by either arming its pending_timer or repeating dispatch directly.  If
 * the top-level service_tree is reached, throtl_data->dispatch_work is
 * kicked so that the ready bios are issued.
 */
static void throtl_pending_timer_fn(struct timer_list *t)
{
	struct throtl_service_queue *sq = from_timer(sq, t, pending_timer);
	struct throtl_grp *tg = sq_to_tg(sq);
	struct throtl_data *td = sq_to_td(sq);
	struct request_queue *q = td->queue;
	struct throtl_service_queue *parent_sq;
	bool dispatched;
	int ret;

	spin_lock_irq(&q->queue_lock);
	if (throtl_can_upgrade(td, NULL))
		throtl_upgrade_state(td);

again:
	parent_sq = sq->parent_sq;
	dispatched = false;

	while (true) {
		throtl_log(sq, "dispatch nr_queued=%u read=%u write=%u",
			   sq->nr_queued[READ] + sq->nr_queued[WRITE],
			   sq->nr_queued[READ], sq->nr_queued[WRITE]);

		ret = throtl_select_dispatch(sq);
		if (ret) {
			throtl_log(sq, "bios disp=%u", ret);
			dispatched = true;
		}

		if (throtl_schedule_next_dispatch(sq, false))
			break;

		/* this dispatch window is still open, relax and repeat */
		spin_unlock_irq(&q->queue_lock);
		cpu_relax();
		spin_lock_irq(&q->queue_lock);
	}

	if (!dispatched)
		goto out_unlock;

	if (parent_sq) {
		/* @parent_sq is another throtl_grp, propagate dispatch */
		if (tg->flags & THROTL_TG_WAS_EMPTY) {
			tg_update_disptime(tg);
			if (!throtl_schedule_next_dispatch(parent_sq, false)) {
				/* window is already open, repeat dispatching */
				sq = parent_sq;
				tg = sq_to_tg(sq);
				goto again;
			}
		}
	} else {
		/* reached the top-level, queue issuing */
		queue_work(kthrotld_workqueue, &td->dispatch_work);
	}
out_unlock:
	spin_unlock_irq(&q->queue_lock);
}

/**
 * blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work
 * @work: work item being executed
 *
 * This function is queued for execution when bios reach the bio_lists[]
 * of throtl_data->service_queue.  Those bios are ready and issued by this
 * function.
 */
static void blk_throtl_dispatch_work_fn(struct work_struct *work)
{
	struct throtl_data *td = container_of(work, struct throtl_data,
					      dispatch_work);
	struct throtl_service_queue *td_sq = &td->service_queue;
	struct request_queue *q = td->queue;
	struct bio_list bio_list_on_stack;
	struct bio *bio;
	struct blk_plug plug;
	int rw;

	bio_list_init(&bio_list_on_stack);
	spin_lock_irq(&q->queue_lock);
	for (rw = READ; rw <= WRITE; rw++)
		while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL)))
			bio_list_add(&bio_list_on_stack, bio);
	spin_unlock_irq(&q->queue_lock);

	if (!bio_list_empty(&bio_list_on_stack)) {
		blk_start_plug(&plug);
		while ((bio = bio_list_pop(&bio_list_on_stack)))
			submit_bio_noacct(bio);
		blk_finish_plug(&plug);
	}
}
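
/*
 * The blk_start_plug()/blk_finish_plug() pair above batches the
 * now-unthrottled bios so the block layer can merge and dispatch them
 * together when the plug is released, instead of issuing them one by one.
 */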

static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd,
			      int off)
{
	struct throtl_grp *tg = pd_to_tg(pd);
	u64 v = *(u64 *)((void *)tg + off);

	if (v == U64_MAX)
		return 0;
	return __blkg_prfill_u64(sf, pd, v);
}

static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd,
			       int off)
{
	struct throtl_grp *tg = pd_to_tg(pd);
	unsigned int v = *(unsigned int *)((void *)tg + off);

	if (v == UINT_MAX)
		return 0;
	return __blkg_prfill_u64(sf, pd, v);
}

static int tg_print_conf_u64(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_u64,
			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
	return 0;
}

static int tg_print_conf_uint(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_uint,
			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
	return 0;
}

static void tg_conf_updated(struct throtl_grp *tg, bool global)
{
	struct throtl_service_queue *sq = &tg->service_queue;
	struct cgroup_subsys_state *pos_css;
	struct blkcg_gq *blkg;

	throtl_log(&tg->service_queue,
		   "limit change rbps=%llu wbps=%llu riops=%u wiops=%u",
		   tg_bps_limit(tg, READ), tg_bps_limit(tg, WRITE),
		   tg_iops_limit(tg, READ), tg_iops_limit(tg, WRITE));

	/*
	 * Update has_rules[] flags for the updated tg's subtree.  A tg is
	 * considered to have rules if either the tg itself or any of its
	 * ancestors has rules.  This identifies groups without any
	 * restrictions in the whole hierarchy and allows them to bypass
	 * blk-throttle.
	 */
	blkg_for_each_descendant_pre(blkg, pos_css,
			global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) {
		struct throtl_grp *this_tg = blkg_to_tg(blkg);
		struct throtl_grp *parent_tg;

		tg_update_has_rules(this_tg);
		/* ignore root/second level */
		if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent ||
		    !blkg->parent->parent)
			continue;
		parent_tg = blkg_to_tg(blkg->parent);
		/*
		 * make sure all children have a lower idle time threshold
		 * and a higher latency target than their parent
		 */
		this_tg->idletime_threshold = min(this_tg->idletime_threshold,
				parent_tg->idletime_threshold);
		this_tg->latency_target = max(this_tg->latency_target,
				parent_tg->latency_target);
	}

	/*
	 * We're already holding queue_lock and know @tg is valid.  Let's
	 * apply the new config directly.
	 *
	 * Restart the slices for both READ and WRITE. It might happen
	 * that a group's limits are dropped suddenly and we don't want to
	 * account recently dispatched IO at the new low rate.
	 */
	throtl_start_new_slice(tg, READ);
	throtl_start_new_slice(tg, WRITE);

	if (tg->flags & THROTL_TG_PENDING) {
		tg_update_disptime(tg);
		throtl_schedule_next_dispatch(sq->parent_sq, true);
	}
}

static ssize_t tg_set_conf(struct kernfs_open_file *of,
			   char *buf, size_t nbytes, loff_t off, bool is_u64)
{
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct blkg_conf_ctx ctx;
	struct throtl_grp *tg;
	int ret;
	u64 v;

	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
	if (ret)
		return ret;

	ret = -EINVAL;
	if (sscanf(ctx.body, "%llu", &v) != 1)
		goto out_finish;
	if (!v)
		v = U64_MAX;

	tg = blkg_to_tg(ctx.blkg);

	if (is_u64)
		*(u64 *)((void *)tg + of_cft(of)->private) = v;
	else
		*(unsigned int *)((void *)tg + of_cft(of)->private) = v;

	tg_conf_updated(tg, false);
	ret = 0;
out_finish:
	blkg_conf_finish(&ctx);
	return ret ?: nbytes;
}

static ssize_t tg_set_conf_u64(struct kernfs_open_file *of,
			       char *buf, size_t nbytes, loff_t off)
{
	return tg_set_conf(of, buf, nbytes, off, true);
}

static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off)
{
	return tg_set_conf(of, buf, nbytes, off, false);
}

static int tg_print_rwstat(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  blkg_prfill_rwstat, &blkcg_policy_throtl,
			  seq_cft(sf)->private, true);
	return 0;
}

static u64 tg_prfill_rwstat_recursive(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat_sample sum;

	blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_throtl, off,
				  &sum);
	return __blkg_prfill_rwstat(sf, pd, &sum);
}

static int tg_print_rwstat_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  tg_prfill_rwstat_recursive, &blkcg_policy_throtl,
			  seq_cft(sf)->private, true);
	return 0;
}

static struct cftype throtl_legacy_files[] = {
	{
		.name = "throttle.read_bps_device",
		.private = offsetof(struct throtl_grp, bps[READ][LIMIT_MAX]),
		.seq_show = tg_print_conf_u64,
		.write = tg_set_conf_u64,
	},
	{
		.name = "throttle.write_bps_device",
		.private = offsetof(struct throtl_grp, bps[WRITE][LIMIT_MAX]),
		.seq_show = tg_print_conf_u64,
		.write = tg_set_conf_u64,
	},
	{
		.name = "throttle.read_iops_device",
		.private = offsetof(struct throtl_grp, iops[READ][LIMIT_MAX]),
		.seq_show = tg_print_conf_uint,
		.write = tg_set_conf_uint,
	},
	{
		.name = "throttle.write_iops_device",
		.private = offsetof(struct throtl_grp, iops[WRITE][LIMIT_MAX]),
		.seq_show = tg_print_conf_uint,
		.write = tg_set_conf_uint,
	},
	{
		.name = "throttle.io_service_bytes",
		.private = offsetof(struct throtl_grp, stat_bytes),
		.seq_show = tg_print_rwstat,
	},
	{
		.name = "throttle.io_service_bytes_recursive",
		.private = offsetof(struct throtl_grp, stat_bytes),
		.seq_show = tg_print_rwstat_recursive,
	},
	{
		.name = "throttle.io_serviced",
		.private = offsetof(struct throtl_grp, stat_ios),
		.seq_show = tg_print_rwstat,
	},
	{
		.name = "throttle.io_serviced_recursive",
		.private = offsetof(struct throtl_grp, stat_ios),
		.seq_show = tg_print_rwstat_recursive,
	},
	{ }	/* terminate */
};
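
/*
 * Example (cgroup v1): the files above take "<major>:<minor> <value>"
 * pairs.  A hypothetical session limiting a group to 1 MiB/s of reads on
 * device 8:0 could look like:
 *
 *	echo "8:0 1048576" > /sys/fs/cgroup/blkio/grp1/blkio.throttle.read_bps_device
 *
 * Writing 0 clears the limit, since tg_set_conf() maps 0 back to U64_MAX.
 */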

static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
			 int off)
{
	struct throtl_grp *tg = pd_to_tg(pd);
	const char *dname = blkg_dev_name(pd->blkg);
	char bufs[4][21] = { "max", "max", "max", "max" };
	u64 bps_dft;
	unsigned int iops_dft;
	char idle_time[26] = "";
	char latency_time[26] = "";

	if (!dname)
		return 0;

	if (off == LIMIT_LOW) {
		bps_dft = 0;
		iops_dft = 0;
	} else {
		bps_dft = U64_MAX;
		iops_dft = UINT_MAX;
	}

	if (tg->bps_conf[READ][off] == bps_dft &&
	    tg->bps_conf[WRITE][off] == bps_dft &&
	    tg->iops_conf[READ][off] == iops_dft &&
	    tg->iops_conf[WRITE][off] == iops_dft &&
	    (off != LIMIT_LOW ||
	     (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD &&
	      tg->latency_target_conf == DFL_LATENCY_TARGET)))
		return 0;

	if (tg->bps_conf[READ][off] != U64_MAX)
		snprintf(bufs[0], sizeof(bufs[0]), "%llu",
			tg->bps_conf[READ][off]);
	if (tg->bps_conf[WRITE][off] != U64_MAX)
		snprintf(bufs[1], sizeof(bufs[1]), "%llu",
			tg->bps_conf[WRITE][off]);
	if (tg->iops_conf[READ][off] != UINT_MAX)
		snprintf(bufs[2], sizeof(bufs[2]), "%u",
			tg->iops_conf[READ][off]);
	if (tg->iops_conf[WRITE][off] != UINT_MAX)
		snprintf(bufs[3], sizeof(bufs[3]), "%u",
			tg->iops_conf[WRITE][off]);
	if (off == LIMIT_LOW) {
		if (tg->idletime_threshold_conf == ULONG_MAX)
			strcpy(idle_time, " idle=max");
		else
			snprintf(idle_time, sizeof(idle_time), " idle=%lu",
				tg->idletime_threshold_conf);

		if (tg->latency_target_conf == ULONG_MAX)
			strcpy(latency_time, " latency=max");
		else
			snprintf(latency_time, sizeof(latency_time),
				" latency=%lu", tg->latency_target_conf);
	}

	seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n",
		   dname, bufs[0], bufs[1], bufs[2], bufs[3], idle_time,
		   latency_time);
	return 0;
}

static int tg_print_limit(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_limit,
			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
	return 0;
}

static ssize_t tg_set_limit(struct kernfs_open_file *of,
			  char *buf, size_t nbytes, loff_t off)
{
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct blkg_conf_ctx ctx;
	struct throtl_grp *tg;
	u64 v[4];
	unsigned long idle_time;
	unsigned long latency_time;
	int ret;
	int index = of_cft(of)->private;

	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
	if (ret)
		return ret;

	tg = blkg_to_tg(ctx.blkg);

	v[0] = tg->bps_conf[READ][index];
	v[1] = tg->bps_conf[WRITE][index];
	v[2] = tg->iops_conf[READ][index];
	v[3] = tg->iops_conf[WRITE][index];

	idle_time = tg->idletime_threshold_conf;
	latency_time = tg->latency_target_conf;
	while (true) {
		char tok[27];	/* wiops=18446744073709551616 */
		char *p;
		u64 val = U64_MAX;
		int len;

		if (sscanf(ctx.body, "%26s%n", tok, &len) != 1)
			break;
		if (tok[0] == '\0')
			break;
		ctx.body += len;

		ret = -EINVAL;
		p = tok;
		strsep(&p, "=");
		if (!p || (sscanf(p, "%llu", &val) != 1 && strcmp(p, "max")))
			goto out_finish;

		ret = -ERANGE;
		if (!val)
			goto out_finish;

		ret = -EINVAL;
		if (!strcmp(tok, "rbps") && val > 1)
			v[0] = val;
		else if (!strcmp(tok, "wbps") && val > 1)
			v[1] = val;
		else if (!strcmp(tok, "riops") && val > 1)
			v[2] = min_t(u64, val, UINT_MAX);
		else if (!strcmp(tok, "wiops") && val > 1)
			v[3] = min_t(u64, val, UINT_MAX);
		else if (off == LIMIT_LOW && !strcmp(tok, "idle"))
			idle_time = val;
		else if (off == LIMIT_LOW && !strcmp(tok, "latency"))
			latency_time = val;
		else
			goto out_finish;
	}

	tg->bps_conf[READ][index] = v[0];
	tg->bps_conf[WRITE][index] = v[1];
	tg->iops_conf[READ][index] = v[2];
	tg->iops_conf[WRITE][index] = v[3];

	if (index == LIMIT_MAX) {
		tg->bps[READ][index] = v[0];
		tg->bps[WRITE][index] = v[1];
		tg->iops[READ][index] = v[2];
		tg->iops[WRITE][index] = v[3];
	}
	tg->bps[READ][LIMIT_LOW] = min(tg->bps_conf[READ][LIMIT_LOW],
		tg->bps_conf[READ][LIMIT_MAX]);
	tg->bps[WRITE][LIMIT_LOW] = min(tg->bps_conf[WRITE][LIMIT_LOW],
		tg->bps_conf[WRITE][LIMIT_MAX]);
	tg->iops[READ][LIMIT_LOW] = min(tg->iops_conf[READ][LIMIT_LOW],
		tg->iops_conf[READ][LIMIT_MAX]);
	tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],
		tg->iops_conf[WRITE][LIMIT_MAX]);

	tg->idletime_threshold_conf = idle_time;
	tg->latency_target_conf = latency_time;

	/* force user to configure all settings for low limit */
	if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] ||
	      tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) ||
	    tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD ||
	    tg->latency_target_conf == DFL_LATENCY_TARGET) {
		tg->bps[READ][LIMIT_LOW] = 0;
		tg->bps[WRITE][LIMIT_LOW] = 0;
		tg->iops[READ][LIMIT_LOW] = 0;
		tg->iops[WRITE][LIMIT_LOW] = 0;
		tg->idletime_threshold = DFL_IDLE_THRESHOLD;
		tg->latency_target = DFL_LATENCY_TARGET;
	} else if (index == LIMIT_LOW) {
		tg->idletime_threshold = tg->idletime_threshold_conf;
		tg->latency_target = tg->latency_target_conf;
	}

	blk_throtl_update_limit_valid(tg->td);
	if (tg->td->limit_valid[LIMIT_LOW]) {
		if (index == LIMIT_LOW)
			tg->td->limit_index = LIMIT_LOW;
	} else
		tg->td->limit_index = LIMIT_MAX;
	tg_conf_updated(tg, index == LIMIT_LOW &&
		tg->td->limit_valid[LIMIT_LOW]);
	ret = 0;
out_finish:
	blkg_conf_finish(&ctx);
	return ret ?: nbytes;
}

static struct cftype throtl_files[] = {
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
	{
		.name = "low",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = tg_print_limit,
		.write = tg_set_limit,
		.private = LIMIT_LOW,
	},
#endif
	{
		.name = "max",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = tg_print_limit,
		.write = tg_set_limit,
		.private = LIMIT_MAX,
	},
	{ }	/* terminate */
};
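
/*
 * Example (cgroup v2): the files above appear as io.max and, with
 * CONFIG_BLK_DEV_THROTTLING_LOW, io.low.  tg_set_limit() accepts
 * space-separated key=value tokens after the device number; "max" clears a
 * limit, and io.low additionally takes idle= and latency= (both roughly in
 * microseconds).  A hypothetical session:
 *
 *	echo "8:16 rbps=2097152 wiops=120" > io.max
 *	echo "8:16 rbps=1048576 idle=1000 latency=100" > io.low
 */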

static void throtl_shutdown_wq(struct request_queue *q)
{
	struct throtl_data *td = q->td;

	cancel_work_sync(&td->dispatch_work);
}

static struct blkcg_policy blkcg_policy_throtl = {
	.dfl_cftypes		= throtl_files,
	.legacy_cftypes		= throtl_legacy_files,

	.pd_alloc_fn		= throtl_pd_alloc,
	.pd_init_fn		= throtl_pd_init,
	.pd_online_fn		= throtl_pd_online,
	.pd_offline_fn		= throtl_pd_offline,
	.pd_free_fn		= throtl_pd_free,
};

static unsigned long __tg_last_low_overflow_time(struct throtl_grp *tg)
{
	unsigned long rtime = jiffies, wtime = jiffies;

	if (tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW])
		rtime = tg->last_low_overflow_time[READ];
	if (tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW])
		wtime = tg->last_low_overflow_time[WRITE];
	return min(rtime, wtime);
}

/* tg should not be an intermediate node */
static unsigned long tg_last_low_overflow_time(struct throtl_grp *tg)
{
	struct throtl_service_queue *parent_sq;
	struct throtl_grp *parent = tg;
	unsigned long ret = __tg_last_low_overflow_time(tg);

	while (true) {
		parent_sq = parent->service_queue.parent_sq;
		parent = sq_to_tg(parent_sq);
		if (!parent)
			break;

		/*
		 * The parent doesn't have a low limit, so it always reaches
		 * the low limit.  Its overflow time is useless for its
		 * children.
		 */
		if (!parent->bps[READ][LIMIT_LOW] &&
		    !parent->iops[READ][LIMIT_LOW] &&
		    !parent->bps[WRITE][LIMIT_LOW] &&
		    !parent->iops[WRITE][LIMIT_LOW])
			continue;
		if (time_after(__tg_last_low_overflow_time(parent), ret))
			ret = __tg_last_low_overflow_time(parent);
	}
	return ret;
}

static bool throtl_tg_is_idle(struct throtl_grp *tg)
{
	/*
	 * cgroup is idle if:
	 * - a single idle period is too long: longer than a fixed value (in
	 *   case the user configures too big a threshold) or 4 times the
	 *   idle time threshold
	 * - average think time is more than the threshold
	 * - IO latency is largely below the target
	 */
	unsigned long time;
	bool ret;

	time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);
	ret = tg->latency_target == DFL_LATENCY_TARGET ||
	      tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
	      (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
	      tg->avg_idletime > tg->idletime_threshold ||
	      (tg->latency_target && tg->bio_cnt &&
		tg->bad_bio_cnt * 5 < tg->bio_cnt);
	throtl_log(&tg->service_queue,
		"avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d",
		tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt,
		tg->bio_cnt, ret, tg->td->scale);
	return ret;
}
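
/*
 * Worked example with hypothetical numbers: for idletime_threshold = 1000
 * (~1ms, since ktime_get_ns() >> 10 approximates microseconds), the single
 * idle check above fires once the last IO finished more than min(5s, 4ms)
 * ago, and the last clause treats the group as idle while fewer than 20%
 * of its sampled bios missed the latency target (bad_bio_cnt * 5 < bio_cnt).
 */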

static bool throtl_tg_can_upgrade(struct throtl_grp *tg)
{
	struct throtl_service_queue *sq = &tg->service_queue;
	bool read_limit, write_limit;

	/*
	 * if cgroup reaches low limit (if low limit is 0, the cgroup always
	 * reaches it), it's ok to upgrade to the next limit
	 */
	read_limit = tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW];
	write_limit = tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW];
	if (!read_limit && !write_limit)
		return true;
	if (read_limit && sq->nr_queued[READ] &&
	    (!write_limit || sq->nr_queued[WRITE]))
		return true;
	if (write_limit && sq->nr_queued[WRITE] &&
	    (!read_limit || sq->nr_queued[READ]))
		return true;

	if (time_after_eq(jiffies,
		tg_last_low_overflow_time(tg) + tg->td->throtl_slice) &&
	    throtl_tg_is_idle(tg))
		return true;
	return false;
}

static bool throtl_hierarchy_can_upgrade(struct throtl_grp *tg)
{
	while (true) {
		if (throtl_tg_can_upgrade(tg))
			return true;
		tg = sq_to_tg(tg->service_queue.parent_sq);
		if (!tg || !tg_to_blkg(tg)->parent)
			return false;
	}
	return false;
}

static bool throtl_can_upgrade(struct throtl_data *td,
	struct throtl_grp *this_tg)
{
	struct cgroup_subsys_state *pos_css;
	struct blkcg_gq *blkg;

	if (td->limit_index != LIMIT_LOW)
		return false;
	if (time_before(jiffies, td->low_downgrade_time + td->throtl_slice))
		return false;

	rcu_read_lock();
	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
		struct throtl_grp *tg = blkg_to_tg(blkg);

		if (tg == this_tg)
			continue;
		if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children))
			continue;
		if (!throtl_hierarchy_can_upgrade(tg)) {
			rcu_read_unlock();
			return false;
		}
	}
	rcu_read_unlock();
	return true;
}
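
/*
 * In other words, the LIMIT_LOW -> LIMIT_MAX switch is queue-wide: every
 * leaf group other than @this_tg must either have no low limit, be
 * backlogged in its low-limited direction(s), or look idle per
 * throtl_tg_is_idle() (possibly via an ancestor).  A single busy group
 * still below its low limit keeps the whole queue at LIMIT_LOW.
 */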

static void throtl_upgrade_check(struct throtl_grp *tg)
{
	unsigned long now = jiffies;

	if (tg->td->limit_index != LIMIT_LOW)
		return;

	if (time_after(tg->last_check_time + tg->td->throtl_slice, now))
		return;

	tg->last_check_time = now;

	if (!time_after_eq(now,
	     __tg_last_low_overflow_time(tg) + tg->td->throtl_slice))
		return;

	if (throtl_can_upgrade(tg->td, NULL))
		throtl_upgrade_state(tg->td);
}

static void throtl_upgrade_state(struct throtl_data *td)
{
	struct cgroup_subsys_state *pos_css;
	struct blkcg_gq *blkg;

	throtl_log(&td->service_queue, "upgrade to max");
	td->limit_index = LIMIT_MAX;
	td->low_upgrade_time = jiffies;
	td->scale = 0;
	rcu_read_lock();
	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
		struct throtl_grp *tg = blkg_to_tg(blkg);
		struct throtl_service_queue *sq = &tg->service_queue;

		tg->disptime = jiffies - 1;
		throtl_select_dispatch(sq);
		throtl_schedule_next_dispatch(sq, true);
	}
	rcu_read_unlock();
	throtl_select_dispatch(&td->service_queue);
	throtl_schedule_next_dispatch(&td->service_queue, true);
	queue_work(kthrotld_workqueue, &td->dispatch_work);
}

static void throtl_downgrade_state(struct throtl_data *td)
{
	td->scale /= 2;

	throtl_log(&td->service_queue, "downgrade, scale %d", td->scale);
	if (td->scale) {
		td->low_upgrade_time = jiffies - td->scale * td->throtl_slice;
		return;
	}

	td->limit_index = LIMIT_LOW;
	td->low_downgrade_time = jiffies;
}
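
/*
 * td->scale tracks how far the effective limits of low-limit groups have
 * been ramped up toward their max limits since the last upgrade (see
 * throtl_adjusted_limit() earlier in this file).  Halving it above undoes
 * part of that ramp, and backdating low_upgrade_time by scale *
 * throtl_slice keeps the ramp consistent; only once scale decays to zero
 * does the queue actually fall back to LIMIT_LOW.
 */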
  
static bool throtl_tg_can_downgrade(struct throtl_grp *tg)
{
	struct throtl_data *td = tg->td;
	unsigned long now = jiffies;

	/*
	 * If cgroup is below low limit, consider downgrade and throttle other
	 * cgroups
	 */
	if (time_after_eq(now, td->low_upgrade_time + td->throtl_slice) &&
	    time_after_eq(now, tg_last_low_overflow_time(tg) +
					td->throtl_slice) &&
	    (!throtl_tg_is_idle(tg) ||
	     !list_empty(&tg_to_blkg(tg)->blkcg->css.children)))
		return true;
	return false;
}

static bool throtl_hierarchy_can_downgrade(struct throtl_grp *tg)
{
	while (true) {
		if (!throtl_tg_can_downgrade(tg))
			return false;
		tg = sq_to_tg(tg->service_queue.parent_sq);
		if (!tg || !tg_to_blkg(tg)->parent)
			break;
	}
	return true;
}

static void throtl_downgrade_check(struct throtl_grp *tg)
{
	uint64_t bps;
	unsigned int iops;
	unsigned long elapsed_time;
	unsigned long now = jiffies;

	if (tg->td->limit_index != LIMIT_MAX ||
	    !tg->td->limit_valid[LIMIT_LOW])
		return;
	if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children))
		return;
	if (time_after(tg->last_check_time + tg->td->throtl_slice, now))
		return;

	elapsed_time = now - tg->last_check_time;
	tg->last_check_time = now;

	if (time_before(now, tg_last_low_overflow_time(tg) +
			tg->td->throtl_slice))
		return;

	if (tg->bps[READ][LIMIT_LOW]) {
		bps = tg->last_bytes_disp[READ] * HZ;
		do_div(bps, elapsed_time);
		if (bps >= tg->bps[READ][LIMIT_LOW])
			tg->last_low_overflow_time[READ] = now;
	}

	if (tg->bps[WRITE][LIMIT_LOW]) {
		bps = tg->last_bytes_disp[WRITE] * HZ;
		do_div(bps, elapsed_time);
		if (bps >= tg->bps[WRITE][LIMIT_LOW])
			tg->last_low_overflow_time[WRITE] = now;
	}

	if (tg->iops[READ][LIMIT_LOW]) {
		iops = tg->last_io_disp[READ] * HZ / elapsed_time;
		if (iops >= tg->iops[READ][LIMIT_LOW])
			tg->last_low_overflow_time[READ] = now;
	}

	if (tg->iops[WRITE][LIMIT_LOW]) {
		iops = tg->last_io_disp[WRITE] * HZ / elapsed_time;
		if (iops >= tg->iops[WRITE][LIMIT_LOW])
			tg->last_low_overflow_time[WRITE] = now;
	}

	/*
	 * If cgroup is below low limit, consider downgrade and throttle other
	 * cgroups
	 */
	if (throtl_hierarchy_can_downgrade(tg))
		throtl_downgrade_state(tg->td);

	tg->last_bytes_disp[READ] = 0;
	tg->last_bytes_disp[WRITE] = 0;
	tg->last_io_disp[READ] = 0;
	tg->last_io_disp[WRITE] = 0;
}

static void blk_throtl_update_idletime(struct throtl_grp *tg)
{
	unsigned long now;
	unsigned long last_finish_time = tg->last_finish_time;

	if (last_finish_time == 0)
		return;

	now = ktime_get_ns() >> 10;
	if (now <= last_finish_time ||
	    last_finish_time == tg->checked_last_finish_time)
		return;

	tg->avg_idletime = (tg->avg_idletime * 7 + now - last_finish_time) >> 3;
	tg->checked_last_finish_time = last_finish_time;
}
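
/*
 * The update above is an exponentially weighted moving average with weight
 * 1/8: avg_idletime = (7 * avg_idletime + new_idle_sample) / 8.  For
 * instance, an old average of 800 (in the ~usec units produced by
 * ktime_get_ns() >> 10) and a fresh 1600-usec gap give (800*7 + 1600)/8 =
 * 900.
 */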

#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
static void throtl_update_latency_buckets(struct throtl_data *td)
{
	struct avg_latency_bucket avg_latency[2][LATENCY_BUCKET_SIZE];
	int i, cpu, rw;
	unsigned long last_latency[2] = { 0 };
	unsigned long latency[2];

	if (!blk_queue_nonrot(td->queue) || !td->limit_valid[LIMIT_LOW])
		return;
	if (time_before(jiffies, td->last_calculate_time + HZ))
		return;
	td->last_calculate_time = jiffies;

	memset(avg_latency, 0, sizeof(avg_latency));
	for (rw = READ; rw <= WRITE; rw++) {
		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
			struct latency_bucket *tmp = &td->tmp_buckets[rw][i];

			for_each_possible_cpu(cpu) {
				struct latency_bucket *bucket;

				/* this isn't race free, but ok in practice */
				bucket = per_cpu_ptr(td->latency_buckets[rw],
					cpu);
				tmp->total_latency += bucket[i].total_latency;
				tmp->samples += bucket[i].samples;
				bucket[i].total_latency = 0;
				bucket[i].samples = 0;
			}

			if (tmp->samples >= 32) {
				int samples = tmp->samples;

				latency[rw] = tmp->total_latency;

				tmp->total_latency = 0;
				tmp->samples = 0;
				latency[rw] /= samples;
				if (latency[rw] == 0)
					continue;
				avg_latency[rw][i].latency = latency[rw];
			}
		}
	}

	for (rw = READ; rw <= WRITE; rw++) {
		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
			if (!avg_latency[rw][i].latency) {
				if (td->avg_buckets[rw][i].latency < last_latency[rw])
					td->avg_buckets[rw][i].latency =
						last_latency[rw];
				continue;
			}

			if (!td->avg_buckets[rw][i].valid)
				latency[rw] = avg_latency[rw][i].latency;
			else
				latency[rw] = (td->avg_buckets[rw][i].latency * 7 +
					avg_latency[rw][i].latency) >> 3;

			td->avg_buckets[rw][i].latency = max(latency[rw],
				last_latency[rw]);
			td->avg_buckets[rw][i].valid = true;
			last_latency[rw] = td->avg_buckets[rw][i].latency;
		}
	}

	for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
		throtl_log(&td->service_queue,
			"Latency bucket %d: read latency=%ld, read valid=%d, "
			"write latency=%ld, write valid=%d", i,
			td->avg_buckets[READ][i].latency,
			td->avg_buckets[READ][i].valid,
			td->avg_buckets[WRITE][i].latency,
			td->avg_buckets[WRITE][i].valid);
}
#else
static inline void throtl_update_latency_buckets(struct throtl_data *td)
{
}
#endif
db18a53e5   Christoph Hellwig   blk-cgroup: remov...
2051
  bool blk_throtl_bio(struct bio *bio)
e43473b7f   Vivek Goyal   blkio: Core imple...
2052
  {
db18a53e5   Christoph Hellwig   blk-cgroup: remov...
2053
2054
  	struct request_queue *q = bio->bi_disk->queue;
  	struct blkcg_gq *blkg = bio->bi_blkg;
c5cc2070b   Tejun Heo   blk-throttle: add...
2055
  	struct throtl_qnode *qn = NULL;
a2e83ef9c   Christoph Hellwig   blk-cgroup: remov...
2056
  	struct throtl_grp *tg = blkg_to_tg(blkg);
73f0d49a9   Tejun Heo   blk-throttle: mov...
2057
  	struct throtl_service_queue *sq;
0e9f4164b   Tejun Heo   blk-throttle: gen...
2058
  	bool rw = bio_data_dir(bio);
bc16a4f93   Tejun Heo   block: reorganize...
2059
  	bool throttled = false;
b9147dd1b   Shaohua Li   blk-throttle: add...
2060
  	struct throtl_data *td = tg->td;
e43473b7f   Vivek Goyal   blkio: Core imple...
2061

93b806380   Christoph Hellwig   blk-cgroup: move ...
2062
  	rcu_read_lock();
ae1188963   Tejun Heo   blkcg: consolidat...
2063

2a0f61e6e   Tejun Heo   blk-throttle: set...
2064
  	/* see throtl_charge_bio() */
7ca464383   Tejun Heo   blk-throtl: stop ...
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
  	if (bio_flagged(bio, BIO_THROTTLED))
  		goto out;
  
  	if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
  		blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
  				bio->bi_iter.bi_size);
  		blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
  	}
  
  	if (!tg->has_rules[rw])
bc16a4f93   Tejun Heo   block: reorganize...
2075
  		goto out;
e43473b7f   Vivek Goyal   blkio: Core imple...
2076

0d945c1f9   Christoph Hellwig   block: remove the...
2077
  	spin_lock_irq(&q->queue_lock);
c9589f03e   Tejun Heo   blk-throttle: imp...
2078

b9147dd1b   Shaohua Li   blk-throttle: add...
2079
  	throtl_update_latency_buckets(td);
9e234eeaf   Shaohua Li   blk-throttle: add...
2080
  	blk_throtl_update_idletime(tg);
73f0d49a9   Tejun Heo   blk-throttle: mov...
2081
  	sq = &tg->service_queue;
c79892c55   Shaohua Li   blk-throttle: add...
2082
  again:
9e660acff   Tejun Heo   blk-throttle: mak...
  	while (true) {
3f0abd806   Shaohua Li   blk-throttle: add...
  		if (tg->last_low_overflow_time[rw] == 0)
  			tg->last_low_overflow_time[rw] = jiffies;
  		throtl_downgrade_check(tg);
fa6fb5aab   Shaohua Li   blk-throttle: ign...
  		throtl_upgrade_check(tg);
9e660acff   Tejun Heo   blk-throttle: mak...
  		/* throtl is FIFO - if bios are already queued, this one should queue too */
  		if (sq->nr_queued[rw])
  			break;
de701c74a   Vivek Goyal   blk-throttle: Som...
  
9e660acff   Tejun Heo   blk-throttle: mak...
  		/* if above limits, break to queue */
c79892c55   Shaohua Li   blk-throttle: add...
  		if (!tg_may_dispatch(tg, bio, NULL)) {
3f0abd806   Shaohua Li   blk-throttle: add...
  			tg->last_low_overflow_time[rw] = jiffies;
b9147dd1b   Shaohua Li   blk-throttle: add...
  			if (throtl_can_upgrade(td, tg)) {
  				throtl_upgrade_state(td);
c79892c55   Shaohua Li   blk-throttle: add...
  				goto again;
  			}
9e660acff   Tejun Heo   blk-throttle: mak...
  			break;
c79892c55   Shaohua Li   blk-throttle: add...
  		}
9e660acff   Tejun Heo   blk-throttle: mak...
  
  		/* within limits, let's charge and dispatch directly */
e43473b7f   Vivek Goyal   blkio: Core imple...
  		throtl_charge_bio(tg, bio);
04521db04   Vivek Goyal   blk-throttle: Res...
  
  		/*
  		 * We need to trim the slice even when bios are not being
  		 * queued, otherwise it might happen that a bio is not queued
  		 * for a long time and the slice keeps extending while trim is
  		 * never called. If the limits are then reduced suddenly, all
  		 * the IO dispatched so far is accounted at the new low rate
  		 * and newly queued IO gets a really long dispatch time.
  		 *
  		 * So keep on trimming the slice even if no bio is queued.
  		 */
0f3457f60   Tejun Heo   blk-throttle: add...
  		throtl_trim_slice(tg, rw);
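  		/*
  		 * Worked example for the comment above (illustrative
  		 * numbers): with a 1MB/s limit and a slice that has been
  		 * extended to 10s, 10MB may already be accounted in
  		 * bytes_disp.  If the limit is then dropped to 100KB/s
  		 * without trimming, that history alone represents 100s
  		 * worth of budget at the new rate, so the next bio would
  		 * be scheduled absurdly far in the future.  Trimming
  		 * resets the slice so only recent dispatch counts.
  		 */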
9e660acff   Tejun Heo   blk-throttle: mak...
  
  		/*
  		 * @bio passed through this layer without being throttled.
b53b072c4   Baolin Wang   blk-throttle: Fix...
  		 * Climb up the ladder.  If we're already at the top, it
9e660acff   Tejun Heo   blk-throttle: mak...
  		 * can be executed directly.
  		 */
c5cc2070b   Tejun Heo   blk-throttle: add...
  		qn = &tg->qnode_on_parent[rw];
9e660acff   Tejun Heo   blk-throttle: mak...
  		sq = sq->parent_sq;
  		tg = sq_to_tg(sq);
  		if (!tg)
  			goto out_unlock;
e43473b7f   Vivek Goyal   blkio: Core imple...
  	}
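  	/*
  	 * Illustrative walk (levels made up): for root <- parent <- child,
  	 * a bio issued in the child is charged against the child's limits,
  	 * then re-checked against the parent's and the root's.  Only when
  	 * sq_to_tg() returns NULL - we stepped past the top-level service
  	 * queue - has the bio passed every level and is submitted without
  	 * throttling.
  	 */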
9e660acff   Tejun Heo   blk-throttle: mak...
  	/* out-of-limit, queue to @tg */
fda6f272c   Tejun Heo   blk-throttle: imp...
  	throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d",
  		   rw == READ ? 'R' : 'W',
9f626e372   Shaohua Li   blk-throttle: pre...
  		   tg->bytes_disp[rw], bio->bi_iter.bi_size,
  		   tg_bps_limit(tg, rw),
  		   tg->io_disp[rw], tg_iops_limit(tg, rw),
fda6f272c   Tejun Heo   blk-throttle: imp...
  		   sq->nr_queued[READ], sq->nr_queued[WRITE]);
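  	/*
  	 * With blk-cgroup debugging enabled this emits a blktrace message
  	 * roughly like (values illustrative, not from a real trace):
  	 *   [R] bio. bdisp=1048576 sz=4096 bps=1048576 iodisp=16
  	 *   iops=4294967295 queued=1/0
  	 * i.e. 1MB already dispatched against a 1MB/s read limit with no
  	 * iops limit (UINT_MAX), and one read already waiting ahead.
  	 */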
e43473b7f   Vivek Goyal   blkio: Core imple...
  
3f0abd806   Shaohua Li   blk-throttle: add...
  	tg->last_low_overflow_time[rw] = jiffies;
b9147dd1b   Shaohua Li   blk-throttle: add...
  	td->nr_queued[rw]++;
c5cc2070b   Tejun Heo   blk-throttle: add...
  	throtl_add_bio_tg(bio, qn, tg);
bc16a4f93   Tejun Heo   block: reorganize...
  	throttled = true;
e43473b7f   Vivek Goyal   blkio: Core imple...
  
7f52f98c2   Tejun Heo   blk-throttle: imp...
  	/*
  	 * Update @tg's dispatch time and force schedule dispatch if @tg
  	 * was empty before @bio.  The forced scheduling isn't likely to
  	 * cause undue delay as @bio is likely to be dispatched directly if
  	 * its @tg's disptime is not in the future.
  	 */
0e9f4164b   Tejun Heo   blk-throttle: gen...
  	if (tg->flags & THROTL_TG_WAS_EMPTY) {
77216b048   Tejun Heo   blk-throttle: add...
  		tg_update_disptime(tg);
7f52f98c2   Tejun Heo   blk-throttle: imp...
  		throtl_schedule_next_dispatch(tg->service_queue.parent_sq, true);
e43473b7f   Vivek Goyal   blkio: Core imple...
  	}
bc16a4f93   Tejun Heo   block: reorganize...
  out_unlock:
0d945c1f9   Christoph Hellwig   block: remove the...
  	spin_unlock_irq(&q->queue_lock);
bc16a4f93   Tejun Heo   block: reorganize...
  out:
111be8839   Shaohua Li   block-throttle: a...
  	bio_set_flag(bio, BIO_THROTTLED);
b9147dd1b   Shaohua Li   blk-throttle: add...
  
  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
  	if (throttled || !td->track_bio_latency)
5238dcf41   Omar Sandoval   block: replace bi...
  		bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
b9147dd1b   Shaohua Li   blk-throttle: add...
  #endif
93b806380   Christoph Hellwig   blk-cgroup: move ...
  	rcu_read_unlock();
bc16a4f93   Tejun Heo   block: reorganize...
  	return throttled;
e43473b7f   Vivek Goyal   blkio: Core imple...
  }
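  /*
   * Caller-side sketch (an assumption about this era of the tree, not
   * something defined in this file): the submission path checks the
   * return value and simply stops submitting a throttled bio, e.g.
   *
   *	if (blk_throtl_bio(bio))
   *		return false;	/- bio now owned by the throttle layer -/
   *
   * The queued bio is re-issued later by the dispatch work.
   */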
9e234eeaf   Shaohua Li   blk-throttle: add...
  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
b9147dd1b   Shaohua Li   blk-throttle: add...
  static void throtl_track_latency(struct throtl_data *td, sector_t size,
  	int op, unsigned long time)
  {
  	struct latency_bucket *latency;
  	int index;
b889bf66d   Joseph Qi   blk-throttle: tra...
  	if (!td || td->limit_index != LIMIT_LOW ||
  	    !(op == REQ_OP_READ || op == REQ_OP_WRITE) ||
b9147dd1b   Shaohua Li   blk-throttle: add...
  	    !blk_queue_nonrot(td->queue))
  		return;
  
  	index = request_bucket_index(size);
b889bf66d   Joseph Qi   blk-throttle: tra...
  	latency = get_cpu_ptr(td->latency_buckets[op]);
b9147dd1b   Shaohua Li   blk-throttle: add...
  	latency[index].total_latency += time;
  	latency[index].samples++;
b889bf66d   Joseph Qi   blk-throttle: tra...
  	put_cpu_ptr(td->latency_buckets[op]);
b9147dd1b   Shaohua Li   blk-throttle: add...
  }
  
  void blk_throtl_stat_add(struct request *rq, u64 time_ns)
  {
  	struct request_queue *q = rq->q;
  	struct throtl_data *td = q->td;
3d2443069   Hou Tao   block: make rq se...
  
  	throtl_track_latency(td, blk_rq_stats_sectors(rq), req_op(rq),
  			     time_ns >> 10);
b9147dd1b   Shaohua Li   blk-throttle: add...
  }
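  /*
   * Note on the >> 10 above: it is a cheap approximation of a
   * nanoseconds-to-microseconds conversion (dividing by 1024 instead of
   * 1000).  E.g. 1,000,000ns >> 10 = 976, close enough to 1000us for
   * bucketed latency accounting.
   */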
9e234eeaf   Shaohua Li   blk-throttle: add...
  void blk_throtl_bio_endio(struct bio *bio)
  {
08e18eab0   Josef Bacik   block: add bi_blk...
  	struct blkcg_gq *blkg;
9e234eeaf   Shaohua Li   blk-throttle: add...
  	struct throtl_grp *tg;
b9147dd1b   Shaohua Li   blk-throttle: add...
  	u64 finish_time_ns;
  	unsigned long finish_time;
  	unsigned long start_time;
  	unsigned long lat;
b889bf66d   Joseph Qi   blk-throttle: tra...
  	int rw = bio_data_dir(bio);
9e234eeaf   Shaohua Li   blk-throttle: add...
  
08e18eab0   Josef Bacik   block: add bi_blk...
  	blkg = bio->bi_blkg;
  	if (!blkg)
9e234eeaf   Shaohua Li   blk-throttle: add...
  		return;
08e18eab0   Josef Bacik   block: add bi_blk...
  	tg = blkg_to_tg(blkg);
b185efa78   Baolin Wang   blk-throttle: Avo...
  	if (!tg->td->limit_valid[LIMIT_LOW])
  		return;
9e234eeaf   Shaohua Li   blk-throttle: add...
  
b9147dd1b   Shaohua Li   blk-throttle: add...
  	finish_time_ns = ktime_get_ns();
  	tg->last_finish_time = finish_time_ns >> 10;
5238dcf41   Omar Sandoval   block: replace bi...
  	start_time = bio_issue_time(&bio->bi_issue) >> 10;
  	finish_time = __bio_issue_time(finish_time_ns) >> 10;
08e18eab0   Josef Bacik   block: add bi_blk...
  	if (!start_time || finish_time <= start_time)
53696b8d2   Shaohua Li   blk-throttle: add...
  		return;
  
  	lat = finish_time - start_time;
b9147dd1b   Shaohua Li   blk-throttle: add...
  	/* this is only for bio-based drivers */
5238dcf41   Omar Sandoval   block: replace bi...
  	if (!(bio->bi_issue.value & BIO_ISSUE_THROTL_SKIP_LATENCY))
  		throtl_track_latency(tg->td, bio_issue_size(&bio->bi_issue),
  				     bio_op(bio), lat);
53696b8d2   Shaohua Li   blk-throttle: add...
  
6679a90c4   Shaohua Li   blk-throttle: set...
  	if (tg->latency_target && lat >= tg->td->filtered_latency) {
53696b8d2   Shaohua Li   blk-throttle: add...
  		int bucket;
  		unsigned int threshold;
5238dcf41   Omar Sandoval   block: replace bi...
  		bucket = request_bucket_index(bio_issue_size(&bio->bi_issue));
b889bf66d   Joseph Qi   blk-throttle: tra...
  		threshold = tg->td->avg_buckets[rw][bucket].latency +
53696b8d2   Shaohua Li   blk-throttle: add...
  			tg->latency_target;
  		if (lat > threshold)
  			tg->bad_bio_cnt++;
  		/*
  		 * Not race free - the count can be slightly off, which
  		 * means a cgroup may be throttled based on a somewhat
  		 * inaccurate bad-bio ratio.
  		 */
  		tg->bio_cnt++;
  	}
  
  	if (time_after(jiffies, tg->bio_cnt_reset_time) || tg->bio_cnt > 1024) {
  		tg->bio_cnt_reset_time = tg->td->throtl_slice + jiffies;
  		tg->bio_cnt /= 2;
  		tg->bad_bio_cnt /= 2;
b9147dd1b   Shaohua Li   blk-throttle: add...
  	}
9e234eeaf   Shaohua Li   blk-throttle: add...
  }
  #endif
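  /*
   * Illustrative note on the halving above (derived from the code): the
   * good/bad bio counters decay exponentially - every throtl_slice, or
   * once bio_cnt exceeds 1024, both counters are halved.  E.g. counts of
   * 800/80 (10% bad) become 400/40: the ratio is preserved, but older
   * history fades relative to newly arriving samples.
   */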
e43473b7f   Vivek Goyal   blkio: Core imple...
  int blk_throtl_init(struct request_queue *q)
  {
  	struct throtl_data *td;
a2b1693ba   Tejun Heo   blkcg: implement ...
  	int ret;
e43473b7f   Vivek Goyal   blkio: Core imple...
  
  	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
  	if (!td)
  		return -ENOMEM;
b889bf66d   Joseph Qi   blk-throttle: tra...
  	td->latency_buckets[READ] = __alloc_percpu(sizeof(struct latency_bucket) *
b9147dd1b   Shaohua Li   blk-throttle: add...
  		LATENCY_BUCKET_SIZE, __alignof__(u64));
b889bf66d   Joseph Qi   blk-throttle: tra...
  	if (!td->latency_buckets[READ]) {
  		kfree(td);
  		return -ENOMEM;
  	}
  	td->latency_buckets[WRITE] = __alloc_percpu(sizeof(struct latency_bucket) *
b9147dd1b   Shaohua Li   blk-throttle: add...
  		LATENCY_BUCKET_SIZE, __alignof__(u64));
b889bf66d   Joseph Qi   blk-throttle: tra...
  	if (!td->latency_buckets[WRITE]) {
  		free_percpu(td->latency_buckets[READ]);
b9147dd1b   Shaohua Li   blk-throttle: add...
  		kfree(td);
  		return -ENOMEM;
  	}
e43473b7f   Vivek Goyal   blkio: Core imple...
  
69df0ab03   Tejun Heo   blk-throttle: sep...
  	INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
b2ce2643c   Tejun Heo   blk-throttle: cle...
  	throtl_service_queue_init(&td->service_queue);
e43473b7f   Vivek Goyal   blkio: Core imple...
  
cd1604fab   Tejun Heo   blkcg: factor out...
  	q->td = td;
29b125892   Vivek Goyal   blk-throttle: Dyn...
  	td->queue = q;
02977e4af   Vivek Goyal   blkio: Add root g...
  
9f626e372   Shaohua Li   blk-throttle: pre...
  	td->limit_valid[LIMIT_MAX] = true;
cd5ab1b0f   Shaohua Li   blk-throttle: add...
  	td->limit_index = LIMIT_MAX;
3f0abd806   Shaohua Li   blk-throttle: add...
  	td->low_upgrade_time = jiffies;
  	td->low_downgrade_time = jiffies;
9e234eeaf   Shaohua Li   blk-throttle: add...
  
a2b1693ba   Tejun Heo   blkcg: implement ...
  	/* activate policy */
3c798398e   Tejun Heo   blkcg: mass renam...
  	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
b9147dd1b   Shaohua Li   blk-throttle: add...
  	if (ret) {
b889bf66d   Joseph Qi   blk-throttle: tra...
  		free_percpu(td->latency_buckets[READ]);
  		free_percpu(td->latency_buckets[WRITE]);
f51b802c1   Tejun Heo   blkcg: use the us...
  		kfree(td);
b9147dd1b   Shaohua Li   blk-throttle: add...
  	}
a2b1693ba   Tejun Heo   blkcg: implement ...
  	return ret;
e43473b7f   Vivek Goyal   blkio: Core imple...
  }
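  /*
   * A note on the tail of blk_throtl_init() (hedged summary, relying on
   * blkcg framework behaviour rather than anything in this file):
   * blkcg_activate_policy() instantiates per-cgroup throtl_grp state for
   * the queue via the policy's pd_alloc()/pd_init() callbacks, so it can
   * fail with -ENOMEM; the error path above therefore unwinds both
   * percpu bucket arrays and the throtl_data itself.
   */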
  
  void blk_throtl_exit(struct request_queue *q)
  {
c875f4d02   Tejun Heo   blkcg: drop unnec...
  	BUG_ON(!q->td);
da5277700   Vivek Goyal   block: Move blk_t...
  	throtl_shutdown_wq(q);
3c798398e   Tejun Heo   blkcg: mass renam...
  	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
b889bf66d   Joseph Qi   blk-throttle: tra...
  	free_percpu(q->td->latency_buckets[READ]);
  	free_percpu(q->td->latency_buckets[WRITE]);
c9a929dde   Tejun Heo   block: fix reques...
  	kfree(q->td);
e43473b7f   Vivek Goyal   blkio: Core imple...
  }
d61fcfa4b   Shaohua Li   blk-throttle: cho...
  void blk_throtl_register_queue(struct request_queue *q)
  {
  	struct throtl_data *td;
6679a90c4   Shaohua Li   blk-throttle: set...
  	int i;
d61fcfa4b   Shaohua Li   blk-throttle: cho...
  
  	td = q->td;
  	BUG_ON(!td);
6679a90c4   Shaohua Li   blk-throttle: set...
  	if (blk_queue_nonrot(q)) {
d61fcfa4b   Shaohua Li   blk-throttle: cho...
  		td->throtl_slice = DFL_THROTL_SLICE_SSD;
6679a90c4   Shaohua Li   blk-throttle: set...
  		td->filtered_latency = LATENCY_FILTERED_SSD;
  	} else {
d61fcfa4b   Shaohua Li   blk-throttle: cho...
  		td->throtl_slice = DFL_THROTL_SLICE_HD;
6679a90c4   Shaohua Li   blk-throttle: set...
  		td->filtered_latency = LATENCY_FILTERED_HD;
b889bf66d   Joseph Qi   blk-throttle: tra...
  		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
  			td->avg_buckets[READ][i].latency = DFL_HD_BASELINE_LATENCY;
  			td->avg_buckets[WRITE][i].latency = DFL_HD_BASELINE_LATENCY;
  		}
6679a90c4   Shaohua Li   blk-throttle: set...
  	}
d61fcfa4b   Shaohua Li   blk-throttle: cho...
  #ifndef CONFIG_BLK_DEV_THROTTLING_LOW
  	/* if no low limit, use previous default */
  	td->throtl_slice = DFL_THROTL_SLICE_HD;
  #endif
9e234eeaf   Shaohua Li   blk-throttle: add...
  
344e9ffcb   Jens Axboe   block: add queue_...
  	td->track_bio_latency = !queue_is_mq(q);
b9147dd1b   Shaohua Li   blk-throttle: add...
  	if (!td->track_bio_latency)
  		blk_stat_enable_accounting(q);
d61fcfa4b   Shaohua Li   blk-throttle: cho...
  }
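  /*
   * Worked defaults (assuming HZ=1000): DFL_THROTL_SLICE_SSD = HZ/50 =
   * 20ms and DFL_THROTL_SLICE_HD = HZ/10 = 100ms, i.e. non-rotational
   * devices are throttled over much shorter slices.  track_bio_latency
   * is set only for bio-based queues; request-based (blk-mq) queues get
   * their latency samples from the blk-stat machinery enabled above.
   */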
297e3d854   Shaohua Li   blk-throttle: mak...
  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
  ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page)
  {
  	if (!q->td)
  		return -EINVAL;
  	return sprintf(page, "%u\n", jiffies_to_msecs(q->td->throtl_slice));
  }
  
  ssize_t blk_throtl_sample_time_store(struct request_queue *q,
  	const char *page, size_t count)
  {
  	unsigned long v;
  	unsigned long t;
  
  	if (!q->td)
  		return -EINVAL;
  	if (kstrtoul(page, 10, &v))
  		return -EINVAL;
  	t = msecs_to_jiffies(v);
  	if (t == 0 || t > MAX_THROTL_SLICE)
  		return -EINVAL;
  	q->td->throtl_slice = t;
  	return count;
  }
  #endif
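  /*
   * Usage sketch: these handlers back the throttle_sample_time queue
   * attribute (wired up in blk-sysfs.c when CONFIG_BLK_DEV_THROTTLING_LOW
   * is set), so the slice can be tuned in milliseconds from userspace,
   * e.g. (device name illustrative):
   *
   *	# cat /sys/block/sda/queue/throttle_sample_time
   *	20
   *	# echo 50 > /sys/block/sda/queue/throttle_sample_time
   *
   * Values that round to 0 jiffies or exceed MAX_THROTL_SLICE (one
   * second) are rejected with -EINVAL.
   */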
e43473b7f   Vivek Goyal   blkio: Core imple...
  static int __init throtl_init(void)
  {
450adcbe5   Vivek Goyal   blk-throttle: Do ...
  	kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0);
  	if (!kthrotld_workqueue)
  		panic("Failed to create kthrotld\n");
3c798398e   Tejun Heo   blkcg: mass renam...
  	return blkcg_policy_register(&blkcg_policy_throtl);
e43473b7f   Vivek Goyal   blkio: Core imple...
  }
  
  module_init(throtl_init);