block/blk-throttle.c

  /*
   * Interface for controlling IO bandwidth on a request queue
   *
   * Copyright (C) 2010 Vivek Goyal <vgoyal@redhat.com>
   */
  
  #include <linux/module.h>
  #include <linux/slab.h>
  #include <linux/blkdev.h>
  #include <linux/bio.h>
  #include <linux/blktrace_api.h>
  #include <linux/blk-cgroup.h>
  #include "blk.h"
  
  /* Max dispatch from a group in 1 round */
  static int throtl_grp_quantum = 8;
  
  /* Total max dispatch from all groups in one round */
  static int throtl_quantum = 32;
  
  /* Throttling is performed over 100ms slice and after that slice is renewed */
  static unsigned long throtl_slice = HZ/10;	/* 100 ms */
  static struct blkcg_policy blkcg_policy_throtl;

  /* A workqueue to queue throttle related work */
  static struct workqueue_struct *kthrotld_workqueue;

  /*
   * To implement hierarchical throttling, throtl_grps form a tree and bios
   * are dispatched upwards level by level until they reach the top and get
   * issued.  When dispatching bios from the children and local group at each
   * level, if the bios are dispatched into a single bio_list, there's a risk
 * that a local or child group which can queue many bios at once fills up
 * the list and starves the others.
   *
   * To avoid such starvation, dispatched bios are queued separately
   * according to where they came from.  When they are again dispatched to
   * the parent, they're popped in round-robin order so that no single source
   * hogs the dispatch window.
   *
   * throtl_qnode is used to keep the queued bios separated by their sources.
   * Bios are queued to throtl_qnode which in turn is queued to
   * throtl_service_queue and then dispatched in round-robin order.
   *
   * It's also used to track the reference counts on blkg's.  A qnode always
   * belongs to a throtl_grp and gets queued on itself or the parent, so
   * incrementing the reference of the associated throtl_grp when a qnode is
   * queued and decrementing when dequeued is enough to keep the whole blkg
   * tree pinned while bios are in flight.
   */
  struct throtl_qnode {
  	struct list_head	node;		/* service_queue->queued[] */
  	struct bio_list		bios;		/* queued bios */
  	struct throtl_grp	*tg;		/* tg this qnode belongs to */
  };
  struct throtl_service_queue {
  	struct throtl_service_queue *parent_sq;	/* the parent service_queue */
  	/*
  	 * Bios queued directly to this service_queue or dispatched from
  	 * children throtl_grp's.
  	 */
  	struct list_head	queued[2];	/* throtl_qnode [READ/WRITE] */
  	unsigned int		nr_queued[2];	/* number of queued bios */
  
  	/*
  	 * RB tree of active children throtl_grp's, which are sorted by
  	 * their ->disptime.
  	 */
  	struct rb_root		pending_tree;	/* RB tree of active tgs */
  	struct rb_node		*first_pending;	/* first node in the tree */
  	unsigned int		nr_pending;	/* # queued in the tree */
  	unsigned long		first_pending_disptime;	/* disptime of the first tg */
  	struct timer_list	pending_timer;	/* fires on first_pending_disptime */
  };
  enum tg_state_flags {
  	THROTL_TG_PENDING	= 1 << 0,	/* on parent's pending tree */
  	THROTL_TG_WAS_EMPTY	= 1 << 1,	/* bio_lists[] became non-empty */
  };
  #define rb_entry_tg(node)	rb_entry((node), struct throtl_grp, rb_node)
  
  struct throtl_grp {
  	/* must be the first member */
  	struct blkg_policy_data pd;
  	/* active throtl group service_queue member */
  	struct rb_node rb_node;
  	/* throtl_data this group belongs to */
  	struct throtl_data *td;
  	/* this group's service queue */
  	struct throtl_service_queue service_queue;
  	/*
  	 * qnode_on_self is used when bios are directly queued to this
  	 * throtl_grp so that local bios compete fairly with bios
  	 * dispatched from children.  qnode_on_parent is used when bios are
  	 * dispatched from this throtl_grp into its parent and will compete
  	 * with the sibling qnode_on_parents and the parent's
  	 * qnode_on_self.
  	 */
  	struct throtl_qnode qnode_on_self[2];
  	struct throtl_qnode qnode_on_parent[2];
  
  	/*
  	 * Dispatch time in jiffies. This is the estimated time when group
  	 * will unthrottle and is ready to dispatch more bio. It is used as
  	 * key to sort active groups in service tree.
  	 */
  	unsigned long disptime;
  	unsigned int flags;
  	/* are there any throtl rules between this group and td? */
  	bool has_rules[2];
  	/* bytes per second rate limits */
  	uint64_t bps[2];
  	/* IOPS limits */
  	unsigned int iops[2];
	/* Number of bytes dispatched in current slice */
  	uint64_t bytes_disp[2];
  	/* Number of bio's dispatched in current slice */
  	unsigned int io_disp[2];
  
  	/* When did we start a new slice */
  	unsigned long slice_start[2];
  	unsigned long slice_end[2];
  };
  
  struct throtl_data
  {
  	/* service tree for active throtl groups */
  	struct throtl_service_queue service_queue;

  	struct request_queue *queue;
  
  	/* Total Number of queued bios on READ and WRITE lists */
  	unsigned int nr_queued[2];
  
  	/*
	 * total number of undestroyed groups
  	 */
  	unsigned int nr_undestroyed_grps;
  
  	/* Work for dispatching throttled bios */
  	struct work_struct dispatch_work;
  };
  static void throtl_pending_timer_fn(unsigned long arg);
  static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
  {
  	return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
  }
  static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
  {
  	return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
  }
  static inline struct blkcg_gq *tg_to_blkg(struct throtl_grp *tg)
  {
  	return pd_to_blkg(&tg->pd);
  }
  /**
 * sq_to_tg - return the throtl_grp the specified service queue belongs to
   * @sq: the throtl_service_queue of interest
   *
   * Return the throtl_grp @sq belongs to.  If @sq is the top-level one
   * embedded in throtl_data, %NULL is returned.
   */
  static struct throtl_grp *sq_to_tg(struct throtl_service_queue *sq)
  {
  	if (sq && sq->parent_sq)
  		return container_of(sq, struct throtl_grp, service_queue);
  	else
  		return NULL;
  }
  
  /**
   * sq_to_td - return throtl_data the specified service queue belongs to
   * @sq: the throtl_service_queue of interest
   *
 * A service_queue can be embedded in either a throtl_grp or throtl_data.
   * Determine the associated throtl_data accordingly and return it.
   */
  static struct throtl_data *sq_to_td(struct throtl_service_queue *sq)
  {
  	struct throtl_grp *tg = sq_to_tg(sq);
  
  	if (tg)
  		return tg->td;
  	else
  		return container_of(sq, struct throtl_data, service_queue);
  }
  
  /**
   * throtl_log - log debug message via blktrace
   * @sq: the service_queue being reported
   * @fmt: printf format string
   * @args: printf args
   *
   * The messages are prefixed with "throtl BLKG_NAME" if @sq belongs to a
   * throtl_grp; otherwise, just "throtl".
   */
  #define throtl_log(sq, fmt, args...)	do {				\
  	struct throtl_grp *__tg = sq_to_tg((sq));			\
  	struct throtl_data *__td = sq_to_td((sq));			\
  									\
  	(void)__td;							\
  	if (likely(!blk_trace_note_message_enabled(__td->queue)))	\
  		break;							\
  	if ((__tg)) {							\
  		char __pbuf[128];					\
  									\
  		blkg_path(tg_to_blkg(__tg), __pbuf, sizeof(__pbuf));	\
  		blk_add_trace_msg(__td->queue, "throtl %s " fmt, __pbuf, ##args); \
  	} else {							\
  		blk_add_trace_msg(__td->queue, "throtl " fmt, ##args);	\
  	}								\
  } while (0)
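
/*
 * Illustrative example (not from the original source): a call like
 * throtl_log(&tg->service_queue, "bio queued rw=%d", rw) shows up in the
 * blktrace stream as "throtl <blkg-path> bio queued rw=1", since @sq
 * belongs to a throtl_grp and therefore gets the blkg path prefix.
 */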

  static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
  {
  	INIT_LIST_HEAD(&qn->node);
  	bio_list_init(&qn->bios);
  	qn->tg = tg;
  }
  
  /**
   * throtl_qnode_add_bio - add a bio to a throtl_qnode and activate it
   * @bio: bio being added
   * @qn: qnode to add bio to
   * @queued: the service_queue->queued[] list @qn belongs to
   *
   * Add @bio to @qn and put @qn on @queued if it's not already on.
   * @qn->tg's reference count is bumped when @qn is activated.  See the
   * comment on top of throtl_qnode definition for details.
   */
  static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
  				 struct list_head *queued)
  {
  	bio_list_add(&qn->bios, bio);
  	if (list_empty(&qn->node)) {
  		list_add_tail(&qn->node, queued);
  		blkg_get(tg_to_blkg(qn->tg));
  	}
  }
  
  /**
   * throtl_peek_queued - peek the first bio on a qnode list
   * @queued: the qnode list to peek
   */
  static struct bio *throtl_peek_queued(struct list_head *queued)
  {
  	struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node);
  	struct bio *bio;
  
  	if (list_empty(queued))
  		return NULL;
  
  	bio = bio_list_peek(&qn->bios);
  	WARN_ON_ONCE(!bio);
  	return bio;
  }
  
  /**
 * throtl_pop_queued - pop the first bio from a qnode list
   * @queued: the qnode list to pop a bio from
   * @tg_to_put: optional out argument for throtl_grp to put
   *
   * Pop the first bio from the qnode list @queued.  After popping, the first
   * qnode is removed from @queued if empty or moved to the end of @queued so
   * that the popping order is round-robin.
   *
   * When the first qnode is removed, its associated throtl_grp should be put
   * too.  If @tg_to_put is NULL, this function automatically puts it;
   * otherwise, *@tg_to_put is set to the throtl_grp to put and the caller is
   * responsible for putting it.
   */
  static struct bio *throtl_pop_queued(struct list_head *queued,
  				     struct throtl_grp **tg_to_put)
  {
  	struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node);
  	struct bio *bio;
  
  	if (list_empty(queued))
  		return NULL;
  
  	bio = bio_list_pop(&qn->bios);
  	WARN_ON_ONCE(!bio);
  
  	if (bio_list_empty(&qn->bios)) {
  		list_del_init(&qn->node);
  		if (tg_to_put)
  			*tg_to_put = qn->tg;
  		else
  			blkg_put(tg_to_blkg(qn->tg));
  	} else {
  		list_move_tail(&qn->node, queued);
  	}
  
  	return bio;
  }
  /* init a service_queue, assumes the caller zeroed it */
  static void throtl_service_queue_init(struct throtl_service_queue *sq)
  {
  	INIT_LIST_HEAD(&sq->queued[0]);
  	INIT_LIST_HEAD(&sq->queued[1]);
  	sq->pending_tree = RB_ROOT;
  	setup_timer(&sq->pending_timer, throtl_pending_timer_fn,
  		    (unsigned long)sq);
  }
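
/*
 * Allocate and pre-initialize a throtl_grp as this policy's per-blkg
 * data.  All bps/iops limits start out as -1, i.e. unlimited.
 */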
  static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
  {
  	struct throtl_grp *tg;
  	int rw;
  
  	tg = kzalloc_node(sizeof(*tg), gfp, node);
  	if (!tg)
  		return NULL;

  	throtl_service_queue_init(&tg->service_queue);
  
  	for (rw = READ; rw <= WRITE; rw++) {
  		throtl_qnode_init(&tg->qnode_on_self[rw], tg);
  		throtl_qnode_init(&tg->qnode_on_parent[rw], tg);
  	}
  
  	RB_CLEAR_NODE(&tg->rb_node);
  	tg->bps[READ] = -1;
  	tg->bps[WRITE] = -1;
  	tg->iops[READ] = -1;
  	tg->iops[WRITE] = -1;
  	return &tg->pd;
  }
  static void throtl_pd_init(struct blkg_policy_data *pd)
  {
  	struct throtl_grp *tg = pd_to_tg(pd);
  	struct blkcg_gq *blkg = tg_to_blkg(tg);
  	struct throtl_data *td = blkg->q->td;
  	struct throtl_service_queue *sq = &tg->service_queue;

  	/*
  	 * If on the default hierarchy, we switch to properly hierarchical
  	 * behavior where limits on a given throtl_grp are applied to the
  	 * whole subtree rather than just the group itself.  e.g. If 16M
  	 * read_bps limit is set on the root group, the whole system can't
  	 * exceed 16M for the device.
  	 *
  	 * If not on the default hierarchy, the broken flat hierarchy
  	 * behavior is retained where all throtl_grps are treated as if
  	 * they're all separate root groups right below throtl_data.
  	 * Limits of a group don't interact with limits of other groups
  	 * regardless of the position of the group in the hierarchy.
  	 */
  	sq->parent_sq = &td->service_queue;
  	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
  		sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
  	tg->td = td;
  }
  /*
   * Set has_rules[] if @tg or any of its parents have limits configured.
   * This doesn't require walking up to the top of the hierarchy as the
   * parent's has_rules[] is guaranteed to be correct.
   */
  static void tg_update_has_rules(struct throtl_grp *tg)
  {
  	struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq);
  	int rw;
  
  	for (rw = READ; rw <= WRITE; rw++)
  		tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
  				    (tg->bps[rw] != -1 || tg->iops[rw] != -1);
  }
  static void throtl_pd_online(struct blkg_policy_data *pd)
  {
  	/*
	 * We don't want new groups to escape the limits of their ancestors.
  	 * Update has_rules[] after a new group is brought online.
  	 */
  	tg_update_has_rules(pd_to_tg(pd));
  }
  static void throtl_pd_free(struct blkg_policy_data *pd)
  {
  	struct throtl_grp *tg = pd_to_tg(pd);
  	del_timer_sync(&tg->service_queue.pending_timer);
  	kfree(tg);
  }
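
/*
 * Return the first throtl_grp on @parent_sq's pending tree, i.e. the one
 * with the earliest ->disptime, caching the rb_first() lookup in
 * ->first_pending so repeated peeks stay cheap.
 */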
  static struct throtl_grp *
  throtl_rb_first(struct throtl_service_queue *parent_sq)
  {
  	/* Service tree is empty */
  	if (!parent_sq->nr_pending)
  		return NULL;
  	if (!parent_sq->first_pending)
  		parent_sq->first_pending = rb_first(&parent_sq->pending_tree);

  	if (parent_sq->first_pending)
  		return rb_entry_tg(parent_sq->first_pending);
  
  	return NULL;
  }
  
  static void rb_erase_init(struct rb_node *n, struct rb_root *root)
  {
  	rb_erase(n, root);
  	RB_CLEAR_NODE(n);
  }
  static void throtl_rb_erase(struct rb_node *n,
  			    struct throtl_service_queue *parent_sq)
  {
  	if (parent_sq->first_pending == n)
  		parent_sq->first_pending = NULL;
  	rb_erase_init(n, &parent_sq->pending_tree);
  	--parent_sq->nr_pending;
  }
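
/* cache the ->disptime of the earliest pending child in @parent_sq */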
  static void update_min_dispatch_time(struct throtl_service_queue *parent_sq)
  {
  	struct throtl_grp *tg;
  	tg = throtl_rb_first(parent_sq);
  	if (!tg)
  		return;
  	parent_sq->first_pending_disptime = tg->disptime;
  }
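
/*
 * Insert @tg into its parent's pending_tree, keyed by ->disptime.  If it
 * sorts leftmost, it also becomes the cached ->first_pending node.
 */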
  static void tg_service_queue_add(struct throtl_grp *tg)
  {
  	struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq;
  	struct rb_node **node = &parent_sq->pending_tree.rb_node;
  	struct rb_node *parent = NULL;
  	struct throtl_grp *__tg;
  	unsigned long key = tg->disptime;
  	int left = 1;
  
  	while (*node != NULL) {
  		parent = *node;
  		__tg = rb_entry_tg(parent);
  
  		if (time_before(key, __tg->disptime))
  			node = &parent->rb_left;
  		else {
  			node = &parent->rb_right;
  			left = 0;
  		}
  	}
  
  	if (left)
  		parent_sq->first_pending = &tg->rb_node;
  
  	rb_link_node(&tg->rb_node, parent, node);
  	rb_insert_color(&tg->rb_node, &parent_sq->pending_tree);
  }
  static void __throtl_enqueue_tg(struct throtl_grp *tg)
  {
  	tg_service_queue_add(tg);
  	tg->flags |= THROTL_TG_PENDING;
  	tg->service_queue.parent_sq->nr_pending++;
  }
  static void throtl_enqueue_tg(struct throtl_grp *tg)
  {
  	if (!(tg->flags & THROTL_TG_PENDING))
  		__throtl_enqueue_tg(tg);
  }
  static void __throtl_dequeue_tg(struct throtl_grp *tg)
  {
  	throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
  	tg->flags &= ~THROTL_TG_PENDING;
  }
  static void throtl_dequeue_tg(struct throtl_grp *tg)
  {
  	if (tg->flags & THROTL_TG_PENDING)
  		__throtl_dequeue_tg(tg);
  }
  /* Call with queue lock held */
  static void throtl_schedule_pending_timer(struct throtl_service_queue *sq,
  					  unsigned long expires)
  {
  	mod_timer(&sq->pending_timer, expires);
  	throtl_log(sq, "schedule timer. delay=%lu jiffies=%lu",
  		   expires - jiffies, jiffies);
  }
  /**
   * throtl_schedule_next_dispatch - schedule the next dispatch cycle
   * @sq: the service_queue to schedule dispatch for
   * @force: force scheduling
   *
   * Arm @sq->pending_timer so that the next dispatch cycle starts on the
   * dispatch time of the first pending child.  Returns %true if either timer
   * is armed or there's no pending child left.  %false if the current
   * dispatch window is still open and the caller should continue
   * dispatching.
   *
   * If @force is %true, the dispatch timer is always scheduled and this
   * function is guaranteed to return %true.  This is to be used when the
   * caller can't dispatch itself and needs to invoke pending_timer
 * unconditionally.  Note that forced scheduling is likely to induce a short
   * delay before dispatch starts even if @sq->first_pending_disptime is not
   * in the future and thus shouldn't be used in hot paths.
   */
  static bool throtl_schedule_next_dispatch(struct throtl_service_queue *sq,
  					  bool force)
  {
  	/* any pending children left? */
  	if (!sq->nr_pending)
  		return true;

  	update_min_dispatch_time(sq);

  	/* is the next dispatch time in the future? */
  	if (force || time_after(sq->first_pending_disptime, jiffies)) {
  		throtl_schedule_pending_timer(sq, sq->first_pending_disptime);
  		return true;
  	}
  	/* tell the caller to continue dispatching */
  	return false;
  }
  static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
  		bool rw, unsigned long start)
  {
  	tg->bytes_disp[rw] = 0;
  	tg->io_disp[rw] = 0;
  
  	/*
  	 * Previous slice has expired. We must have trimmed it after last
  	 * bio dispatch. That means since start of last slice, we never used
  	 * that bandwidth. Do try to make use of that bandwidth while giving
  	 * credit.
  	 */
  	if (time_after_eq(start, tg->slice_start[rw]))
  		tg->slice_start[rw] = start;
  
  	tg->slice_end[rw] = jiffies + throtl_slice;
  	throtl_log(&tg->service_queue,
  		   "[%c] new slice with credit start=%lu end=%lu jiffies=%lu",
  		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
  		   tg->slice_end[rw], jiffies);
  }
  static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
  {
  	tg->bytes_disp[rw] = 0;
  	tg->io_disp[rw] = 0;
  	tg->slice_start[rw] = jiffies;
  	tg->slice_end[rw] = jiffies + throtl_slice;
  	throtl_log(&tg->service_queue,
  		   "[%c] new slice start=%lu end=%lu jiffies=%lu",
  		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
  		   tg->slice_end[rw], jiffies);
  }
  static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
  					unsigned long jiffy_end)
  {
  	tg->slice_end[rw] = roundup(jiffy_end, throtl_slice);
  }
  static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw,
  				       unsigned long jiffy_end)
  {
  	tg->slice_end[rw] = roundup(jiffy_end, throtl_slice);
  	throtl_log(&tg->service_queue,
  		   "[%c] extend slice start=%lu end=%lu jiffies=%lu",
  		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
  		   tg->slice_end[rw], jiffies);
  }
  
  /* Determine if previously allocated or extended slice is complete or not */
  static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
  {
  	if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw]))
  		return false;
  
	return true;
  }
  
  /* Trim the used slices and adjust slice start accordingly */
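/*
 * Worked example with illustrative numbers (not from the source): with
 * HZ == 1000, throtl_slice is 100 jiffies.  If bps == 1000000 and 250
 * jiffies have passed since slice_start, nr_slices == 250 / 100 == 2,
 * so bytes_trim == 1000000 * 100 * 2 / HZ == 200000 is subtracted from
 * bytes_disp and slice_start advances by 200 jiffies.
 */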
  static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
  {
  	unsigned long nr_slices, time_elapsed, io_trim;
  	u64 bytes_trim, tmp;
  
  	BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw]));
  
  	/*
	 * If bps are unlimited (-1), then time slices don't get
	 * renewed. Don't try to trim the slice if the slice is used up. A new
  	 * slice will start when appropriate.
  	 */
  	if (throtl_slice_used(tg, rw))
  		return;
  	/*
  	 * A bio has been dispatched. Also adjust slice_end. It might happen
  	 * that initially cgroup limit was very low resulting in high
	 * slice_end, but later limit was bumped up and bio was dispatched
  	 * sooner, then we need to reduce slice_end. A high bogus slice_end
  	 * is bad because it does not allow new slice to start.
  	 */
  	throtl_set_slice_end(tg, rw, jiffies + throtl_slice);

  	time_elapsed = jiffies - tg->slice_start[rw];
  
  	nr_slices = time_elapsed / throtl_slice;
  
  	if (!nr_slices)
  		return;
  	tmp = tg->bps[rw] * throtl_slice * nr_slices;
  	do_div(tmp, HZ);
  	bytes_trim = tmp;

  	io_trim = (tg->iops[rw] * throtl_slice * nr_slices)/HZ;

  	if (!bytes_trim && !io_trim)
  		return;
  
  	if (tg->bytes_disp[rw] >= bytes_trim)
  		tg->bytes_disp[rw] -= bytes_trim;
  	else
  		tg->bytes_disp[rw] = 0;
  	if (tg->io_disp[rw] >= io_trim)
  		tg->io_disp[rw] -= io_trim;
  	else
  		tg->io_disp[rw] = 0;
  	tg->slice_start[rw] += nr_slices * throtl_slice;
  	throtl_log(&tg->service_queue,
  		   "[%c] trim slice nr=%lu bytes=%llu io=%lu start=%lu end=%lu jiffies=%lu",
  		   rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim,
  		   tg->slice_start[rw], tg->slice_end[rw], jiffies);
  }
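
/*
 * Can @bio be dispatched under @tg's iops limit in the current slice?
 * If not, and @wait is non-NULL, the estimated wait in jiffies is
 * stored there.
 */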
  static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
  				  unsigned long *wait)
  {
  	bool rw = bio_data_dir(bio);
  	unsigned int io_allowed;
  	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
  	u64 tmp;

  	jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];

  	/* Slice has just started. Consider one slice interval */
  	if (!jiffy_elapsed)
  		jiffy_elapsed_rnd = throtl_slice;
  
  	jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, throtl_slice);
  	/*
	 * jiffy_elapsed_rnd should not be a big value: the minimum iops can
	 * be 1, so at most jiffy_elapsed should be equivalent to 1 second,
	 * since we allow dispatch after 1 second and by then the slice
	 * should have been trimmed.
  	 */
  
  	tmp = (u64)tg->iops[rw] * jiffy_elapsed_rnd;
  	do_div(tmp, HZ);
  
  	if (tmp > UINT_MAX)
  		io_allowed = UINT_MAX;
  	else
  		io_allowed = tmp;
  
  	if (tg->io_disp[rw] + 1 <= io_allowed) {
  		if (wait)
  			*wait = 0;
  		return true;
  	}
  	/* Calc approx time to dispatch */
  	jiffy_wait = ((tg->io_disp[rw] + 1) * HZ)/tg->iops[rw] + 1;
  
  	if (jiffy_wait > jiffy_elapsed)
  		jiffy_wait = jiffy_wait - jiffy_elapsed;
  	else
  		jiffy_wait = 1;
  
  	if (wait)
  		*wait = jiffy_wait;
	return false;
  }
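
/* bps counterpart of tg_with_in_iops_limit(), keyed off bio size */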
  static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
  				 unsigned long *wait)
  {
  	bool rw = bio_data_dir(bio);
  	u64 bytes_allowed, extra_bytes, tmp;
  	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
  
  	jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
  
  	/* Slice has just started. Consider one slice interval */
  	if (!jiffy_elapsed)
  		jiffy_elapsed_rnd = throtl_slice;
  
  	jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, throtl_slice);
  	tmp = tg->bps[rw] * jiffy_elapsed_rnd;
  	do_div(tmp, HZ);
  	bytes_allowed = tmp;

  	if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) {
  		if (wait)
  			*wait = 0;
  		return true;
  	}
  
  	/* Calc approx time to dispatch */
  	extra_bytes = tg->bytes_disp[rw] + bio->bi_iter.bi_size - bytes_allowed;
  	jiffy_wait = div64_u64(extra_bytes * HZ, tg->bps[rw]);
  
  	if (!jiffy_wait)
  		jiffy_wait = 1;
  
  	/*
	 * This wait time doesn't take into consideration the rounding up we
	 * did. Add that time too.
  	 */
  	jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed);
  	if (wait)
  		*wait = jiffy_wait;
	return false;
  }
  
  /*
   * Returns whether one can dispatch a bio or not. Also returns approx number
 * of jiffies to wait before this bio is within IO rate and can be dispatched
   */
  static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
  			    unsigned long *wait)
  {
  	bool rw = bio_data_dir(bio);
  	unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0;
  
  	/*
	 * Currently the whole state machine of the group depends on the
	 * first bio queued in the group's bio list. So one should not be
	 * calling this function with a different bio if there are other
	 * bios queued.
  	 */
  	BUG_ON(tg->service_queue.nr_queued[rw] &&
  	       bio != throtl_peek_queued(&tg->service_queue.queued[rw]));

  	/* If tg->bps = -1, then BW is unlimited */
  	if (tg->bps[rw] == -1 && tg->iops[rw] == -1) {
  		if (wait)
  			*wait = 0;
  		return true;
  	}
  
  	/*
  	 * If previous slice expired, start a new one otherwise renew/extend
  	 * existing slice to make sure it is at least throtl_slice interval
  	 * long since now.
  	 */
  	if (throtl_slice_used(tg, rw))
  		throtl_start_new_slice(tg, rw);
  	else {
  		if (time_before(tg->slice_end[rw], jiffies + throtl_slice))
  			throtl_extend_slice(tg, rw, jiffies + throtl_slice);
  	}
  	if (tg_with_in_bps_limit(tg, bio, &bps_wait) &&
  	    tg_with_in_iops_limit(tg, bio, &iops_wait)) {
  		if (wait)
  			*wait = 0;
		return true;
  	}
  
  	max_wait = max(bps_wait, iops_wait);
  
  	if (wait)
  		*wait = max_wait;
  
  	if (time_before(tg->slice_end[rw], jiffies + max_wait))
  		throtl_extend_slice(tg, rw, jiffies + max_wait);
  
	return false;
  }
  
  static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
  {
  	bool rw = bio_data_dir(bio);
  
  	/* Charge the bio to the group */
  	tg->bytes_disp[rw] += bio->bi_iter.bi_size;
  	tg->io_disp[rw]++;

  	/*
	 * REQ_THROTTLED is used to prevent the same bio from being throttled
  	 * more than once as a throttled bio will go through blk-throtl the
  	 * second time when it eventually gets issued.  Set it when a bio
  	 * is being charged to a tg.
  	 */
  	if (!(bio->bi_rw & REQ_THROTTLED))
  		bio->bi_rw |= REQ_THROTTLED;
  }
  /**
   * throtl_add_bio_tg - add a bio to the specified throtl_grp
   * @bio: bio to add
   * @qn: qnode to use
   * @tg: the target throtl_grp
   *
   * Add @bio to @tg's service_queue using @qn.  If @qn is not specified,
   * tg->qnode_on_self[] is used.
   */
  static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
  			      struct throtl_grp *tg)
  {
  	struct throtl_service_queue *sq = &tg->service_queue;
  	bool rw = bio_data_dir(bio);
  	if (!qn)
  		qn = &tg->qnode_on_self[rw];
  	/*
  	 * If @tg doesn't currently have any bios queued in the same
  	 * direction, queueing @bio can change when @tg should be
  	 * dispatched.  Mark that @tg was empty.  This is automatically
	 * cleared on the next tg_update_disptime().
  	 */
  	if (!sq->nr_queued[rw])
  		tg->flags |= THROTL_TG_WAS_EMPTY;
  	throtl_qnode_add_bio(bio, qn, &sq->queued[rw]);
  	sq->nr_queued[rw]++;
  	throtl_enqueue_tg(tg);
  }
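
/*
 * Recompute @tg->disptime from the wait times of its first queued READ
 * and WRITE bios and re-sort @tg in the parent's pending_tree.
 */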
  static void tg_update_disptime(struct throtl_grp *tg)
  {
  	struct throtl_service_queue *sq = &tg->service_queue;
  	unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
  	struct bio *bio;
  	if ((bio = throtl_peek_queued(&sq->queued[READ])))
  		tg_may_dispatch(tg, bio, &read_wait);

  	if ((bio = throtl_peek_queued(&sq->queued[WRITE])))
  		tg_may_dispatch(tg, bio, &write_wait);
  
  	min_wait = min(read_wait, write_wait);
  	disptime = jiffies + min_wait;
  	/* Update dispatch time */
  	throtl_dequeue_tg(tg);
  	tg->disptime = disptime;
  	throtl_enqueue_tg(tg);
  
  	/* see throtl_add_bio_tg() */
  	tg->flags &= ~THROTL_TG_WAS_EMPTY;
  }
  static void start_parent_slice_with_credit(struct throtl_grp *child_tg,
  					struct throtl_grp *parent_tg, bool rw)
  {
  	if (throtl_slice_used(parent_tg, rw)) {
  		throtl_start_new_slice_with_credit(parent_tg, rw,
  				child_tg->slice_start[rw]);
  	}
  
  }
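
/*
 * Transfer the first queued bio in @rw direction out of @tg: charge it
 * to @tg and either queue it on the parent throtl_grp or, at the top
 * level, leave it on @td->service_queue for the dispatch work item to
 * issue.
 */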
  static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
  {
  	struct throtl_service_queue *sq = &tg->service_queue;
  	struct throtl_service_queue *parent_sq = sq->parent_sq;
  	struct throtl_grp *parent_tg = sq_to_tg(parent_sq);
  	struct throtl_grp *tg_to_put = NULL;
  	struct bio *bio;
  	/*
  	 * @bio is being transferred from @tg to @parent_sq.  Popping a bio
  	 * from @tg may put its reference and @parent_sq might end up
  	 * getting released prematurely.  Remember the tg to put and put it
  	 * after @bio is transferred to @parent_sq.
  	 */
  	bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put);
  	sq->nr_queued[rw]--;
  
  	throtl_charge_bio(tg, bio);
  
  	/*
  	 * If our parent is another tg, we just need to transfer @bio to
  	 * the parent using throtl_add_bio_tg().  If our parent is
  	 * @td->service_queue, @bio is ready to be issued.  Put it on its
  	 * bio_lists[] and decrease total number queued.  The caller is
  	 * responsible for issuing these bios.
  	 */
  	if (parent_tg) {
  		throtl_add_bio_tg(bio, &tg->qnode_on_parent[rw], parent_tg);
  		start_parent_slice_with_credit(tg, parent_tg, rw);
  	} else {
  		throtl_qnode_add_bio(bio, &tg->qnode_on_parent[rw],
  				     &parent_sq->queued[rw]);
  		BUG_ON(tg->td->nr_queued[rw] <= 0);
  		tg->td->nr_queued[rw]--;
  	}

  	throtl_trim_slice(tg, rw);

  	if (tg_to_put)
  		blkg_put(tg_to_blkg(tg_to_put));
  }
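
/* dispatch up to throtl_grp_quantum bios from @tg, favoring reads 3:1 */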
  static int throtl_dispatch_tg(struct throtl_grp *tg)
  {
  	struct throtl_service_queue *sq = &tg->service_queue;
  	unsigned int nr_reads = 0, nr_writes = 0;
  	unsigned int max_nr_reads = throtl_grp_quantum*3/4;
  	unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads;
  	struct bio *bio;
  
  	/* Try to dispatch 75% READS and 25% WRITES */
  	while ((bio = throtl_peek_queued(&sq->queued[READ])) &&
  	       tg_may_dispatch(tg, bio, NULL)) {

  		tg_dispatch_one_bio(tg, bio_data_dir(bio));
  		nr_reads++;
  
  		if (nr_reads >= max_nr_reads)
  			break;
  	}
  	while ((bio = throtl_peek_queued(&sq->queued[WRITE])) &&
  	       tg_may_dispatch(tg, bio, NULL)) {

  		tg_dispatch_one_bio(tg, bio_data_dir(bio));
  		nr_writes++;
  
  		if (nr_writes >= max_nr_writes)
  			break;
  	}
  
  	return nr_reads + nr_writes;
  }
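
/*
 * Walk @parent_sq's pending tree in ->disptime order, dispatching from
 * each child whose dispatch time has arrived, up to throtl_quantum bios
 * in total.  Returns the number of bios dispatched.
 */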
  static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
  {
  	unsigned int nr_disp = 0;
  
  	while (1) {
  		struct throtl_grp *tg = throtl_rb_first(parent_sq);
		struct throtl_service_queue *sq;
  
		if (!tg)
			break;

		sq = &tg->service_queue;
  
  		if (time_before(jiffies, tg->disptime))
  			break;
  		throtl_dequeue_tg(tg);

  		nr_disp += throtl_dispatch_tg(tg);

  		if (sq->nr_queued[0] || sq->nr_queued[1])
  			tg_update_disptime(tg);
  
  		if (nr_disp >= throtl_quantum)
  			break;
  	}
  
  	return nr_disp;
  }
  /**
   * throtl_pending_timer_fn - timer function for service_queue->pending_timer
   * @arg: the throtl_service_queue being serviced
   *
 * This timer is armed when a child throtl_grp with active bio's becomes
   * pending and queued on the service_queue's pending_tree and expires when
   * the first child throtl_grp should be dispatched.  This function
   * dispatches bio's from the children throtl_grps to the parent
   * service_queue.
   *
   * If the parent's parent is another throtl_grp, dispatching is propagated
   * by either arming its pending_timer or repeating dispatch directly.  If
   * the top-level service_tree is reached, throtl_data->dispatch_work is
   * kicked so that the ready bio's are issued.
   */
  static void throtl_pending_timer_fn(unsigned long arg)
  {
  	struct throtl_service_queue *sq = (void *)arg;
  	struct throtl_grp *tg = sq_to_tg(sq);
  	struct throtl_data *td = sq_to_td(sq);
  	struct request_queue *q = td->queue;
  	struct throtl_service_queue *parent_sq;
  	bool dispatched;
  	int ret;
  
  	spin_lock_irq(q->queue_lock);
  again:
  	parent_sq = sq->parent_sq;
  	dispatched = false;

  	while (true) {
  		throtl_log(sq, "dispatch nr_queued=%u read=%u write=%u",
  			   sq->nr_queued[READ] + sq->nr_queued[WRITE],
  			   sq->nr_queued[READ], sq->nr_queued[WRITE]);
  
  		ret = throtl_select_dispatch(sq);
  		if (ret) {
  			throtl_log(sq, "bios disp=%u", ret);
  			dispatched = true;
  		}

  		if (throtl_schedule_next_dispatch(sq, false))
  			break;

		/* this dispatch window is still open, relax and repeat */
  		spin_unlock_irq(q->queue_lock);
  		cpu_relax();
  		spin_lock_irq(q->queue_lock);
  	}

  	if (!dispatched)
  		goto out_unlock;

  	if (parent_sq) {
		/* @parent_sq is another throtl_grp, propagate dispatch */
  		if (tg->flags & THROTL_TG_WAS_EMPTY) {
  			tg_update_disptime(tg);
  			if (!throtl_schedule_next_dispatch(parent_sq, false)) {
  				/* window is already open, repeat dispatching */
  				sq = parent_sq;
  				tg = sq_to_tg(sq);
  				goto again;
  			}
  		}
  	} else {
		/* reached the top-level, queue issuing */
  		queue_work(kthrotld_workqueue, &td->dispatch_work);
  	}
  out_unlock:
  	spin_unlock_irq(q->queue_lock);
  }

  /**
   * blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work
   * @work: work item being executed
   *
   * This function is queued for execution when bio's reach the bio_lists[]
   * of throtl_data->service_queue.  Those bio's are ready and issued by this
   * function.
   */
  static void blk_throtl_dispatch_work_fn(struct work_struct *work)
  {
  	struct throtl_data *td = container_of(work, struct throtl_data,
  					      dispatch_work);
  	struct throtl_service_queue *td_sq = &td->service_queue;
  	struct request_queue *q = td->queue;
  	struct bio_list bio_list_on_stack;
  	struct bio *bio;
  	struct blk_plug plug;
  	int rw;
  
  	bio_list_init(&bio_list_on_stack);
  
  	spin_lock_irq(q->queue_lock);
  	for (rw = READ; rw <= WRITE; rw++)
  		while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL)))
  			bio_list_add(&bio_list_on_stack, bio);
  	spin_unlock_irq(q->queue_lock);
  
  	if (!bio_list_empty(&bio_list_on_stack)) {
  		blk_start_plug(&plug);
  		while((bio = bio_list_pop(&bio_list_on_stack)))
  			generic_make_request(bio);
  		blk_finish_plug(&plug);
  	}
  }
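
/* prfill helpers: print one per-tg config value, hiding -1 (unlimited) */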
  static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd,
  			      int off)
  {
  	struct throtl_grp *tg = pd_to_tg(pd);
  	u64 v = *(u64 *)((void *)tg + off);

  	if (v == -1)
  		return 0;
  	return __blkg_prfill_u64(sf, pd, v);
  }
  static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd,
  			       int off)
  {
  	struct throtl_grp *tg = pd_to_tg(pd);
  	unsigned int v = *(unsigned int *)((void *)tg + off);

  	if (v == -1)
  		return 0;
  	return __blkg_prfill_u64(sf, pd, v);
  }
  static int tg_print_conf_u64(struct seq_file *sf, void *v)
  {
  	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_u64,
  			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
  	return 0;
  }
  static int tg_print_conf_uint(struct seq_file *sf, void *v)
  {
  	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_uint,
  			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
  	return 0;
  }
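
/*
 * Apply a configuration change to @tg: refresh has_rules[] across its
 * subtree, restart both slices so stale budget isn't carried over, and
 * reschedule dispatch if @tg is already pending.
 */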
  static void tg_conf_updated(struct throtl_grp *tg)
60c2bc2d5   Tejun Heo   blkcg: move conf/...
1069
  {
69948b070   Tejun Heo   blkcg: separate o...
1070
  	struct throtl_service_queue *sq = &tg->service_queue;
492eb21b9   Tejun Heo   cgroup: make hier...
1071
  	struct cgroup_subsys_state *pos_css;
69948b070   Tejun Heo   blkcg: separate o...
1072
  	struct blkcg_gq *blkg;
af133ceb2   Tejun Heo   blkcg: move blkio...
1073

fda6f272c   Tejun Heo   blk-throttle: imp...
1074
1075
1076
1077
  	throtl_log(&tg->service_queue,
  		   "limit change rbps=%llu wbps=%llu riops=%u wiops=%u",
  		   tg->bps[READ], tg->bps[WRITE],
  		   tg->iops[READ], tg->iops[WRITE]);
632b44935   Tejun Heo   blk-throttle: rem...
1078
1079
  
  	/*
693e751e7   Tejun Heo   blk-throttle: imp...
1080
1081
1082
1083
1084
1085
  	 * Update has_rules[] flags for the updated tg's subtree.  A tg is
  	 * considered to have rules if either the tg itself or any of its
  	 * ancestors has rules.  This identifies groups without any
  	 * restrictions in the whole hierarchy and allows them to bypass
  	 * blk-throttle.
  	 */
69948b070   Tejun Heo   blkcg: separate o...
1086
  	blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg))
693e751e7   Tejun Heo   blk-throttle: imp...
1087
1088
1089
  		tg_update_has_rules(blkg_to_tg(blkg));
  
  	/*
632b44935   Tejun Heo   blk-throttle: rem...
1090
1091
1092
1093
1094
1095
1096
  	 * We're already holding queue_lock and know @tg is valid.  Let's
  	 * apply the new config directly.
  	 *
  	 * Restart the slices for both READ and WRITES. It might happen
  	 * that a group's limit are dropped suddenly and we don't want to
  	 * account recently dispatched IO with new low rate.
  	 */
0f3457f60   Tejun Heo   blk-throttle: add...
1097
1098
  	throtl_start_new_slice(tg, 0);
  	throtl_start_new_slice(tg, 1);
632b44935   Tejun Heo   blk-throttle: rem...
1099

5b2c16aae   Tejun Heo   blk-throttle: sim...
1100
  	if (tg->flags & THROTL_TG_PENDING) {
77216b048   Tejun Heo   blk-throttle: add...
1101
  		tg_update_disptime(tg);
7f52f98c2   Tejun Heo   blk-throttle: imp...
1102
  		throtl_schedule_next_dispatch(sq->parent_sq, true);
632b44935   Tejun Heo   blk-throttle: rem...
1103
  	}
69948b070   Tejun Heo   blkcg: separate o...
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
  }
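
/*
 * For reference, tg_update_has_rules() (defined earlier in this file)
 * computes roughly the following per direction -- a sketch, not
 * verbatim:
 *
 *	tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
 *			    (tg->bps[rw] != -1 || tg->iops[rw] != -1);
 *
 * which is why the pre-order walk above suffices: each node only needs
 * its parent's already-updated value.
 */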
  
  static ssize_t tg_set_conf(struct kernfs_open_file *of,
  			   char *buf, size_t nbytes, loff_t off, bool is_u64)
  {
  	struct blkcg *blkcg = css_to_blkcg(of_css(of));
  	struct blkg_conf_ctx ctx;
  	struct throtl_grp *tg;
  	int ret;
  	u64 v;
  
  	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
  	if (ret)
  		return ret;
  
  	ret = -EINVAL;
  	if (sscanf(ctx.body, "%llu", &v) != 1)
  		goto out_finish;
  	if (!v)
  		v = -1;
  
  	tg = blkg_to_tg(ctx.blkg);
  
  	if (is_u64)
  		*(u64 *)((void *)tg + of_cft(of)->private) = v;
  	else
  		*(unsigned int *)((void *)tg + of_cft(of)->private) = v;

	tg_conf_updated(tg);
	ret = 0;
out_finish:
	blkg_conf_finish(&ctx);
	return ret ?: nbytes;
}

static ssize_t tg_set_conf_u64(struct kernfs_open_file *of,
			       char *buf, size_t nbytes, loff_t off)
{
	return tg_set_conf(of, buf, nbytes, off, true);
}

static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off)
{
	return tg_set_conf(of, buf, nbytes, off, false);
}

static struct cftype throtl_legacy_files[] = {
	{
		.name = "throttle.read_bps_device",
		.private = offsetof(struct throtl_grp, bps[READ]),
		.seq_show = tg_print_conf_u64,
		.write = tg_set_conf_u64,
	},
	{
		.name = "throttle.write_bps_device",
		.private = offsetof(struct throtl_grp, bps[WRITE]),
		.seq_show = tg_print_conf_u64,
		.write = tg_set_conf_u64,
	},
	{
		.name = "throttle.read_iops_device",
		.private = offsetof(struct throtl_grp, iops[READ]),
		.seq_show = tg_print_conf_uint,
		.write = tg_set_conf_uint,
	},
	{
		.name = "throttle.write_iops_device",
		.private = offsetof(struct throtl_grp, iops[WRITE]),
		.seq_show = tg_print_conf_uint,
		.write = tg_set_conf_uint,
	},
	{
		.name = "throttle.io_service_bytes",
		.private = (unsigned long)&blkcg_policy_throtl,
		.seq_show = blkg_print_stat_bytes,
	},
	{
		.name = "throttle.io_serviced",
		.private = (unsigned long)&blkcg_policy_throtl,
		.seq_show = blkg_print_stat_ios,
	},
	{ }	/* terminate */
};
  static u64 tg_prfill_max(struct seq_file *sf, struct blkg_policy_data *pd,
  			 int off)
  {
  	struct throtl_grp *tg = pd_to_tg(pd);
  	const char *dname = blkg_dev_name(pd->blkg);
  	char bufs[4][21] = { "max", "max", "max", "max" };
  
  	if (!dname)
  		return 0;
  	if (tg->bps[READ] == -1 && tg->bps[WRITE] == -1 &&
  	    tg->iops[READ] == -1 && tg->iops[WRITE] == -1)
  		return 0;
  
  	if (tg->bps[READ] != -1)
  		snprintf(bufs[0], sizeof(bufs[0]), "%llu", tg->bps[READ]);
  	if (tg->bps[WRITE] != -1)
  		snprintf(bufs[1], sizeof(bufs[1]), "%llu", tg->bps[WRITE]);
  	if (tg->iops[READ] != -1)
  		snprintf(bufs[2], sizeof(bufs[2]), "%u", tg->iops[READ]);
  	if (tg->iops[WRITE] != -1)
  		snprintf(bufs[3], sizeof(bufs[3]), "%u", tg->iops[WRITE]);
  
	seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s\n",
		   dname, bufs[0], bufs[1], bufs[2], bufs[3]);
  	return 0;
  }
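
/*
 * A line emitted by tg_prfill_max() for a device with only a read
 * bandwidth limit set looks like (hypothetical values):
 *
 *	8:16 rbps=2097152 wbps=max riops=max wiops=max
 */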
  
  static int tg_print_max(struct seq_file *sf, void *v)
  {
  	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_max,
  			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
  	return 0;
  }
  
  static ssize_t tg_set_max(struct kernfs_open_file *of,
  			  char *buf, size_t nbytes, loff_t off)
  {
  	struct blkcg *blkcg = css_to_blkcg(of_css(of));
  	struct blkg_conf_ctx ctx;
  	struct throtl_grp *tg;
  	u64 v[4];
  	int ret;
  
  	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
  	if (ret)
  		return ret;
  
  	tg = blkg_to_tg(ctx.blkg);
  
  	v[0] = tg->bps[READ];
  	v[1] = tg->bps[WRITE];
  	v[2] = tg->iops[READ];
  	v[3] = tg->iops[WRITE];
  
  	while (true) {
  		char tok[27];	/* wiops=18446744073709551616 */
  		char *p;
  		u64 val = -1;
  		int len;
  
  		if (sscanf(ctx.body, "%26s%n", tok, &len) != 1)
  			break;
  		if (tok[0] == '\0')
  			break;
  		ctx.body += len;
  
  		ret = -EINVAL;
  		p = tok;
  		strsep(&p, "=");
  		if (!p || (sscanf(p, "%llu", &val) != 1 && strcmp(p, "max")))
  			goto out_finish;
  
  		ret = -ERANGE;
  		if (!val)
  			goto out_finish;
  
  		ret = -EINVAL;
  		if (!strcmp(tok, "rbps"))
  			v[0] = val;
  		else if (!strcmp(tok, "wbps"))
  			v[1] = val;
  		else if (!strcmp(tok, "riops"))
  			v[2] = min_t(u64, val, UINT_MAX);
  		else if (!strcmp(tok, "wiops"))
  			v[3] = min_t(u64, val, UINT_MAX);
  		else
  			goto out_finish;
  	}
  
  	tg->bps[READ] = v[0];
  	tg->bps[WRITE] = v[1];
  	tg->iops[READ] = v[2];
  	tg->iops[WRITE] = v[3];
  
  	tg_conf_updated(tg);
  	ret = 0;
  out_finish:
  	blkg_conf_finish(&ctx);
  	return ret ?: nbytes;
  }
  
  static struct cftype throtl_files[] = {
  	{
  		.name = "max",
  		.flags = CFTYPE_NOT_ON_ROOT,
  		.seq_show = tg_print_max,
  		.write = tg_set_max,
  	},
  	{ }	/* terminate */
  };
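
/*
 * Usage example for the "max" file above (exposed with the controller
 * prefix on the unified hierarchy, e.g. "io.max" on recent kernels).
 * Each write updates only the fields mentioned, and "max" clears a
 * limit; hypothetical 8:16 device:
 *
 *	echo "8:16 rbps=2097152 wiops=120" > io.max
 *	echo "8:16 rbps=max" > io.max
 */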

static void throtl_shutdown_wq(struct request_queue *q)
{
	struct throtl_data *td = q->td;

	cancel_work_sync(&td->dispatch_work);
}

static struct blkcg_policy blkcg_policy_throtl = {
	.dfl_cftypes		= throtl_files,
	.legacy_cftypes		= throtl_legacy_files,

	.pd_alloc_fn		= throtl_pd_alloc,
	.pd_init_fn		= throtl_pd_init,
	.pd_online_fn		= throtl_pd_online,
	.pd_free_fn		= throtl_pd_free,
};

bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
		    struct bio *bio)
{
	struct throtl_qnode *qn = NULL;
	struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
	struct throtl_service_queue *sq;
	bool rw = bio_data_dir(bio);
	bool throttled = false;

	WARN_ON_ONCE(!rcu_read_lock_held());

	/* see throtl_charge_bio() */
	if ((bio->bi_rw & REQ_THROTTLED) || !tg->has_rules[rw])
		goto out;

	spin_lock_irq(q->queue_lock);

	if (unlikely(blk_queue_bypass(q)))
		goto out_unlock;

	sq = &tg->service_queue;

	while (true) {
		/* throtl is FIFO - if bios are already queued, should queue */
		if (sq->nr_queued[rw])
			break;

		/* if above limits, break to queue */
		if (!tg_may_dispatch(tg, bio, NULL))
			break;

		/* within limits, let's charge and dispatch directly */
		throtl_charge_bio(tg, bio);

		/*
		 * We need to trim the slice even when bios are not being
		 * queued, otherwise a bio might not get queued for a long
		 * time while the slice keeps extending without ever being
		 * trimmed.  If the limits are then reduced suddenly, all the
		 * IO dispatched so far would be accounted at the new low
		 * rate and newly queued IO would get a really long dispatch
		 * time -- e.g. 50MiB dispatched under an old 100MiB/s limit
		 * would, against a new 1MiB/s limit, delay the next bio by
		 * tens of seconds.
		 *
		 * So keep trimming the slice even if no bio is queued.
		 */
		throtl_trim_slice(tg, rw);

		/*
		 * @bio passed through this layer without being throttled.
		 * Climb up the ladder.  If we're already at the top, it
		 * can be executed directly.
		 */
		qn = &tg->qnode_on_parent[rw];
		sq = sq->parent_sq;
		tg = sq_to_tg(sq);
		if (!tg)
			goto out_unlock;
	}

	/* out-of-limit, queue to @tg */
	throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d",
		   rw == READ ? 'R' : 'W',
		   tg->bytes_disp[rw], bio->bi_iter.bi_size, tg->bps[rw],
		   tg->io_disp[rw], tg->iops[rw],
		   sq->nr_queued[READ], sq->nr_queued[WRITE]);

	bio_associate_current(bio);
	tg->td->nr_queued[rw]++;
	throtl_add_bio_tg(bio, qn, tg);
	throttled = true;

	/*
	 * Update @tg's dispatch time and force schedule dispatch if @tg
	 * was empty before @bio.  The forced scheduling isn't likely to
	 * cause undue delay as @bio is likely to be dispatched directly if
	 * its @tg's disptime is not in the future.
	 */
	if (tg->flags & THROTL_TG_WAS_EMPTY) {
		tg_update_disptime(tg);
		throtl_schedule_next_dispatch(tg->service_queue.parent_sq, true);
	}

out_unlock:
	spin_unlock_irq(q->queue_lock);
out:
	/*
	 * As multiple blk-throtls may stack in the same issue path, we
	 * don't want bios to leave with the flag set.  Clear the flag if
	 * being issued.
	 */
	if (!throttled)
		bio->bi_rw &= ~REQ_THROTTLED;
	return throttled;
}
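
/*
 * Illustration of the climb in blk_throtl_bio() with a hypothetical
 * hierarchy root <- A <- B, where B is unlimited and A is capped: a bio
 * entering B passes B's (absent) limits, is charged to B, then climbs
 * and is checked against A.  If A has budget it climbs again and is
 * issued; otherwise it is queued on A via B's qnode_on_parent[] so the
 * dispatch path can later propagate it upwards.
 */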

/*
 * Dispatch all bios from all children tg's queued on @parent_sq.  On
 * return, @parent_sq is guaranteed to not have any active children tg's
 * and all bios from previously active tg's are on @parent_sq->bio_lists[].
 */
static void tg_drain_bios(struct throtl_service_queue *parent_sq)
{
	struct throtl_grp *tg;

	while ((tg = throtl_rb_first(parent_sq))) {
		struct throtl_service_queue *sq = &tg->service_queue;
		struct bio *bio;

		throtl_dequeue_tg(tg);

		while ((bio = throtl_peek_queued(&sq->queued[READ])))
			tg_dispatch_one_bio(tg, bio_data_dir(bio));
		while ((bio = throtl_peek_queued(&sq->queued[WRITE])))
			tg_dispatch_one_bio(tg, bio_data_dir(bio));
	}
}

/**
 * blk_throtl_drain - drain throttled bios
 * @q: request_queue to drain throttled bios for
 *
 * Dispatch all currently throttled bios on @q through ->make_request_fn().
 */
void blk_throtl_drain(struct request_queue *q)
	__releases(q->queue_lock) __acquires(q->queue_lock)
{
	struct throtl_data *td = q->td;
	struct blkcg_gq *blkg;
	struct cgroup_subsys_state *pos_css;
	struct bio *bio;
	int rw;

	queue_lockdep_assert_held(q);
	rcu_read_lock();

	/*
	 * Drain each tg while doing a post-order walk on the blkg tree, so
	 * that all bios are propagated to td->service_queue.  It'd be
	 * better to walk the service_queue tree directly, but the blkg
	 * walk is easier.
	 */
	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg)
		tg_drain_bios(&blkg_to_tg(blkg)->service_queue);

	/* finally, transfer bios from top-level tg's into the td */
	tg_drain_bios(&td->service_queue);

	rcu_read_unlock();
	spin_unlock_irq(q->queue_lock);

	/* all bios now should be in td->service_queue, issue them */
	for (rw = READ; rw <= WRITE; rw++)
		while ((bio = throtl_pop_queued(&td->service_queue.queued[rw],
						NULL)))
			generic_make_request(bio);

	spin_lock_irq(q->queue_lock);
}
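
/*
 * Note on ordering: the post-order walk above drains each group's
 * children into it before the group's own bios are propagated upward,
 * and the final tg_drain_bios(&td->service_queue) moves the top level
 * into the td, so the issuing loop sees every previously throttled bio
 * exactly once.
 */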

int blk_throtl_init(struct request_queue *q)
{
	struct throtl_data *td;
	int ret;

	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
	if (!td)
		return -ENOMEM;

	INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
	throtl_service_queue_init(&td->service_queue);

	q->td = td;
	td->queue = q;

	/* activate policy */
	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
	if (ret)
		kfree(td);
	return ret;
  }
  
  void blk_throtl_exit(struct request_queue *q)
  {
	BUG_ON(!q->td);
	throtl_shutdown_wq(q);
	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
	kfree(q->td);
  }
  
  static int __init throtl_init(void)
  {
	kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0);
	if (!kthrotld_workqueue)
		panic("Failed to create kthrotld\n");

  	return blkcg_policy_register(&blkcg_policy_throtl);
e43473b7f   Vivek Goyal   blkio: Core imple...
1496
1497
1498
  }
  
  module_init(throtl_init);