block/blk-throttle.c

  /*
   * Interface for controlling IO bandwidth on a request queue
   *
   * Copyright (C) 2010 Vivek Goyal <vgoyal@redhat.com>
   */
  
  #include <linux/module.h>
  #include <linux/slab.h>
  #include <linux/blkdev.h>
  #include <linux/bio.h>
  #include <linux/blktrace_api.h>
  #include <linux/blk-cgroup.h>
  #include "blk.h"
  
  /* Max dispatch from a group in 1 round */
  static int throtl_grp_quantum = 8;
  
  /* Total max dispatch from all groups in one round */
  static int throtl_quantum = 32;
  
  /* Throttling is performed over 100ms slice and after that slice is renewed */
  static unsigned long throtl_slice = HZ/10;	/* 100 ms */
  static struct blkcg_policy blkcg_policy_throtl;

  /* A workqueue to queue throttle related work */
  static struct workqueue_struct *kthrotld_workqueue;

  /*
   * To implement hierarchical throttling, throtl_grps form a tree and bios
   * are dispatched upwards level by level until they reach the top and get
   * issued.  When dispatching bios from the children and local group at each
   * level, if the bios are dispatched into a single bio_list, there's a risk
   * that a local or child group which can queue many bios at once fills up
   * the list and starves the others.
   *
   * To avoid such starvation, dispatched bios are queued separately
   * according to where they came from.  When they are again dispatched to
   * the parent, they're popped in round-robin order so that no single source
   * hogs the dispatch window.
   *
   * throtl_qnode is used to keep the queued bios separated by their sources.
   * Bios are queued to throtl_qnode which in turn is queued to
   * throtl_service_queue and then dispatched in round-robin order.
   *
   * It's also used to track the reference counts on blkg's.  A qnode always
   * belongs to a throtl_grp and gets queued on itself or the parent, so
   * incrementing the reference of the associated throtl_grp when a qnode is
   * queued and decrementing when dequeued is enough to keep the whole blkg
   * tree pinned while bios are in flight.
   */
  struct throtl_qnode {
  	struct list_head	node;		/* service_queue->queued[] */
  	struct bio_list		bios;		/* queued bios */
  	struct throtl_grp	*tg;		/* tg this qnode belongs to */
  };
  struct throtl_service_queue {
  	struct throtl_service_queue *parent_sq;	/* the parent service_queue */
  	/*
  	 * Bios queued directly to this service_queue or dispatched from
  	 * children throtl_grp's.
  	 */
  	struct list_head	queued[2];	/* throtl_qnode [READ/WRITE] */
  	unsigned int		nr_queued[2];	/* number of queued bios */
  
  	/*
  	 * RB tree of active children throtl_grp's, which are sorted by
  	 * their ->disptime.
  	 */
  	struct rb_root		pending_tree;	/* RB tree of active tgs */
  	struct rb_node		*first_pending;	/* first node in the tree */
  	unsigned int		nr_pending;	/* # queued in the tree */
  	unsigned long		first_pending_disptime;	/* disptime of the first tg */
  	struct timer_list	pending_timer;	/* fires on first_pending_disptime */
  };
  enum tg_state_flags {
  	THROTL_TG_PENDING	= 1 << 0,	/* on parent's pending tree */
  	THROTL_TG_WAS_EMPTY	= 1 << 1,	/* bio_lists[] became non-empty */
  };
  #define rb_entry_tg(node)	rb_entry((node), struct throtl_grp, rb_node)
  
  struct throtl_grp {
  	/* must be the first member */
  	struct blkg_policy_data pd;
  	/* active throtl group service_queue member */
  	struct rb_node rb_node;
  	/* throtl_data this group belongs to */
  	struct throtl_data *td;
  	/* this group's service queue */
  	struct throtl_service_queue service_queue;
  	/*
  	 * qnode_on_self is used when bios are directly queued to this
  	 * throtl_grp so that local bios compete fairly with bios
  	 * dispatched from children.  qnode_on_parent is used when bios are
  	 * dispatched from this throtl_grp into its parent and will compete
  	 * with the sibling qnode_on_parents and the parent's
  	 * qnode_on_self.
  	 */
  	struct throtl_qnode qnode_on_self[2];
  	struct throtl_qnode qnode_on_parent[2];
  
  	/*
  	 * Dispatch time in jiffies. This is the estimated time when the
  	 * group will unthrottle and be ready to dispatch more bios. It is
  	 * used as the key to sort active groups in the service tree.
  	 */
  	unsigned long disptime;
  	unsigned int flags;
  	/* are there any throtl rules between this group and td? */
  	bool has_rules[2];
  	/* bytes per second rate limits */
  	uint64_t bps[2];
  	/* IOPS limits */
  	unsigned int iops[2];
  	/* Number of bytes dispatched in current slice */
  	uint64_t bytes_disp[2];
  	/* Number of bio's dispatched in current slice */
  	unsigned int io_disp[2];
  
  	/* When did we start a new slice */
  	unsigned long slice_start[2];
  	unsigned long slice_end[2];
  };
  
  struct throtl_data
  {
  	/* service tree for active throtl groups */
  	struct throtl_service_queue service_queue;

  	struct request_queue *queue;
  
  	/* Total number of queued bios on READ and WRITE lists */
  	unsigned int nr_queued[2];
  	/* Work for dispatching throttled bios */
  	struct work_struct dispatch_work;
  };
  static void throtl_pending_timer_fn(unsigned long arg);
  static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
  {
  	return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
  }
  static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
  {
  	return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
  }
  static inline struct blkcg_gq *tg_to_blkg(struct throtl_grp *tg)
  {
  	return pd_to_blkg(&tg->pd);
  }
  /**
   * sq_to_tg - return the throtl_grp the specified service queue belongs to
   * @sq: the throtl_service_queue of interest
   *
   * Return the throtl_grp @sq belongs to.  If @sq is the top-level one
   * embedded in throtl_data, %NULL is returned.
   */
  static struct throtl_grp *sq_to_tg(struct throtl_service_queue *sq)
  {
  	if (sq && sq->parent_sq)
  		return container_of(sq, struct throtl_grp, service_queue);
  	else
  		return NULL;
  }
  
  /**
   * sq_to_td - return throtl_data the specified service queue belongs to
   * @sq: the throtl_service_queue of interest
   *
   * A service_queue can be embedded in either a throtl_grp or throtl_data.
   * Determine the associated throtl_data accordingly and return it.
   */
  static struct throtl_data *sq_to_td(struct throtl_service_queue *sq)
  {
  	struct throtl_grp *tg = sq_to_tg(sq);
  
  	if (tg)
  		return tg->td;
  	else
  		return container_of(sq, struct throtl_data, service_queue);
  }
  
  /**
   * throtl_log - log debug message via blktrace
   * @sq: the service_queue being reported
   * @fmt: printf format string
   * @args: printf args
   *
   * The messages are prefixed with "throtl BLKG_NAME" if @sq belongs to a
   * throtl_grp; otherwise, just "throtl".
   */
  #define throtl_log(sq, fmt, args...)	do {				\
  	struct throtl_grp *__tg = sq_to_tg((sq));			\
  	struct throtl_data *__td = sq_to_td((sq));			\
  									\
  	(void)__td;							\
  	if (likely(!blk_trace_note_message_enabled(__td->queue)))	\
  		break;							\
  	if ((__tg)) {							\
  		char __pbuf[128];					\
  									\
  		blkg_path(tg_to_blkg(__tg), __pbuf, sizeof(__pbuf));	\
  		blk_add_trace_msg(__td->queue, "throtl %s " fmt, __pbuf, ##args); \
  	} else {							\
  		blk_add_trace_msg(__td->queue, "throtl " fmt, ##args);	\
  	}								\
  } while (0)
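
  /*
   * Note: the do/while(0) wrapper lets throtl_log() be used like a plain
   * statement, and the early "break" above bails out before the relatively
   * expensive blkg_path() formatting when blktrace message logging is
   * disabled on the queue.
   */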

  static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
  {
  	INIT_LIST_HEAD(&qn->node);
  	bio_list_init(&qn->bios);
  	qn->tg = tg;
  }
  
  /**
   * throtl_qnode_add_bio - add a bio to a throtl_qnode and activate it
   * @bio: bio being added
   * @qn: qnode to add bio to
   * @queued: the service_queue->queued[] list @qn belongs to
   *
   * Add @bio to @qn and put @qn on @queued if it's not already on.
   * @qn->tg's reference count is bumped when @qn is activated.  See the
   * comment on top of throtl_qnode definition for details.
   */
  static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
  				 struct list_head *queued)
  {
  	bio_list_add(&qn->bios, bio);
  	if (list_empty(&qn->node)) {
  		list_add_tail(&qn->node, queued);
  		blkg_get(tg_to_blkg(qn->tg));
  	}
  }
  
  /**
   * throtl_peek_queued - peek the first bio on a qnode list
   * @queued: the qnode list to peek
   */
  static struct bio *throtl_peek_queued(struct list_head *queued)
  {
  	struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node);
  	struct bio *bio;
  
  	if (list_empty(queued))
  		return NULL;
  
  	bio = bio_list_peek(&qn->bios);
  	WARN_ON_ONCE(!bio);
  	return bio;
  }
  
  /**
   * throtl_pop_queued - pop the first bio from a qnode list
   * @queued: the qnode list to pop a bio from
   * @tg_to_put: optional out argument for throtl_grp to put
   *
   * Pop the first bio from the qnode list @queued.  After popping, the first
   * qnode is removed from @queued if empty or moved to the end of @queued so
   * that the popping order is round-robin.
   *
   * When the first qnode is removed, its associated throtl_grp should be put
   * too.  If @tg_to_put is NULL, this function automatically puts it;
   * otherwise, *@tg_to_put is set to the throtl_grp to put and the caller is
   * responsible for putting it.
   */
  static struct bio *throtl_pop_queued(struct list_head *queued,
  				     struct throtl_grp **tg_to_put)
  {
  	struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node);
  	struct bio *bio;
  
  	if (list_empty(queued))
  		return NULL;
  
  	bio = bio_list_pop(&qn->bios);
  	WARN_ON_ONCE(!bio);
  
  	if (bio_list_empty(&qn->bios)) {
  		list_del_init(&qn->node);
  		if (tg_to_put)
  			*tg_to_put = qn->tg;
  		else
  			blkg_put(tg_to_blkg(qn->tg));
  	} else {
  		list_move_tail(&qn->node, queued);
  	}
  
  	return bio;
  }
  /* init a service_queue, assumes the caller zeroed it */
  static void throtl_service_queue_init(struct throtl_service_queue *sq)
  {
  	INIT_LIST_HEAD(&sq->queued[0]);
  	INIT_LIST_HEAD(&sq->queued[1]);
  	sq->pending_tree = RB_ROOT;
  	setup_timer(&sq->pending_timer, throtl_pending_timer_fn,
  		    (unsigned long)sq);
  }
  static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
  {
  	struct throtl_grp *tg;
  	int rw;
  
  	tg = kzalloc_node(sizeof(*tg), gfp, node);
  	if (!tg)
  		return NULL;

  	throtl_service_queue_init(&tg->service_queue);
  
  	for (rw = READ; rw <= WRITE; rw++) {
  		throtl_qnode_init(&tg->qnode_on_self[rw], tg);
  		throtl_qnode_init(&tg->qnode_on_parent[rw], tg);
  	}
  
  	RB_CLEAR_NODE(&tg->rb_node);
  	tg->bps[READ] = -1;
  	tg->bps[WRITE] = -1;
  	tg->iops[READ] = -1;
  	tg->iops[WRITE] = -1;
  	return &tg->pd;
  }
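
  /*
   * Note: in throtl_pd_alloc() above, -1 is the "no limit" sentinel for
   * both the u64 bps[] and the unsigned int iops[] fields; it is what
   * tg_may_dispatch() tests to let unrestricted groups bypass throttling.
   */
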
  static void throtl_pd_init(struct blkg_policy_data *pd)
  {
  	struct throtl_grp *tg = pd_to_tg(pd);
  	struct blkcg_gq *blkg = tg_to_blkg(tg);
  	struct throtl_data *td = blkg->q->td;
  	struct throtl_service_queue *sq = &tg->service_queue;

  	/*
  	 * If on the default hierarchy, we switch to properly hierarchical
  	 * behavior where limits on a given throtl_grp are applied to the
  	 * whole subtree rather than just the group itself.  e.g. If 16M
  	 * read_bps limit is set on the root group, the whole system can't
  	 * exceed 16M for the device.
  	 *
  	 * If not on the default hierarchy, the broken flat hierarchy
  	 * behavior is retained where all throtl_grps are treated as if
  	 * they're all separate root groups right below throtl_data.
  	 * Limits of a group don't interact with limits of other groups
  	 * regardless of the position of the group in the hierarchy.
  	 */
  	sq->parent_sq = &td->service_queue;
  	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
  		sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
  	tg->td = td;
  }
  /*
   * Set has_rules[] if @tg or any of its parents have limits configured.
   * This doesn't require walking up to the top of the hierarchy as the
   * parent's has_rules[] is guaranteed to be correct.
   */
  static void tg_update_has_rules(struct throtl_grp *tg)
  {
  	struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq);
  	int rw;
  
  	for (rw = READ; rw <= WRITE; rw++)
  		tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
  				    (tg->bps[rw] != -1 || tg->iops[rw] != -1);
  }
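
  /*
   * Example: if a parent group has a read bps limit and a child below it
   * has no limits of its own, the child still ends up with
   * has_rules[READ] set, so its bios keep going through blk-throttle and
   * remain subject to the parent's limit.
   */
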
  static void throtl_pd_online(struct blkg_policy_data *pd)
  {
  	/*
  	 * We don't want new groups to escape the limits of their ancestors.
  	 * Update has_rules[] after a new group is brought online.
  	 */
  	tg_update_has_rules(pd_to_tg(pd));
  }
  static void throtl_pd_free(struct blkg_policy_data *pd)
  {
  	struct throtl_grp *tg = pd_to_tg(pd);
  	del_timer_sync(&tg->service_queue.pending_timer);
  	kfree(tg);
  }
  static struct throtl_grp *
  throtl_rb_first(struct throtl_service_queue *parent_sq)
  {
  	/* Service tree is empty */
  	if (!parent_sq->nr_pending)
  		return NULL;
  	if (!parent_sq->first_pending)
  		parent_sq->first_pending = rb_first(&parent_sq->pending_tree);

  	if (parent_sq->first_pending)
  		return rb_entry_tg(parent_sq->first_pending);
  
  	return NULL;
  }
  
  static void rb_erase_init(struct rb_node *n, struct rb_root *root)
  {
  	rb_erase(n, root);
  	RB_CLEAR_NODE(n);
  }
  static void throtl_rb_erase(struct rb_node *n,
  			    struct throtl_service_queue *parent_sq)
  {
  	if (parent_sq->first_pending == n)
  		parent_sq->first_pending = NULL;
  	rb_erase_init(n, &parent_sq->pending_tree);
  	--parent_sq->nr_pending;
  }
  static void update_min_dispatch_time(struct throtl_service_queue *parent_sq)
  {
  	struct throtl_grp *tg;
  	tg = throtl_rb_first(parent_sq);
  	if (!tg)
  		return;
  	parent_sq->first_pending_disptime = tg->disptime;
  }
  static void tg_service_queue_add(struct throtl_grp *tg)
  {
  	struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq;
  	struct rb_node **node = &parent_sq->pending_tree.rb_node;
  	struct rb_node *parent = NULL;
  	struct throtl_grp *__tg;
  	unsigned long key = tg->disptime;
  	int left = 1;
  
  	while (*node != NULL) {
  		parent = *node;
  		__tg = rb_entry_tg(parent);
  
  		if (time_before(key, __tg->disptime))
  			node = &parent->rb_left;
  		else {
  			node = &parent->rb_right;
  			left = 0;
  		}
  	}
  
  	if (left)
  		parent_sq->first_pending = &tg->rb_node;
  
  	rb_link_node(&tg->rb_node, parent, node);
  	rb_insert_color(&tg->rb_node, &parent_sq->pending_tree);
  }
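
  /*
   * Note: tg_service_queue_add() keys the pending_tree by tg->disptime,
   * so throtl_rb_first() always yields the group that becomes eligible
   * to dispatch earliest.  The cached first_pending pointer is updated
   * here only when the new node was inserted leftmost (@left stayed 1).
   */
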
  static void __throtl_enqueue_tg(struct throtl_grp *tg)
  {
  	tg_service_queue_add(tg);
  	tg->flags |= THROTL_TG_PENDING;
  	tg->service_queue.parent_sq->nr_pending++;
  }
  static void throtl_enqueue_tg(struct throtl_grp *tg)
  {
  	if (!(tg->flags & THROTL_TG_PENDING))
  		__throtl_enqueue_tg(tg);
  }
  static void __throtl_dequeue_tg(struct throtl_grp *tg)
  {
  	throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
  	tg->flags &= ~THROTL_TG_PENDING;
  }
  static void throtl_dequeue_tg(struct throtl_grp *tg)
  {
  	if (tg->flags & THROTL_TG_PENDING)
  		__throtl_dequeue_tg(tg);
  }
  /* Call with queue lock held */
  static void throtl_schedule_pending_timer(struct throtl_service_queue *sq,
  					  unsigned long expires)
  {
  	mod_timer(&sq->pending_timer, expires);
  	throtl_log(sq, "schedule timer. delay=%lu jiffies=%lu",
  		   expires - jiffies, jiffies);
  }
  /**
   * throtl_schedule_next_dispatch - schedule the next dispatch cycle
   * @sq: the service_queue to schedule dispatch for
   * @force: force scheduling
   *
   * Arm @sq->pending_timer so that the next dispatch cycle starts on the
   * dispatch time of the first pending child.  Returns %true if either timer
   * is armed or there's no pending child left.  %false if the current
   * dispatch window is still open and the caller should continue
   * dispatching.
   *
   * If @force is %true, the dispatch timer is always scheduled and this
   * function is guaranteed to return %true.  This is to be used when the
   * caller can't dispatch itself and needs to invoke pending_timer
   * unconditionally.  Note that forced scheduling is likely to induce short
   * delay before dispatch starts even if @sq->first_pending_disptime is not
   * in the future and thus shouldn't be used in hot paths.
   */
  static bool throtl_schedule_next_dispatch(struct throtl_service_queue *sq,
  					  bool force)
  {
  	/* any pending children left? */
  	if (!sq->nr_pending)
  		return true;

  	update_min_dispatch_time(sq);

  	/* is the next dispatch time in the future? */
  	if (force || time_after(sq->first_pending_disptime, jiffies)) {
  		throtl_schedule_pending_timer(sq, sq->first_pending_disptime);
  		return true;
  	}
  	/* tell the caller to continue dispatching */
  	return false;
  }
  static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
  		bool rw, unsigned long start)
  {
  	tg->bytes_disp[rw] = 0;
  	tg->io_disp[rw] = 0;
  
  	/*
  	 * Previous slice has expired. We must have trimmed it after last
  	 * bio dispatch. That means since start of last slice, we never used
  	 * that bandwidth. Do try to make use of that bandwidth while giving
  	 * credit.
  	 */
  	if (time_after_eq(start, tg->slice_start[rw]))
  		tg->slice_start[rw] = start;
  
  	tg->slice_end[rw] = jiffies + throtl_slice;
  	throtl_log(&tg->service_queue,
  		   "[%c] new slice with credit start=%lu end=%lu jiffies=%lu",
  		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
  		   tg->slice_end[rw], jiffies);
  }
  static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
  {
  	tg->bytes_disp[rw] = 0;
  	tg->io_disp[rw] = 0;
  	tg->slice_start[rw] = jiffies;
  	tg->slice_end[rw] = jiffies + throtl_slice;
  	throtl_log(&tg->service_queue,
  		   "[%c] new slice start=%lu end=%lu jiffies=%lu",
  		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
  		   tg->slice_end[rw], jiffies);
  }
  static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
  					unsigned long jiffy_end)
  {
  	tg->slice_end[rw] = roundup(jiffy_end, throtl_slice);
  }
  static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw,
  				       unsigned long jiffy_end)
  {
  	tg->slice_end[rw] = roundup(jiffy_end, throtl_slice);
  	throtl_log(&tg->service_queue,
  		   "[%c] extend slice start=%lu end=%lu jiffies=%lu",
  		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
  		   tg->slice_end[rw], jiffies);
  }
  
  /* Determine if previously allocated or extended slice is complete or not */
  static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
  {
  	if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw]))
  		return false;
  
  	return true;
  }
  
  /* Trim the used slices and adjust slice start accordingly */
  static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
  {
  	unsigned long nr_slices, time_elapsed, io_trim;
  	u64 bytes_trim, tmp;
  
  	BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw]));
  
  	/*
  	 * If bps are unlimited (-1), then time slices don't get
  	 * renewed. Don't try to trim the slice if the slice is used up. A
  	 * new slice will start when appropriate.
  	 */
  	if (throtl_slice_used(tg, rw))
  		return;
  	/*
  	 * A bio has been dispatched. Also adjust slice_end. It might happen
  	 * that initially the cgroup limit was very low resulting in a high
  	 * slice_end, but later the limit was bumped up and the bio was
  	 * dispatched sooner; then we need to reduce slice_end. A bogus high
  	 * slice_end is bad because it does not allow a new slice to start.
  	 */
  	throtl_set_slice_end(tg, rw, jiffies + throtl_slice);

  	time_elapsed = jiffies - tg->slice_start[rw];
  
  	nr_slices = time_elapsed / throtl_slice;
  
  	if (!nr_slices)
  		return;
  	tmp = tg->bps[rw] * throtl_slice * nr_slices;
  	do_div(tmp, HZ);
  	bytes_trim = tmp;

  	io_trim = (tg->iops[rw] * throtl_slice * nr_slices)/HZ;

  	if (!bytes_trim && !io_trim)
  		return;
  
  	if (tg->bytes_disp[rw] >= bytes_trim)
  		tg->bytes_disp[rw] -= bytes_trim;
  	else
  		tg->bytes_disp[rw] = 0;
  	if (tg->io_disp[rw] >= io_trim)
  		tg->io_disp[rw] -= io_trim;
  	else
  		tg->io_disp[rw] = 0;
  	tg->slice_start[rw] += nr_slices * throtl_slice;
  	throtl_log(&tg->service_queue,
  		   "[%c] trim slice nr=%lu bytes=%llu io=%lu start=%lu end=%lu jiffies=%lu",
  		   rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim,
  		   tg->slice_start[rw], tg->slice_end[rw], jiffies);
  }
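
  /*
   * Worked example for throtl_trim_slice() (hypothetical numbers): with
   * HZ=1000, throtl_slice is 100 jiffies.  A group limited to
   * bps=1000000 whose last dispatch was exactly one slice ago has
   * nr_slices=1, so bytes_trim = 1000000 * 100 * 1 / 1000 = 100000
   * bytes are forgiven from bytes_disp and slice_start advances by one
   * slice.
   */
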
  static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
  				  unsigned long *wait)
  {
  	bool rw = bio_data_dir(bio);
  	unsigned int io_allowed;
  	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
  	u64 tmp;

  	jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];

  	/* Slice has just started. Consider one slice interval */
  	if (!jiffy_elapsed)
  		jiffy_elapsed_rnd = throtl_slice;
  
  	jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, throtl_slice);
  	/*
  	 * jiffy_elapsed_rnd should not be a big value as the minimum iops
  	 * can be 1; then at most jiffy_elapsed should be equivalent to 1
  	 * second, as we will allow a dispatch after 1 second and after that
  	 * the slice should have been trimmed.
  	 */
  
  	tmp = (u64)tg->iops[rw] * jiffy_elapsed_rnd;
  	do_div(tmp, HZ);
  
  	if (tmp > UINT_MAX)
  		io_allowed = UINT_MAX;
  	else
  		io_allowed = tmp;
  
  	if (tg->io_disp[rw] + 1 <= io_allowed) {
  		if (wait)
  			*wait = 0;
  		return true;
  	}
  	/* Calc approx time to dispatch */
  	jiffy_wait = ((tg->io_disp[rw] + 1) * HZ)/tg->iops[rw] + 1;
  
  	if (jiffy_wait > jiffy_elapsed)
  		jiffy_wait = jiffy_wait - jiffy_elapsed;
  	else
  		jiffy_wait = 1;
  
  	if (wait)
  		*wait = jiffy_wait;
  	return false;
  }
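
  /*
   * Example for the iops wait above (hypothetical numbers): with HZ=1000,
   * iops=100 and io_disp=10 bios already dispatched in this slice, the
   * estimated wait for the 11th bio is (11 * 1000) / 100 + 1 = 111
   * jiffies, minus whatever time has already elapsed in the slice
   * (minimum 1 jiffy).
   */
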
  static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
  				 unsigned long *wait)
  {
  	bool rw = bio_data_dir(bio);
  	u64 bytes_allowed, extra_bytes, tmp;
  	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
  
  	jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
  
  	/* Slice has just started. Consider one slice interval */
  	if (!jiffy_elapsed)
  		jiffy_elapsed_rnd = throtl_slice;
  
  	jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, throtl_slice);
  	tmp = tg->bps[rw] * jiffy_elapsed_rnd;
  	do_div(tmp, HZ);
  	bytes_allowed = tmp;

  	if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) {
  		if (wait)
  			*wait = 0;
  		return true;
  	}
  
  	/* Calc approx time to dispatch */
  	extra_bytes = tg->bytes_disp[rw] + bio->bi_iter.bi_size - bytes_allowed;
  	jiffy_wait = div64_u64(extra_bytes * HZ, tg->bps[rw]);
  
  	if (!jiffy_wait)
  		jiffy_wait = 1;
  
  	/*
  	 * This wait time is without taking into consideration the rounding
  	 * up we did. Add that time also.
  	 */
  	jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed);
  	if (wait)
  		*wait = jiffy_wait;
  	return false;
  }
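
  /*
   * Example for the bps wait above (hypothetical numbers): with HZ=1000
   * and bps=1000000, a bio that overshoots the slice's byte budget by
   * extra_bytes=250000 yields jiffy_wait = 250000 * 1000 / 1000000 = 250
   * jiffies (~250ms), plus the slack from rounding jiffy_elapsed up to a
   * slice boundary.
   */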
  
  /*
   * Returns whether one can dispatch a bio or not. Also returns approx number
   * of jiffies to wait before this bio is within the IO rate and can be dispatched
   */
  static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
  			    unsigned long *wait)
  {
  	bool rw = bio_data_dir(bio);
  	unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0;
  
  	/*
  	 * Currently the whole state machine of the group depends on the
  	 * first bio queued in the group bio list. So one should not be
  	 * calling this function with a different bio if there are other
  	 * bios queued.
  	 */
  	BUG_ON(tg->service_queue.nr_queued[rw] &&
  	       bio != throtl_peek_queued(&tg->service_queue.queued[rw]));

  	/* If tg->bps = -1, then BW is unlimited */
  	if (tg->bps[rw] == -1 && tg->iops[rw] == -1) {
  		if (wait)
  			*wait = 0;
  		return true;
  	}
  
  	/*
  	 * If previous slice expired, start a new one otherwise renew/extend
  	 * existing slice to make sure it is at least throtl_slice interval
  	 * long since now. A new slice is started only for an empty throttle
  	 * group. If there is a queued bio, that means there should be an
  	 * active slice and it should be extended instead.
  	 */
  	if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
  		throtl_start_new_slice(tg, rw);
  	else {
  		if (time_before(tg->slice_end[rw], jiffies + throtl_slice))
  			throtl_extend_slice(tg, rw, jiffies + throtl_slice);
  	}
  	if (tg_with_in_bps_limit(tg, bio, &bps_wait) &&
  	    tg_with_in_iops_limit(tg, bio, &iops_wait)) {
  		if (wait)
  			*wait = 0;
  		return true;
  	}
  
  	max_wait = max(bps_wait, iops_wait);
  
  	if (wait)
  		*wait = max_wait;
  
  	if (time_before(tg->slice_end[rw], jiffies + max_wait))
  		throtl_extend_slice(tg, rw, jiffies + max_wait);
  
  	return false;
  }
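
  /*
   * Note: the two limit checks in tg_may_dispatch() short-circuit, so if
   * the bps limit alone blocks the bio, iops_wait is left at 0 and
   * max_wait is simply the bps wait.  The slice is extended to cover
   * max_wait so the budget accounting stays valid until the bio can go
   * out.
   */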
  
  static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
  {
  	bool rw = bio_data_dir(bio);
  
  	/* Charge the bio to the group */
  	tg->bytes_disp[rw] += bio->bi_iter.bi_size;
  	tg->io_disp[rw]++;

  	/*
  	 * REQ_THROTTLED is used to prevent the same bio from being throttled
  	 * more than once as a throttled bio will go through blk-throtl the
  	 * second time when it eventually gets issued.  Set it when a bio
  	 * is being charged to a tg.
  	 */
  	if (!(bio->bi_opf & REQ_THROTTLED))
  		bio->bi_opf |= REQ_THROTTLED;
  }
  /**
   * throtl_add_bio_tg - add a bio to the specified throtl_grp
   * @bio: bio to add
   * @qn: qnode to use
   * @tg: the target throtl_grp
   *
   * Add @bio to @tg's service_queue using @qn.  If @qn is not specified,
   * tg->qnode_on_self[] is used.
   */
  static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
  			      struct throtl_grp *tg)
  {
  	struct throtl_service_queue *sq = &tg->service_queue;
  	bool rw = bio_data_dir(bio);
  	if (!qn)
  		qn = &tg->qnode_on_self[rw];
  	/*
  	 * If @tg doesn't currently have any bios queued in the same
  	 * direction, queueing @bio can change when @tg should be
  	 * dispatched.  Mark that @tg was empty.  This is automatically
  	 * cleared on the next tg_update_disptime().
  	 */
  	if (!sq->nr_queued[rw])
  		tg->flags |= THROTL_TG_WAS_EMPTY;
  	throtl_qnode_add_bio(bio, qn, &sq->queued[rw]);
  	sq->nr_queued[rw]++;
  	throtl_enqueue_tg(tg);
  }
  static void tg_update_disptime(struct throtl_grp *tg)
  {
  	struct throtl_service_queue *sq = &tg->service_queue;
  	unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
  	struct bio *bio;
  	if ((bio = throtl_peek_queued(&sq->queued[READ])))
  		tg_may_dispatch(tg, bio, &read_wait);

  	if ((bio = throtl_peek_queued(&sq->queued[WRITE])))
  		tg_may_dispatch(tg, bio, &write_wait);
  
  	min_wait = min(read_wait, write_wait);
  	disptime = jiffies + min_wait;
  	/* Update dispatch time */
  	throtl_dequeue_tg(tg);
  	tg->disptime = disptime;
  	throtl_enqueue_tg(tg);
  
  	/* see throtl_add_bio_tg() */
  	tg->flags &= ~THROTL_TG_WAS_EMPTY;
  }
  static void start_parent_slice_with_credit(struct throtl_grp *child_tg,
  					struct throtl_grp *parent_tg, bool rw)
  {
  	if (throtl_slice_used(parent_tg, rw)) {
  		throtl_start_new_slice_with_credit(parent_tg, rw,
  				child_tg->slice_start[rw]);
  	}
  
  }
  static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
  {
  	struct throtl_service_queue *sq = &tg->service_queue;
  	struct throtl_service_queue *parent_sq = sq->parent_sq;
  	struct throtl_grp *parent_tg = sq_to_tg(parent_sq);
  	struct throtl_grp *tg_to_put = NULL;
  	struct bio *bio;
  	/*
  	 * @bio is being transferred from @tg to @parent_sq.  Popping a bio
  	 * from @tg may put its reference and @parent_sq might end up
  	 * getting released prematurely.  Remember the tg to put and put it
  	 * after @bio is transferred to @parent_sq.
  	 */
  	bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put);
  	sq->nr_queued[rw]--;
  
  	throtl_charge_bio(tg, bio);
  
  	/*
  	 * If our parent is another tg, we just need to transfer @bio to
  	 * the parent using throtl_add_bio_tg().  If our parent is
  	 * @td->service_queue, @bio is ready to be issued.  Put it on its
  	 * bio_lists[] and decrease total number queued.  The caller is
  	 * responsible for issuing these bios.
  	 */
  	if (parent_tg) {
  		throtl_add_bio_tg(bio, &tg->qnode_on_parent[rw], parent_tg);
  		start_parent_slice_with_credit(tg, parent_tg, rw);
  	} else {
  		throtl_qnode_add_bio(bio, &tg->qnode_on_parent[rw],
  				     &parent_sq->queued[rw]);
  		BUG_ON(tg->td->nr_queued[rw] <= 0);
  		tg->td->nr_queued[rw]--;
  	}

  	throtl_trim_slice(tg, rw);

  	if (tg_to_put)
  		blkg_put(tg_to_blkg(tg_to_put));
  }
  static int throtl_dispatch_tg(struct throtl_grp *tg)
  {
  	struct throtl_service_queue *sq = &tg->service_queue;
  	unsigned int nr_reads = 0, nr_writes = 0;
  	unsigned int max_nr_reads = throtl_grp_quantum*3/4;
  	unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads;
  	struct bio *bio;
  
  	/* Try to dispatch 75% READS and 25% WRITES */
  	while ((bio = throtl_peek_queued(&sq->queued[READ])) &&
  	       tg_may_dispatch(tg, bio, NULL)) {

  		tg_dispatch_one_bio(tg, bio_data_dir(bio));
  		nr_reads++;
  
  		if (nr_reads >= max_nr_reads)
  			break;
  	}
  	while ((bio = throtl_peek_queued(&sq->queued[WRITE])) &&
  	       tg_may_dispatch(tg, bio, NULL)) {

  		tg_dispatch_one_bio(tg, bio_data_dir(bio));
  		nr_writes++;
  
  		if (nr_writes >= max_nr_writes)
  			break;
  	}
  
  	return nr_reads + nr_writes;
  }
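
  /*
   * With the default throtl_grp_quantum of 8, max_nr_reads above is 6
   * and max_nr_writes is 2, which is what gives the advertised 75%/25%
   * read/write split per dispatch round.
   */
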
  static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
  {
  	unsigned int nr_disp = 0;
  
  	while (1) {
  		struct throtl_grp *tg = throtl_rb_first(parent_sq);
  		struct throtl_service_queue *sq = &tg->service_queue;
  
  		if (!tg)
  			break;
  
  		if (time_before(jiffies, tg->disptime))
  			break;
  		throtl_dequeue_tg(tg);

  		nr_disp += throtl_dispatch_tg(tg);

  		if (sq->nr_queued[0] || sq->nr_queued[1])
  			tg_update_disptime(tg);
  
  		if (nr_disp >= throtl_quantum)
  			break;
  	}
  
  	return nr_disp;
  }
  /**
   * throtl_pending_timer_fn - timer function for service_queue->pending_timer
   * @arg: the throtl_service_queue being serviced
   *
   * This timer is armed when a child throtl_grp with active bio's becomes
   * pending and queued on the service_queue's pending_tree and expires when
   * the first child throtl_grp should be dispatched.  This function
   * dispatches bio's from the children throtl_grps to the parent
   * service_queue.
   *
   * If the parent's parent is another throtl_grp, dispatching is propagated
   * by either arming its pending_timer or repeating dispatch directly.  If
   * the top-level service_tree is reached, throtl_data->dispatch_work is
   * kicked so that the ready bio's are issued.
   */
  static void throtl_pending_timer_fn(unsigned long arg)
  {
  	struct throtl_service_queue *sq = (void *)arg;
  	struct throtl_grp *tg = sq_to_tg(sq);
  	struct throtl_data *td = sq_to_td(sq);
  	struct request_queue *q = td->queue;
  	struct throtl_service_queue *parent_sq;
  	bool dispatched;
  	int ret;
  
  	spin_lock_irq(q->queue_lock);
  again:
  	parent_sq = sq->parent_sq;
  	dispatched = false;

  	while (true) {
  		throtl_log(sq, "dispatch nr_queued=%u read=%u write=%u",
  			   sq->nr_queued[READ] + sq->nr_queued[WRITE],
  			   sq->nr_queued[READ], sq->nr_queued[WRITE]);
  
  		ret = throtl_select_dispatch(sq);
  		if (ret) {
  			throtl_log(sq, "bios disp=%u", ret);
  			dispatched = true;
  		}

  		if (throtl_schedule_next_dispatch(sq, false))
  			break;

  		/* this dispatch window is still open, relax and repeat */
  		spin_unlock_irq(q->queue_lock);
  		cpu_relax();
  		spin_lock_irq(q->queue_lock);
  	}

  	if (!dispatched)
  		goto out_unlock;

  	if (parent_sq) {
  		/* @parent_sq is another throtl_grp, propagate dispatch */
  		if (tg->flags & THROTL_TG_WAS_EMPTY) {
  			tg_update_disptime(tg);
  			if (!throtl_schedule_next_dispatch(parent_sq, false)) {
  				/* window is already open, repeat dispatching */
  				sq = parent_sq;
  				tg = sq_to_tg(sq);
  				goto again;
  			}
  		}
  	} else {
  		/* reached the top-level, queue issuing */
  		queue_work(kthrotld_workqueue, &td->dispatch_work);
  	}
  out_unlock:
  	spin_unlock_irq(q->queue_lock);
  }

  /**
   * blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work
   * @work: work item being executed
   *
   * This function is queued for execution when bio's reach the bio_lists[]
   * of throtl_data->service_queue.  Those bio's are ready and issued by this
   * function.
   */
  static void blk_throtl_dispatch_work_fn(struct work_struct *work)
  {
  	struct throtl_data *td = container_of(work, struct throtl_data,
  					      dispatch_work);
  	struct throtl_service_queue *td_sq = &td->service_queue;
  	struct request_queue *q = td->queue;
  	struct bio_list bio_list_on_stack;
  	struct bio *bio;
  	struct blk_plug plug;
  	int rw;
  
  	bio_list_init(&bio_list_on_stack);
  
  	spin_lock_irq(q->queue_lock);
  	for (rw = READ; rw <= WRITE; rw++)
  		while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL)))
  			bio_list_add(&bio_list_on_stack, bio);
  	spin_unlock_irq(q->queue_lock);
  
  	if (!bio_list_empty(&bio_list_on_stack)) {
  		blk_start_plug(&plug);
  		while((bio = bio_list_pop(&bio_list_on_stack)))
  			generic_make_request(bio);
  		blk_finish_plug(&plug);
  	}
  }
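
  /*
   * Note: blk_throtl_dispatch_work_fn() first moves the ready bios to an
   * on-stack list under queue_lock and then issues them outside the
   * lock; the blk_start_plug()/blk_finish_plug() pair batches the
   * resulting requests so the driver sees fewer, larger dispatches.
   */
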
  static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd,
  			      int off)
  {
  	struct throtl_grp *tg = pd_to_tg(pd);
  	u64 v = *(u64 *)((void *)tg + off);

  	if (v == -1)
  		return 0;
  	return __blkg_prfill_u64(sf, pd, v);
  }
  static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd,
  			       int off)
  {
  	struct throtl_grp *tg = pd_to_tg(pd);
  	unsigned int v = *(unsigned int *)((void *)tg + off);

  	if (v == -1)
  		return 0;
  	return __blkg_prfill_u64(sf, pd, v);
  }
  static int tg_print_conf_u64(struct seq_file *sf, void *v)
  {
  	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_u64,
  			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
  	return 0;
  }
  static int tg_print_conf_uint(struct seq_file *sf, void *v)
  {
  	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_uint,
  			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
af133ceb2   Tejun Heo   blkcg: move blkio...
1062
  	return 0;
60c2bc2d5   Tejun Heo   blkcg: move conf/...
1063
  }

static void tg_conf_updated(struct throtl_grp *tg)
{
	struct throtl_service_queue *sq = &tg->service_queue;
	struct cgroup_subsys_state *pos_css;
	struct blkcg_gq *blkg;

	throtl_log(&tg->service_queue,
		   "limit change rbps=%llu wbps=%llu riops=%u wiops=%u",
		   tg->bps[READ], tg->bps[WRITE],
		   tg->iops[READ], tg->iops[WRITE]);

	/*
	 * Update has_rules[] flags for the updated tg's subtree.  A tg is
	 * considered to have rules if either the tg itself or any of its
	 * ancestors has rules.  This identifies groups without any
	 * restrictions in the whole hierarchy and allows them to bypass
	 * blk-throttle.
	 */
	blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg))
		tg_update_has_rules(blkg_to_tg(blkg));

	/*
	 * We're already holding queue_lock and know @tg is valid.  Let's
	 * apply the new config directly.
	 *
	 * Restart the slices for both READ and WRITE.  It might happen
	 * that a group's limits are dropped suddenly and we don't want to
	 * account recently dispatched IO with the new low rate.
	 */
	throtl_start_new_slice(tg, 0);
	throtl_start_new_slice(tg, 1);

	if (tg->flags & THROTL_TG_PENDING) {
		tg_update_disptime(tg);
		throtl_schedule_next_dispatch(sq->parent_sq, true);
	}
}
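
/*
 * Ordering note (sketch): blkg_for_each_descendant_pre() visits a parent
 * before its children, so each child's has_rules[] can consult an
 * already-updated parent.  Assuming tg_update_has_rules() follows the
 * usual pattern, per direction it computes roughly:
 *
 *	tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
 *			    tg->bps[rw] != -1 || tg->iops[rw] != -1;
 */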
  
  static ssize_t tg_set_conf(struct kernfs_open_file *of,
  			   char *buf, size_t nbytes, loff_t off, bool is_u64)
  {
  	struct blkcg *blkcg = css_to_blkcg(of_css(of));
  	struct blkg_conf_ctx ctx;
  	struct throtl_grp *tg;
  	int ret;
  	u64 v;
  
  	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
  	if (ret)
  		return ret;
  
  	ret = -EINVAL;
  	if (sscanf(ctx.body, "%llu", &v) != 1)
  		goto out_finish;
  	if (!v)
  		v = -1;
  
  	tg = blkg_to_tg(ctx.blkg);
  
  	if (is_u64)
  		*(u64 *)((void *)tg + of_cft(of)->private) = v;
  	else
  		*(unsigned int *)((void *)tg + of_cft(of)->private) = v;

	tg_conf_updated(tg);
	ret = 0;
out_finish:
	blkg_conf_finish(&ctx);
	return ret ?: nbytes;
}

static ssize_t tg_set_conf_u64(struct kernfs_open_file *of,
			       char *buf, size_t nbytes, loff_t off)
{
	return tg_set_conf(of, buf, nbytes, off, true);
}

static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off)
{
	return tg_set_conf(of, buf, nbytes, off, false);
}

static struct cftype throtl_legacy_files[] = {
	{
		.name = "throttle.read_bps_device",
		.private = offsetof(struct throtl_grp, bps[READ]),
		.seq_show = tg_print_conf_u64,
		.write = tg_set_conf_u64,
	},
	{
		.name = "throttle.write_bps_device",
		.private = offsetof(struct throtl_grp, bps[WRITE]),
		.seq_show = tg_print_conf_u64,
		.write = tg_set_conf_u64,
	},
	{
		.name = "throttle.read_iops_device",
		.private = offsetof(struct throtl_grp, iops[READ]),
		.seq_show = tg_print_conf_uint,
		.write = tg_set_conf_uint,
	},
	{
		.name = "throttle.write_iops_device",
		.private = offsetof(struct throtl_grp, iops[WRITE]),
		.seq_show = tg_print_conf_uint,
		.write = tg_set_conf_uint,
	},
	{
		.name = "throttle.io_service_bytes",
		.private = (unsigned long)&blkcg_policy_throtl,
		.seq_show = blkg_print_stat_bytes,
	},
	{
		.name = "throttle.io_serviced",
		.private = (unsigned long)&blkcg_policy_throtl,
		.seq_show = blkg_print_stat_ios,
	},
	{ }	/* terminate */
};
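
/*
 * Usage sketch for the legacy (cgroup v1) files above.  The format is
 * "<major>:<minor> <limit>"; writing 0 clears a limit (tg_set_conf()
 * maps it to -1).  Paths assume a mounted v1 blkio hierarchy:
 *
 *	echo "8:0 1048576" > blkio.throttle.read_bps_device   (cap at 1MB/s)
 *	echo "8:0 0"       > blkio.throttle.read_bps_device   (remove cap)
 */
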
  static u64 tg_prfill_max(struct seq_file *sf, struct blkg_policy_data *pd,
  			 int off)
  {
  	struct throtl_grp *tg = pd_to_tg(pd);
  	const char *dname = blkg_dev_name(pd->blkg);
  	char bufs[4][21] = { "max", "max", "max", "max" };
  
  	if (!dname)
  		return 0;
  	if (tg->bps[READ] == -1 && tg->bps[WRITE] == -1 &&
  	    tg->iops[READ] == -1 && tg->iops[WRITE] == -1)
  		return 0;
  
  	if (tg->bps[READ] != -1)
  		snprintf(bufs[0], sizeof(bufs[0]), "%llu", tg->bps[READ]);
  	if (tg->bps[WRITE] != -1)
  		snprintf(bufs[1], sizeof(bufs[1]), "%llu", tg->bps[WRITE]);
  	if (tg->iops[READ] != -1)
  		snprintf(bufs[2], sizeof(bufs[2]), "%u", tg->iops[READ]);
  	if (tg->iops[WRITE] != -1)
  		snprintf(bufs[3], sizeof(bufs[3]), "%u", tg->iops[WRITE]);
  
	seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s\n",
  		   dname, bufs[0], bufs[1], bufs[2], bufs[3]);
  	return 0;
  }
  
  static int tg_print_max(struct seq_file *sf, void *v)
  {
  	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_max,
  			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
  	return 0;
  }
  
  static ssize_t tg_set_max(struct kernfs_open_file *of,
  			  char *buf, size_t nbytes, loff_t off)
  {
  	struct blkcg *blkcg = css_to_blkcg(of_css(of));
  	struct blkg_conf_ctx ctx;
  	struct throtl_grp *tg;
  	u64 v[4];
  	int ret;
  
  	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
  	if (ret)
  		return ret;
  
  	tg = blkg_to_tg(ctx.blkg);
  
  	v[0] = tg->bps[READ];
  	v[1] = tg->bps[WRITE];
  	v[2] = tg->iops[READ];
  	v[3] = tg->iops[WRITE];
  
  	while (true) {
  		char tok[27];	/* wiops=18446744073709551616 */
  		char *p;
  		u64 val = -1;
  		int len;
  
  		if (sscanf(ctx.body, "%26s%n", tok, &len) != 1)
  			break;
  		if (tok[0] == '\0')
  			break;
  		ctx.body += len;
  
  		ret = -EINVAL;
  		p = tok;
  		strsep(&p, "=");
  		if (!p || (sscanf(p, "%llu", &val) != 1 && strcmp(p, "max")))
  			goto out_finish;
  
  		ret = -ERANGE;
  		if (!val)
  			goto out_finish;
  
  		ret = -EINVAL;
  		if (!strcmp(tok, "rbps"))
  			v[0] = val;
  		else if (!strcmp(tok, "wbps"))
  			v[1] = val;
  		else if (!strcmp(tok, "riops"))
  			v[2] = min_t(u64, val, UINT_MAX);
  		else if (!strcmp(tok, "wiops"))
  			v[3] = min_t(u64, val, UINT_MAX);
  		else
  			goto out_finish;
  	}
  
  	tg->bps[READ] = v[0];
  	tg->bps[WRITE] = v[1];
  	tg->iops[READ] = v[2];
  	tg->iops[WRITE] = v[3];
  
  	tg_conf_updated(tg);
  	ret = 0;
  out_finish:
  	blkg_conf_finish(&ctx);
  	return ret ?: nbytes;
  }
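
/*
 * Usage sketch for the cgroup v2 interface implemented above (exposed as
 * "io.max").  tg_set_max() accepts rbps/wbps/riops/wiops keys; "max"
 * clears a limit and 0 is rejected with -ERANGE:
 *
 *	echo "8:0 rbps=2097152 wiops=120" > io.max
 *	echo "8:0 wiops=max" > io.max
 */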
  
  static struct cftype throtl_files[] = {
  	{
  		.name = "max",
  		.flags = CFTYPE_NOT_ON_ROOT,
  		.seq_show = tg_print_max,
  		.write = tg_set_max,
  	},
  	{ }	/* terminate */
  };

static void throtl_shutdown_wq(struct request_queue *q)
{
	struct throtl_data *td = q->td;

	cancel_work_sync(&td->dispatch_work);
}

static struct blkcg_policy blkcg_policy_throtl = {
	.dfl_cftypes		= throtl_files,
	.legacy_cftypes		= throtl_legacy_files,

	.pd_alloc_fn		= throtl_pd_alloc,
	.pd_init_fn		= throtl_pd_init,
	.pd_online_fn		= throtl_pd_online,
	.pd_free_fn		= throtl_pd_free,
};
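
/*
 * Lifecycle note (sketch): for each (blkcg, request_queue) pair the
 * blkcg core calls pd_alloc_fn, then pd_init_fn and pd_online_fn, and
 * pd_free_fn on teardown.  The returned blkg_policy_data is embedded in
 * struct throtl_grp, which is what pd_to_tg()/tg_to_blkg() convert
 * between.
 */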

bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
		    struct bio *bio)
{
	struct throtl_qnode *qn = NULL;
	struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
	struct throtl_service_queue *sq;
	bool rw = bio_data_dir(bio);
	bool throttled = false;

	WARN_ON_ONCE(!rcu_read_lock_held());

	/* see throtl_charge_bio() */
	if ((bio->bi_opf & REQ_THROTTLED) || !tg->has_rules[rw])
		goto out;

	spin_lock_irq(q->queue_lock);

	if (unlikely(blk_queue_bypass(q)))
		goto out_unlock;

	sq = &tg->service_queue;

	while (true) {
		/* throtl is FIFO - if bios are already queued, should queue */
		if (sq->nr_queued[rw])
			break;

		/* if above limits, break to queue */
		if (!tg_may_dispatch(tg, bio, NULL))
			break;

		/* within limits, let's charge and dispatch directly */
		throtl_charge_bio(tg, bio);

		/*
		 * We need to trim the slice even when bios are not being
		 * queued, otherwise a bio might not be queued for a long
		 * time while the slice keeps on extending without trim
		 * being called.  If limits are then reduced suddenly, all
		 * the IO dispatched so far is accounted at the new low
		 * rate and newly queued IO gets a really long dispatch
		 * time.
		 *
		 * So keep on trimming the slice even if no bio is queued.
		 */
		throtl_trim_slice(tg, rw);

		/*
		 * @bio passed through this layer without being throttled.
		 * Climb up the ladder.  If we're already at the top, it
		 * can be executed directly.
		 */
		qn = &tg->qnode_on_parent[rw];
		sq = sq->parent_sq;
		tg = sq_to_tg(sq);
		if (!tg)
			goto out_unlock;
	}

	/* out-of-limit, queue to @tg */
	throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d",
		   rw == READ ? 'R' : 'W',
		   tg->bytes_disp[rw], bio->bi_iter.bi_size, tg->bps[rw],
		   tg->io_disp[rw], tg->iops[rw],
		   sq->nr_queued[READ], sq->nr_queued[WRITE]);

	bio_associate_current(bio);
	tg->td->nr_queued[rw]++;
	throtl_add_bio_tg(bio, qn, tg);
	throttled = true;

	/*
	 * Update @tg's dispatch time and force schedule dispatch if @tg
	 * was empty before @bio.  The forced scheduling isn't likely to
	 * cause undue delay as @bio is likely to be dispatched directly if
	 * its @tg's disptime is not in the future.
	 */
	if (tg->flags & THROTL_TG_WAS_EMPTY) {
		tg_update_disptime(tg);
		throtl_schedule_next_dispatch(tg->service_queue.parent_sq, true);
	}

out_unlock:
	spin_unlock_irq(q->queue_lock);
out:
	/*
	 * As multiple blk-throtls may stack in the same issue path, we
	 * don't want bios to leave with the flag set.  Clear the flag if
	 * being issued.
	 */
	if (!throttled)
		bio->bi_opf &= ~REQ_THROTTLED;
	return throttled;
}
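
/*
 * Caller-side sketch (an assumption about the issue path, for
 * illustration): blk_throtl_bio() is reached with rcu_read_lock() held,
 * e.g. from blkcg_bio_issue_check(), roughly:
 *
 *	rcu_read_lock();
 *	...
 *	throttled = blk_throtl_bio(q, blkg, bio);
 *	rcu_read_unlock();
 *	(a true return means the bio was queued and must not be issued now)
 */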

/*
 * Dispatch all bios from all children tg's queued on @parent_sq.  On
 * return, @parent_sq is guaranteed to not have any active children tg's
 * and all bios from previously active tg's are on @parent_sq->queued[].
 */
static void tg_drain_bios(struct throtl_service_queue *parent_sq)
{
	struct throtl_grp *tg;

	while ((tg = throtl_rb_first(parent_sq))) {
		struct throtl_service_queue *sq = &tg->service_queue;
		struct bio *bio;

		throtl_dequeue_tg(tg);

		while ((bio = throtl_peek_queued(&sq->queued[READ])))
			tg_dispatch_one_bio(tg, bio_data_dir(bio));
		while ((bio = throtl_peek_queued(&sq->queued[WRITE])))
			tg_dispatch_one_bio(tg, bio_data_dir(bio));
	}
}

/**
 * blk_throtl_drain - drain throttled bios
 * @q: request_queue to drain throttled bios for
 *
 * Dispatch all currently throttled bios on @q through ->make_request_fn().
 */
void blk_throtl_drain(struct request_queue *q)
	__releases(q->queue_lock) __acquires(q->queue_lock)
{
	struct throtl_data *td = q->td;
	struct blkcg_gq *blkg;
	struct cgroup_subsys_state *pos_css;
	struct bio *bio;
	int rw;

	queue_lockdep_assert_held(q);
	rcu_read_lock();

	/*
	 * Drain each tg while doing post-order walk on the blkg tree, so
	 * that all bios are propagated to td->service_queue.  It'd be
	 * better to walk service_queue tree directly but blkg walk is
	 * easier.
	 */
	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg)
		tg_drain_bios(&blkg_to_tg(blkg)->service_queue);

	/* finally, transfer bios from top-level tg's into the td */
	tg_drain_bios(&td->service_queue);

	rcu_read_unlock();
	spin_unlock_irq(q->queue_lock);

	/* all bios now should be in td->service_queue, issue them */
	for (rw = READ; rw <= WRITE; rw++)
		while ((bio = throtl_pop_queued(&td->service_queue.queued[rw],
						NULL)))
			generic_make_request(bio);

	spin_lock_irq(q->queue_lock);
}
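
/*
 * Drain-order note: blkg_for_each_descendant_post() above visits
 * children before parents, so each tg_drain_bios() call pushes bios one
 * level up; by the time the top-level tg's drain into td->service_queue,
 * every descendant queue is already empty.
 */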

int blk_throtl_init(struct request_queue *q)
{
	struct throtl_data *td;
	int ret;

	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
	if (!td)
		return -ENOMEM;

	INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
	throtl_service_queue_init(&td->service_queue);

	q->td = td;
	td->queue = q;

	/* activate policy */
	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
	if (ret)
		kfree(td);
	return ret;
}
  
void blk_throtl_exit(struct request_queue *q)
{
	BUG_ON(!q->td);
	throtl_shutdown_wq(q);
	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
	kfree(q->td);
}
  
static int __init throtl_init(void)
{
	kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0);
	if (!kthrotld_workqueue)
		panic("Failed to create kthrotld\n");

	return blkcg_policy_register(&blkcg_policy_throtl);
}

module_init(throtl_init);