Blame view

block/blk-cgroup.c 35.8 KB
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1
2
3
4
5
6
7
8
9
10
11
  /*
   * Common Block IO controller cgroup interface
   *
   * Based on ideas and code from CFQ, CFS and BFQ:
   * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
   *
   * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
   *		      Paolo Valente <paolo.valente@unimore.it>
   *
   * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
   * 	              Nauman Rafique <nauman@google.com>
e48453c38   Arianna Avanzini   block, cgroup: im...
12
13
14
15
   *
   * For policy-specific per-blkcg data:
   * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
   *                    Arianna Avanzini <avanzini.arianna@gmail.com>
31e4c28d9   Vivek Goyal   blkio: Introduce ...
16
17
   */
  #include <linux/ioprio.h>
220841906   Vivek Goyal   blkio: Export dis...
18
  #include <linux/kdev_t.h>
9d6a986c0   Vivek Goyal   blkio: Export som...
19
  #include <linux/module.h>
accee7854   Stephen Rothwell   block: include li...
20
  #include <linux/err.h>
9195291e5   Divyesh Shah   blkio: Increment ...
21
  #include <linux/blkdev.h>
52ebea749   Tejun Heo   writeback: make b...
22
  #include <linux/backing-dev.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
23
  #include <linux/slab.h>
34d0f179d   Gui Jianfeng   io-controller: Ad...
24
  #include <linux/genhd.h>
72e06c255   Tejun Heo   blkcg: shoot down...
25
  #include <linux/delay.h>
9a9e8a26d   Tejun Heo   blkcg: add blkcg->id
26
  #include <linux/atomic.h>
36aa9e5f5   Tejun Heo   blkcg: move body ...
27
  #include <linux/ctype.h>
eea8f41cc   Tejun Heo   blkcg: move block...
28
  #include <linux/blk-cgroup.h>
5efd61135   Tejun Heo   blkcg: add blkcg_...
29
  #include "blk.h"
3e2520668   Vivek Goyal   blkio: Implement ...
30

84c124da9   Divyesh Shah   blkio: Changes to...
31
  #define MAX_KEY_LEN 100
838f13bf4   Tejun Heo   blkcg: allow blkc...
32
33
34
35
36
37
38
39
  /*
   * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
   * blkcg_pol_register_mutex nests outside of it and synchronizes entire
   * policy [un]register operations including cgroup file additions /
   * removals.  Putting cgroup file registration outside blkcg_pol_mutex
   * allows grabbing it from cgroup callbacks.
   */
  static DEFINE_MUTEX(blkcg_pol_register_mutex);
bc0d6501a   Tejun Heo   blkcg: kill blkio...
40
  static DEFINE_MUTEX(blkcg_pol_mutex);
923adde1b   Tejun Heo   blkcg: clear all ...
41

e48453c38   Arianna Avanzini   block, cgroup: im...
42
  struct blkcg blkcg_root;
3c798398e   Tejun Heo   blkcg: mass renam...
43
  EXPORT_SYMBOL_GPL(blkcg_root);
9d6a986c0   Vivek Goyal   blkio: Export som...
44

496d5e756   Tejun Heo   blkcg: add blkcg_...
45
  struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css;
3c798398e   Tejun Heo   blkcg: mass renam...
46
  static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
035d10b2f   Tejun Heo   blkcg: add blkio_...
47

7876f930d   Tejun Heo   blkcg: implement ...
48
  static LIST_HEAD(all_blkcgs);		/* protected by blkcg_pol_mutex */
a2b1693ba   Tejun Heo   blkcg: implement ...
49
  static bool blkcg_policy_enabled(struct request_queue *q,
3c798398e   Tejun Heo   blkcg: mass renam...
50
  				 const struct blkcg_policy *pol)
a2b1693ba   Tejun Heo   blkcg: implement ...
51
52
53
  {
  	return pol && test_bit(pol->plid, q->blkcg_pols);
  }
0381411e4   Tejun Heo   blkcg: let blkcg ...
54
55
56
57
58
59
/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.  Safe to call with %NULL
 * and on a blkg for which only some of the steps in blkg_alloc()
 * succeeded (unset pd slots and uninitialized rwstats are skipped).
 */
static void blkg_free(struct blkcg_gq *blkg)
{
	int i;

	if (!blkg)
		return;

	/* release per-policy data attached by blkg_alloc() */
	for (i = 0; i < BLKCG_MAX_POLS; i++)
		if (blkg->pd[i])
			blkcg_policy[i]->pd_free_fn(blkg->pd[i]);

	/* the root blkg uses @q->root_rl and never initialized its own rl */
	if (blkg->blkcg != &blkcg_root)
		blk_exit_rl(&blkg->rl);

	blkg_rwstat_exit(&blkg->stat_ios);
	blkg_rwstat_exit(&blkg->stat_bytes);
	kfree(blkg);
}
  
/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 * @gfp_mask: allocation mask to use
 *
 * Allocate a new blkg assocating @blkcg and @q.  Returns the new blkg
 * with a single reference, or %NULL on allocation failure.  On failure,
 * partially constructed state is torn down via blkg_free().
 */
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
				   gfp_t gfp_mask)
{
	struct blkcg_gq *blkg;
	int i;

	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
	if (!blkg)
		return NULL;

	if (blkg_rwstat_init(&blkg->stat_bytes, gfp_mask) ||
	    blkg_rwstat_init(&blkg->stat_ios, gfp_mask))
		goto err_free;

	blkg->q = q;
	INIT_LIST_HEAD(&blkg->q_node);
	blkg->blkcg = blkcg;
	/* initial ref, dropped when the blkg is destroyed */
	atomic_set(&blkg->refcnt, 1);

	/* root blkg uses @q->root_rl, init rl only for !root blkgs */
	if (blkcg != &blkcg_root) {
		if (blk_init_rl(&blkg->rl, q, gfp_mask))
			goto err_free;
		blkg->rl.blkg = blkg;
	}

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkg_policy_data *pd;

		if (!blkcg_policy_enabled(q, pol))
			continue;

		/* alloc per-policy data and attach it to blkg */
		pd = pol->pd_alloc_fn(gfp_mask, q->node);
		if (!pd)
			goto err_free;

		blkg->pd[i] = pd;
		pd->blkg = blkg;
		pd->plid = i;
	}

	return blkg;

err_free:
	/* blkg_free() copes with a partially initialized blkg */
	blkg_free(blkg);
	return NULL;
}
24f290466   Tejun Heo   blkcg: inline [__...
132
133
/**
 * blkg_lookup_slowpath - radix tree lookup of a blkg
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update @blkcg's lookup hint on a match
 *
 * Slow-path lookup of the blkg for the @blkcg - @q pair in @blkcg's
 * radix tree.  Returns the blkg on a match, %NULL otherwise.  The caller
 * must hold @q->queue_lock when passing %true for @update_hint.
 */
struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint)
{
	struct blkcg_gq *blkg;

	/*
	 * Hint didn't match.  Look up from the radix tree.  Note that the
	 * hint can only be updated under queue_lock as otherwise @blkg
	 * could have already been removed from blkg_tree.  The caller is
	 * responsible for grabbing queue_lock if @update_hint.
	 */
	blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
	if (blkg && blkg->q == q) {
		if (update_hint) {
			lockdep_assert_held(q->queue_lock);
			rcu_assign_pointer(blkcg->blkg_hint, blkg);
		}
		return blkg;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(blkg_lookup_slowpath);
80fd99792   Tejun Heo   blkcg: make sure ...
155

159749937   Tejun Heo   blkcg: make root ...
156
157
/*
 * If @new_blkg is %NULL, this function tries to allocate a new one as
 * necessary using %GFP_NOWAIT.  @new_blkg is always consumed on return.
 *
 * Must be called with the RCU read lock and @q->queue_lock held.  The
 * parent blkg must already exist (blkg_lookup_create() guarantees this
 * by creating top-down from blkcg_root).  Returns the new blkg or an
 * ERR_PTR() on failure.
 */
static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
				    struct request_queue *q,
				    struct blkcg_gq *new_blkg)
{
	struct blkcg_gq *blkg;
	struct bdi_writeback_congested *wb_congested;
	int i, ret;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/* blkg holds a reference to blkcg */
	if (!css_tryget_online(&blkcg->css)) {
		ret = -ENODEV;
		goto err_free_blkg;
	}

	wb_congested = wb_congested_get_create(&q->backing_dev_info,
					       blkcg->css.id, GFP_NOWAIT);
	if (!wb_congested) {
		ret = -ENOMEM;
		goto err_put_css;
	}

	/* allocate */
	if (!new_blkg) {
		new_blkg = blkg_alloc(blkcg, q, GFP_NOWAIT);
		if (unlikely(!new_blkg)) {
			ret = -ENOMEM;
			goto err_put_congested;
		}
	}
	blkg = new_blkg;
	blkg->wb_congested = wb_congested;

	/* link parent */
	if (blkcg_parent(blkcg)) {
		blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
		if (WARN_ON_ONCE(!blkg->parent)) {
			ret = -ENODEV;
			goto err_put_congested;
		}
		blkg_get(blkg->parent);
	}

	/* invoke per-policy init */
	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];

		if (blkg->pd[i] && pol->pd_init_fn)
			pol->pd_init_fn(blkg->pd[i]);
	}

	/* insert */
	spin_lock(&blkcg->lock);
	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
	if (likely(!ret)) {
		hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
		list_add(&blkg->q_node, &q->blkg_list);

		for (i = 0; i < BLKCG_MAX_POLS; i++) {
			struct blkcg_policy *pol = blkcg_policy[i];

			if (blkg->pd[i] && pol->pd_online_fn)
				pol->pd_online_fn(blkg->pd[i]);
		}
	}
	blkg->online = true;
	spin_unlock(&blkcg->lock);

	if (!ret)
		return blkg;

	/* @blkg failed fully initialized, use the usual release path */
	blkg_put(blkg);
	return ERR_PTR(ret);

err_put_congested:
	wb_congested_put(wb_congested);
err_put_css:
	css_put(&blkcg->css);
err_free_blkg:
	/* consume @new_blkg even on failure, as documented above */
	blkg_free(new_blkg);
	return ERR_PTR(ret);
}
3c96cb32d   Tejun Heo   blkcg: drop stuff...
241

86cde6b62   Tejun Heo   blkcg: reorganize...
242
243
244
245
246
247
/**
 * blkg_lookup_create - lookup blkg, try to create one if not there
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  If it doesn't exist, try to
 * create one.  blkg creation is performed recursively from blkcg_root such
 * that all non-root blkg's have access to the parent blkg.  This function
 * should be called under RCU read lock and @q->queue_lock.
 *
 * Returns pointer to the looked up or created blkg on success, ERR_PTR()
 * value on error.  If @q is dead, returns ERR_PTR(-ENODEV).  If @q is not
 * dead and bypassing, returns ERR_PTR(-EBUSY).
 */
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
				    struct request_queue *q)
{
	struct blkcg_gq *blkg;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/*
	 * This could be the first entry point of blkcg implementation and
	 * we shouldn't allow anything to go through for a bypassing queue.
	 */
	if (unlikely(blk_queue_bypass(q)))
		return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);

	blkg = __blkg_lookup(blkcg, q, true);
	if (blkg)
		return blkg;

	/*
	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
	 * non-root blkgs have access to their parents.  Each iteration
	 * creates the blkg for the highest ancestor that doesn't have one
	 * yet, until @blkcg itself is reached.
	 */
	while (true) {
		struct blkcg *pos = blkcg;
		struct blkcg *parent = blkcg_parent(blkcg);

		while (parent && !__blkg_lookup(parent, q, false)) {
			pos = parent;
			parent = blkcg_parent(parent);
		}

		blkg = blkg_create(pos, q, NULL);
		if (pos == blkcg || IS_ERR(blkg))
			return blkg;
	}
}
31e4c28d9   Vivek Goyal   blkio: Introduce ...
291

3c798398e   Tejun Heo   blkcg: mass renam...
292
/*
 * Unlink @blkg from its blkcg and queue and drop the creation reference.
 * Caller must hold both @blkg->q->queue_lock and @blkg->blkcg->lock.
 */
static void blkg_destroy(struct blkcg_gq *blkg)
{
	struct blkcg *blkcg = blkg->blkcg;
	struct blkcg_gq *parent = blkg->parent;
	int i;

	lockdep_assert_held(blkg->q->queue_lock);
	lockdep_assert_held(&blkcg->lock);

	/* Something wrong if we are trying to remove same group twice */
	WARN_ON_ONCE(list_empty(&blkg->q_node));
	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));

	/* take policies offline before unlinking */
	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];

		if (blkg->pd[i] && pol->pd_offline_fn)
			pol->pd_offline_fn(blkg->pd[i]);
	}

	/* fold this blkg's stats into the parent so they aren't lost */
	if (parent) {
		blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
		blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
	}

	blkg->online = false;

	radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
	list_del_init(&blkg->q_node);
	hlist_del_init_rcu(&blkg->blkcg_node);

	/*
	 * Both setting lookup hint to and clearing it from @blkg are done
	 * under queue_lock.  If it's not pointing to @blkg now, it never
	 * will.  Hint assignment itself can race safely.
	 */
	if (rcu_access_pointer(blkcg->blkg_hint) == blkg)
		rcu_assign_pointer(blkcg->blkg_hint, NULL);

	/*
	 * Put the reference taken at the time of creation so that when all
	 * queues are gone, group can be destroyed.
	 */
	blkg_put(blkg);
}
9f13ef678   Tejun Heo   blkcg: use double...
335
336
337
/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 *
 * Destroy all blkgs associated with @q.  Caller must hold
 * @q->queue_lock; the per-blkcg lock is taken per iteration to satisfy
 * blkg_destroy()'s locking requirements.
 */
static void blkg_destroy_all(struct request_queue *q)
{
	struct blkcg_gq *blkg, *n;

	lockdep_assert_held(q->queue_lock);

	/* _safe variant: blkg_destroy() unlinks the current entry */
	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
		struct blkcg *blkcg = blkg->blkcg;

		spin_lock(&blkcg->lock);
		blkg_destroy(blkg);
		spin_unlock(&blkcg->lock);
	}

	q->root_blkg = NULL;
	q->root_rl.blkg = NULL;
}
2a4fd070e   Tejun Heo   blkcg: move bulk ...
358
359
360
361
362
363
364
365
366
/*
 * A group is RCU protected, but having an rcu lock does not mean that one
 * can access all the fields of blkg and assume these are valid.  For
 * example, don't try to follow throtl_data and request queue links.
 *
 * Having a reference to blkg under an rcu allows accesses to only values
 * local to groups like group stats and group rate limits.
 */
void __blkg_release_rcu(struct rcu_head *rcu_head)
{
	struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);

	/* release the blkcg and parent blkg refs this blkg has been holding */
	css_put(&blkg->blkcg->css);
	if (blkg->parent)
		blkg_put(blkg->parent);

	/* drop the congested ref taken in blkg_create() */
	wb_congested_put(blkg->wb_congested);

	blkg_free(blkg);
}
EXPORT_SYMBOL_GPL(__blkg_release_rcu);
1adaf3dde   Tejun Heo   blkcg: move refcn...
379

a051661ca   Tejun Heo   blkcg: implement ...
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
/*
 * The next function used by blk_queue_for_each_rl().  It's a bit tricky
 * because the root blkg uses @q->root_rl instead of its own rl.
 *
 * Returns the request_list following @rl in @q's iteration order, or
 * %NULL when the walk is complete.
 */
struct request_list *__blk_queue_next_rl(struct request_list *rl,
					 struct request_queue *q)
{
	struct list_head *ent;
	struct blkcg_gq *blkg;

	/*
	 * Determine the current blkg list_head.  The first entry is
	 * root_rl which is off @q->blkg_list and mapped to the head.
	 */
	if (rl == &q->root_rl) {
		ent = &q->blkg_list;

		/* There are no more block groups, hence no request lists */
		if (list_empty(ent))
			return NULL;
	} else {
		blkg = container_of(rl, struct blkcg_gq, rl);
		ent = &blkg->q_node;
	}

	/* walk to the next list_head, skip root blkcg */
	ent = ent->next;
	if (ent == &q->root_blkg->q_node)
		ent = ent->next;
	if (ent == &q->blkg_list)
		return NULL;

	blkg = container_of(ent, struct blkcg_gq, q_node);
	return &blkg->rl;
}
182446d08   Tejun Heo   cgroup: pass arou...
414
415
/*
 * cgroup write handler: reset the stats of every blkg belonging to the
 * blkcg of @css, including per-policy stats.  Always returns 0.
 */
static int blkcg_reset_stats(struct cgroup_subsys_state *css,
			     struct cftype *cftype, u64 val)
{
	struct blkcg *blkcg = css_to_blkcg(css);
	struct blkcg_gq *blkg;
	int i;

	/* blkcg_pol_mutex keeps blkcg_policy[] stable while we walk it */
	mutex_lock(&blkcg_pol_mutex);
	spin_lock_irq(&blkcg->lock);

	/*
	 * Note that stat reset is racy - it doesn't synchronize against
	 * stat updates.  This is a debug feature which shouldn't exist
	 * anyway.  If you get hit by a race, retry.
	 */
	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
		blkg_rwstat_reset(&blkg->stat_bytes);
		blkg_rwstat_reset(&blkg->stat_ios);

		for (i = 0; i < BLKCG_MAX_POLS; i++) {
			struct blkcg_policy *pol = blkcg_policy[i];

			if (blkg->pd[i] && pol->pd_reset_stats_fn)
				pol->pd_reset_stats_fn(blkg->pd[i]);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	mutex_unlock(&blkcg_pol_mutex);
	return 0;
}
dd165eb3b   Tejun Heo   blkcg: misc prepa...
444
  const char *blkg_dev_name(struct blkcg_gq *blkg)
303a3acb2   Divyesh Shah   blkio: Add io con...
445
  {
d3d32e69f   Tejun Heo   blkcg: restructur...
446
447
448
449
  	/* some drivers (floppy) instantiate a queue w/o disk registered */
  	if (blkg->q->backing_dev_info.dev)
  		return dev_name(blkg->q->backing_dev_info.dev);
  	return NULL;
303a3acb2   Divyesh Shah   blkio: Add io con...
450
  }
dd165eb3b   Tejun Heo   blkcg: misc prepa...
451
  EXPORT_SYMBOL_GPL(blkg_dev_name);
303a3acb2   Divyesh Shah   blkio: Add io con...
452

d3d32e69f   Tejun Heo   blkcg: restructur...
453
454
455
456
457
458
459
460
461
462
463
  /**
   * blkcg_print_blkgs - helper for printing per-blkg data
   * @sf: seq_file to print to
   * @blkcg: blkcg of interest
   * @prfill: fill function to print out a blkg
   * @pol: policy in question
   * @data: data to be passed to @prfill
   * @show_total: to print out sum of prfill return values or not
   *
   * This function invokes @prfill on each blkg of @blkcg if pd for the
   * policy specified by @pol exists.  @prfill is invoked with @sf, the
810ecfa76   Tejun Heo   blkcg: make blkcg...
464
465
466
   * policy data and @data and the matching queue lock held.  If @show_total
   * is %true, the sum of the return values from @prfill is printed with
   * "Total" label at the end.
d3d32e69f   Tejun Heo   blkcg: restructur...
467
468
469
470
   *
   * This is to be used to construct print functions for
   * cftype->read_seq_string method.
   */
3c798398e   Tejun Heo   blkcg: mass renam...
471
  void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
f95a04afa   Tejun Heo   blkcg: embed stru...
472
473
  		       u64 (*prfill)(struct seq_file *,
  				     struct blkg_policy_data *, int),
3c798398e   Tejun Heo   blkcg: mass renam...
474
  		       const struct blkcg_policy *pol, int data,
ec399347d   Tejun Heo   blkcg: use @pol i...
475
  		       bool show_total)
5624a4e44   Vivek Goyal   blk-throttle: Mak...
476
  {
3c798398e   Tejun Heo   blkcg: mass renam...
477
  	struct blkcg_gq *blkg;
d3d32e69f   Tejun Heo   blkcg: restructur...
478
  	u64 total = 0;
5624a4e44   Vivek Goyal   blk-throttle: Mak...
479

810ecfa76   Tejun Heo   blkcg: make blkcg...
480
  	rcu_read_lock();
ee89f8125   Linus Torvalds   Merge branch 'for...
481
  	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
810ecfa76   Tejun Heo   blkcg: make blkcg...
482
  		spin_lock_irq(blkg->q->queue_lock);
a2b1693ba   Tejun Heo   blkcg: implement ...
483
  		if (blkcg_policy_enabled(blkg->q, pol))
f95a04afa   Tejun Heo   blkcg: embed stru...
484
  			total += prfill(sf, blkg->pd[pol->plid], data);
810ecfa76   Tejun Heo   blkcg: make blkcg...
485
486
487
  		spin_unlock_irq(blkg->q->queue_lock);
  	}
  	rcu_read_unlock();
d3d32e69f   Tejun Heo   blkcg: restructur...
488
489
490
491
492
  
  	if (show_total)
  		seq_printf(sf, "Total %llu
  ", (unsigned long long)total);
  }
829fdb500   Tejun Heo   blkcg: export con...
493
  EXPORT_SYMBOL_GPL(blkcg_print_blkgs);
d3d32e69f   Tejun Heo   blkcg: restructur...
494
495
496
497
  
  /**
   * __blkg_prfill_u64 - prfill helper for a single u64 value
   * @sf: seq_file to print to
f95a04afa   Tejun Heo   blkcg: embed stru...
498
   * @pd: policy private data of interest
d3d32e69f   Tejun Heo   blkcg: restructur...
499
500
   * @v: value to print
   *
f95a04afa   Tejun Heo   blkcg: embed stru...
501
   * Print @v to @sf for the device assocaited with @pd.
d3d32e69f   Tejun Heo   blkcg: restructur...
502
   */
f95a04afa   Tejun Heo   blkcg: embed stru...
503
  u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
d3d32e69f   Tejun Heo   blkcg: restructur...
504
  {
f95a04afa   Tejun Heo   blkcg: embed stru...
505
  	const char *dname = blkg_dev_name(pd->blkg);
d3d32e69f   Tejun Heo   blkcg: restructur...
506
507
508
509
510
511
512
513
  
  	if (!dname)
  		return 0;
  
  	seq_printf(sf, "%s %llu
  ", dname, (unsigned long long)v);
  	return v;
  }
829fdb500   Tejun Heo   blkcg: export con...
514
  EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
d3d32e69f   Tejun Heo   blkcg: restructur...
515
516
517
518
  
  /**
   * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
   * @sf: seq_file to print to
f95a04afa   Tejun Heo   blkcg: embed stru...
519
   * @pd: policy private data of interest
d3d32e69f   Tejun Heo   blkcg: restructur...
520
521
   * @rwstat: rwstat to print
   *
f95a04afa   Tejun Heo   blkcg: embed stru...
522
   * Print @rwstat to @sf for the device assocaited with @pd.
d3d32e69f   Tejun Heo   blkcg: restructur...
523
   */
f95a04afa   Tejun Heo   blkcg: embed stru...
524
  u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
829fdb500   Tejun Heo   blkcg: export con...
525
  			 const struct blkg_rwstat *rwstat)
d3d32e69f   Tejun Heo   blkcg: restructur...
526
527
528
529
530
531
532
  {
  	static const char *rwstr[] = {
  		[BLKG_RWSTAT_READ]	= "Read",
  		[BLKG_RWSTAT_WRITE]	= "Write",
  		[BLKG_RWSTAT_SYNC]	= "Sync",
  		[BLKG_RWSTAT_ASYNC]	= "Async",
  	};
f95a04afa   Tejun Heo   blkcg: embed stru...
533
  	const char *dname = blkg_dev_name(pd->blkg);
d3d32e69f   Tejun Heo   blkcg: restructur...
534
535
536
537
538
539
540
541
542
  	u64 v;
  	int i;
  
  	if (!dname)
  		return 0;
  
  	for (i = 0; i < BLKG_RWSTAT_NR; i++)
  		seq_printf(sf, "%s %s %llu
  ", dname, rwstr[i],
24bdb8ef0   Tejun Heo   blkcg: make blkcg...
543
  			   (unsigned long long)atomic64_read(&rwstat->aux_cnt[i]));
d3d32e69f   Tejun Heo   blkcg: restructur...
544

24bdb8ef0   Tejun Heo   blkcg: make blkcg...
545
546
  	v = atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_READ]) +
  		atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_WRITE]);
d3d32e69f   Tejun Heo   blkcg: restructur...
547
548
549
550
  	seq_printf(sf, "%s Total %llu
  ", dname, (unsigned long long)v);
  	return v;
  }
b50da39f5   Tejun Heo   blkcg: export __b...
551
  EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);
d3d32e69f   Tejun Heo   blkcg: restructur...
552

5bc4afb1e   Tejun Heo   blkcg: drop BLKCG...
553
554
555
  /**
   * blkg_prfill_stat - prfill callback for blkg_stat
   * @sf: seq_file to print to
f95a04afa   Tejun Heo   blkcg: embed stru...
556
557
   * @pd: policy private data of interest
   * @off: offset to the blkg_stat in @pd
5bc4afb1e   Tejun Heo   blkcg: drop BLKCG...
558
559
560
   *
   * prfill callback for printing a blkg_stat.
   */
f95a04afa   Tejun Heo   blkcg: embed stru...
561
  u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off)
d3d32e69f   Tejun Heo   blkcg: restructur...
562
  {
f95a04afa   Tejun Heo   blkcg: embed stru...
563
  	return __blkg_prfill_u64(sf, pd, blkg_stat_read((void *)pd + off));
d3d32e69f   Tejun Heo   blkcg: restructur...
564
  }
5bc4afb1e   Tejun Heo   blkcg: drop BLKCG...
565
  EXPORT_SYMBOL_GPL(blkg_prfill_stat);
d3d32e69f   Tejun Heo   blkcg: restructur...
566

5bc4afb1e   Tejun Heo   blkcg: drop BLKCG...
567
568
569
  /**
   * blkg_prfill_rwstat - prfill callback for blkg_rwstat
   * @sf: seq_file to print to
f95a04afa   Tejun Heo   blkcg: embed stru...
570
571
   * @pd: policy private data of interest
   * @off: offset to the blkg_rwstat in @pd
5bc4afb1e   Tejun Heo   blkcg: drop BLKCG...
572
573
574
   *
   * prfill callback for printing a blkg_rwstat.
   */
f95a04afa   Tejun Heo   blkcg: embed stru...
575
576
  u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
  		       int off)
d3d32e69f   Tejun Heo   blkcg: restructur...
577
  {
f95a04afa   Tejun Heo   blkcg: embed stru...
578
  	struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd + off);
d3d32e69f   Tejun Heo   blkcg: restructur...
579

f95a04afa   Tejun Heo   blkcg: embed stru...
580
  	return __blkg_prfill_rwstat(sf, pd, &rwstat);
d3d32e69f   Tejun Heo   blkcg: restructur...
581
  }
5bc4afb1e   Tejun Heo   blkcg: drop BLKCG...
582
  EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
d3d32e69f   Tejun Heo   blkcg: restructur...
583

77ea73388   Tejun Heo   blkcg: move io_se...
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
  static u64 blkg_prfill_rwstat_field(struct seq_file *sf,
  				    struct blkg_policy_data *pd, int off)
  {
  	struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd->blkg + off);
  
  	return __blkg_prfill_rwstat(sf, pd, &rwstat);
  }
  
/**
 * blkg_print_stat_bytes - seq_show callback for blkg->stat_bytes
 * @sf: seq_file to print to
 * @v: unused
 *
 * To be used as cftype->seq_show to print blkg->stat_bytes.
 * cftype->private must be set to the blkcg_policy.
 *
 * Always returns 0 per the seq_show convention.
 */
int blkg_print_stat_bytes(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
			  offsetof(struct blkcg_gq, stat_bytes), true);
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_print_stat_bytes);
  
/**
 * blkg_print_stat_ios - seq_show callback for blkg->stat_ios
 * @sf: seq_file to print to
 * @v: unused
 *
 * To be used as cftype->seq_show to print blkg->stat_ios.  cftype->private
 * must be set to the blkcg_policy.
 */
int blkg_print_stat_ios(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
			  offsetof(struct blkcg_gq, stat_ios), true);
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_print_stat_ios);
  
  static u64 blkg_prfill_rwstat_field_recursive(struct seq_file *sf,
  					      struct blkg_policy_data *pd,
  					      int off)
  {
  	struct blkg_rwstat rwstat = blkg_rwstat_recursive_sum(pd->blkg,
  							      NULL, off);
  	return __blkg_prfill_rwstat(sf, pd, &rwstat);
  }
  
/**
 * blkg_print_stat_bytes_recursive - recursive version of blkg_print_stat_bytes
 * @sf: seq_file to print to
 * @v: unused
 *
 * Like blkg_print_stat_bytes() but each blkg's value is summed over the
 * blkg and its online descendants.  cftype->private must be set to the
 * blkcg_policy.
 */
int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  blkg_prfill_rwstat_field_recursive,
			  (void *)seq_cft(sf)->private,
			  offsetof(struct blkcg_gq, stat_bytes), true);
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_print_stat_bytes_recursive);
  
/**
 * blkg_print_stat_ios_recursive - recursive version of blkg_print_stat_ios
 * @sf: seq_file to print to
 * @v: unused
 *
 * Like blkg_print_stat_ios() but each blkg's value is summed over the
 * blkg and its online descendants.  cftype->private must be set to the
 * blkcg_policy.
 */
int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  blkg_prfill_rwstat_field_recursive,
			  (void *)seq_cft(sf)->private,
			  offsetof(struct blkcg_gq, stat_ios), true);
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_print_stat_ios_recursive);
3a8b31d39   Tejun Heo   blkcg: restructur...
664
  /**
16b3de665   Tejun Heo   blkcg: implement ...
665
   * blkg_stat_recursive_sum - collect hierarchical blkg_stat
f12c74cab   Tejun Heo   blkcg: make blkg_...
666
667
668
   * @blkg: blkg of interest
   * @pol: blkcg_policy which contains the blkg_stat
   * @off: offset to the blkg_stat in blkg_policy_data or @blkg
16b3de665   Tejun Heo   blkcg: implement ...
669
   *
f12c74cab   Tejun Heo   blkcg: make blkg_...
670
671
672
673
674
675
   * Collect the blkg_stat specified by @blkg, @pol and @off and all its
   * online descendants and their aux counts.  The caller must be holding the
   * queue lock for online tests.
   *
   * If @pol is NULL, blkg_stat is at @off bytes into @blkg; otherwise, it is
   * at @off bytes into @blkg's blkg_policy_data of the policy.
16b3de665   Tejun Heo   blkcg: implement ...
676
   */
f12c74cab   Tejun Heo   blkcg: make blkg_...
677
678
  u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
  			    struct blkcg_policy *pol, int off)
16b3de665   Tejun Heo   blkcg: implement ...
679
  {
16b3de665   Tejun Heo   blkcg: implement ...
680
  	struct blkcg_gq *pos_blkg;
492eb21b9   Tejun Heo   cgroup: make hier...
681
  	struct cgroup_subsys_state *pos_css;
bd8815a6d   Tejun Heo   cgroup: make css_...
682
  	u64 sum = 0;
16b3de665   Tejun Heo   blkcg: implement ...
683

f12c74cab   Tejun Heo   blkcg: make blkg_...
684
  	lockdep_assert_held(blkg->q->queue_lock);
16b3de665   Tejun Heo   blkcg: implement ...
685

16b3de665   Tejun Heo   blkcg: implement ...
686
  	rcu_read_lock();
f12c74cab   Tejun Heo   blkcg: make blkg_...
687
688
689
690
691
  	blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
  		struct blkg_stat *stat;
  
  		if (!pos_blkg->online)
  			continue;
16b3de665   Tejun Heo   blkcg: implement ...
692

f12c74cab   Tejun Heo   blkcg: make blkg_...
693
694
695
696
697
698
  		if (pol)
  			stat = (void *)blkg_to_pd(pos_blkg, pol) + off;
  		else
  			stat = (void *)blkg + off;
  
  		sum += blkg_stat_read(stat) + atomic64_read(&stat->aux_cnt);
16b3de665   Tejun Heo   blkcg: implement ...
699
700
701
702
703
704
705
706
707
  	}
  	rcu_read_unlock();
  
  	return sum;
  }
  EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum);
  
  /**
   * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
f12c74cab   Tejun Heo   blkcg: make blkg_...
708
709
710
   * @blkg: blkg of interest
   * @pol: blkcg_policy which contains the blkg_rwstat
   * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg
16b3de665   Tejun Heo   blkcg: implement ...
711
   *
f12c74cab   Tejun Heo   blkcg: make blkg_...
712
713
714
715
716
717
   * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its
   * online descendants and their aux counts.  The caller must be holding the
   * queue lock for online tests.
   *
   * If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it
   * is at @off bytes into @blkg's blkg_policy_data of the policy.
16b3de665   Tejun Heo   blkcg: implement ...
718
   */
f12c74cab   Tejun Heo   blkcg: make blkg_...
719
720
  struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
  					     struct blkcg_policy *pol, int off)
16b3de665   Tejun Heo   blkcg: implement ...
721
  {
16b3de665   Tejun Heo   blkcg: implement ...
722
  	struct blkcg_gq *pos_blkg;
492eb21b9   Tejun Heo   cgroup: make hier...
723
  	struct cgroup_subsys_state *pos_css;
bd8815a6d   Tejun Heo   cgroup: make css_...
724
  	struct blkg_rwstat sum = { };
16b3de665   Tejun Heo   blkcg: implement ...
725
  	int i;
f12c74cab   Tejun Heo   blkcg: make blkg_...
726
  	lockdep_assert_held(blkg->q->queue_lock);
16b3de665   Tejun Heo   blkcg: implement ...
727

16b3de665   Tejun Heo   blkcg: implement ...
728
  	rcu_read_lock();
f12c74cab   Tejun Heo   blkcg: make blkg_...
729
  	blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
3a7faeada   Tejun Heo   blkcg: reduce sta...
730
  		struct blkg_rwstat *rwstat;
16b3de665   Tejun Heo   blkcg: implement ...
731
732
733
  
  		if (!pos_blkg->online)
  			continue;
f12c74cab   Tejun Heo   blkcg: make blkg_...
734
735
736
737
  		if (pol)
  			rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
  		else
  			rwstat = (void *)pos_blkg + off;
16b3de665   Tejun Heo   blkcg: implement ...
738
  		for (i = 0; i < BLKG_RWSTAT_NR; i++)
3a7faeada   Tejun Heo   blkcg: reduce sta...
739
740
741
  			atomic64_add(atomic64_read(&rwstat->aux_cnt[i]) +
  				percpu_counter_sum_positive(&rwstat->cpu_cnt[i]),
  				&sum.aux_cnt[i]);
16b3de665   Tejun Heo   blkcg: implement ...
742
743
744
745
746
747
748
749
  	}
  	rcu_read_unlock();
  
  	return sum;
  }
  EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
  
  /**
3a8b31d39   Tejun Heo   blkcg: restructur...
750
751
   * blkg_conf_prep - parse and prepare for per-blkg config update
   * @blkcg: target block cgroup
da8b06626   Tejun Heo   blkcg: make blkg_...
752
   * @pol: target policy
3a8b31d39   Tejun Heo   blkcg: restructur...
753
754
755
756
   * @input: input string
   * @ctx: blkg_conf_ctx to be filled
   *
   * Parse per-blkg config update from @input and initialize @ctx with the
36aa9e5f5   Tejun Heo   blkcg: move body ...
757
758
759
   * result.  @ctx->blkg points to the blkg to be updated and @ctx->body the
   * part of @input following MAJ:MIN.  This function returns with RCU read
   * lock and queue lock held and must be paired with blkg_conf_finish().
3a8b31d39   Tejun Heo   blkcg: restructur...
760
   */
3c798398e   Tejun Heo   blkcg: mass renam...
761
  int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
36aa9e5f5   Tejun Heo   blkcg: move body ...
762
  		   char *input, struct blkg_conf_ctx *ctx)
da8b06626   Tejun Heo   blkcg: make blkg_...
763
  	__acquires(rcu) __acquires(disk->queue->queue_lock)
34d0f179d   Gui Jianfeng   io-controller: Ad...
764
  {
3a8b31d39   Tejun Heo   blkcg: restructur...
765
  	struct gendisk *disk;
3c798398e   Tejun Heo   blkcg: mass renam...
766
  	struct blkcg_gq *blkg;
39a169b62   Roman Pen   block: fix module...
767
  	struct module *owner;
726fa6945   Tejun Heo   blkcg: simplify b...
768
  	unsigned int major, minor;
36aa9e5f5   Tejun Heo   blkcg: move body ...
769
770
  	int key_len, part, ret;
  	char *body;
34d0f179d   Gui Jianfeng   io-controller: Ad...
771

36aa9e5f5   Tejun Heo   blkcg: move body ...
772
  	if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
726fa6945   Tejun Heo   blkcg: simplify b...
773
  		return -EINVAL;
3a8b31d39   Tejun Heo   blkcg: restructur...
774

36aa9e5f5   Tejun Heo   blkcg: move body ...
775
776
777
778
  	body = input + key_len;
  	if (!isspace(*body))
  		return -EINVAL;
  	body = skip_spaces(body);
726fa6945   Tejun Heo   blkcg: simplify b...
779
  	disk = get_gendisk(MKDEV(major, minor), &part);
5f6c2d2b7   Tejun Heo   blkcg: fix gendis...
780
  	if (!disk)
20386ce01   Tejun Heo   blkcg: refine err...
781
  		return -ENODEV;
5f6c2d2b7   Tejun Heo   blkcg: fix gendis...
782
  	if (part) {
39a169b62   Roman Pen   block: fix module...
783
  		owner = disk->fops->owner;
5f6c2d2b7   Tejun Heo   blkcg: fix gendis...
784
  		put_disk(disk);
39a169b62   Roman Pen   block: fix module...
785
  		module_put(owner);
20386ce01   Tejun Heo   blkcg: refine err...
786
  		return -ENODEV;
5f6c2d2b7   Tejun Heo   blkcg: fix gendis...
787
  	}
e56da7e28   Tejun Heo   blkcg: don't allo...
788
789
  
  	rcu_read_lock();
4bfd482e7   Tejun Heo   blkcg: kill blkio...
790
  	spin_lock_irq(disk->queue->queue_lock);
da8b06626   Tejun Heo   blkcg: make blkg_...
791

a2b1693ba   Tejun Heo   blkcg: implement ...
792
  	if (blkcg_policy_enabled(disk->queue, pol))
3c96cb32d   Tejun Heo   blkcg: drop stuff...
793
  		blkg = blkg_lookup_create(blkcg, disk->queue);
a2b1693ba   Tejun Heo   blkcg: implement ...
794
  	else
20386ce01   Tejun Heo   blkcg: refine err...
795
  		blkg = ERR_PTR(-EOPNOTSUPP);
e56da7e28   Tejun Heo   blkcg: don't allo...
796

4bfd482e7   Tejun Heo   blkcg: kill blkio...
797
798
  	if (IS_ERR(blkg)) {
  		ret = PTR_ERR(blkg);
3a8b31d39   Tejun Heo   blkcg: restructur...
799
  		rcu_read_unlock();
da8b06626   Tejun Heo   blkcg: make blkg_...
800
  		spin_unlock_irq(disk->queue->queue_lock);
39a169b62   Roman Pen   block: fix module...
801
  		owner = disk->fops->owner;
3a8b31d39   Tejun Heo   blkcg: restructur...
802
  		put_disk(disk);
39a169b62   Roman Pen   block: fix module...
803
  		module_put(owner);
3a8b31d39   Tejun Heo   blkcg: restructur...
804
805
806
807
808
809
810
811
812
  		/*
  		 * If queue was bypassing, we should retry.  Do so after a
  		 * short msleep().  It isn't strictly necessary but queue
  		 * can be bypassing for some time and it's always nice to
  		 * avoid busy looping.
  		 */
  		if (ret == -EBUSY) {
  			msleep(10);
  			ret = restart_syscall();
7702e8f45   Vivek Goyal   blk-cgroup: cgrou...
813
  		}
726fa6945   Tejun Heo   blkcg: simplify b...
814
  		return ret;
062a644d6   Vivek Goyal   blk-cgroup: Prepa...
815
  	}
3a8b31d39   Tejun Heo   blkcg: restructur...
816
817
818
  
  	ctx->disk = disk;
  	ctx->blkg = blkg;
36aa9e5f5   Tejun Heo   blkcg: move body ...
819
  	ctx->body = body;
726fa6945   Tejun Heo   blkcg: simplify b...
820
  	return 0;
34d0f179d   Gui Jianfeng   io-controller: Ad...
821
  }
829fdb500   Tejun Heo   blkcg: export con...
822
  EXPORT_SYMBOL_GPL(blkg_conf_prep);
34d0f179d   Gui Jianfeng   io-controller: Ad...
823

3a8b31d39   Tejun Heo   blkcg: restructur...
824
825
826
827
828
829
830
  /**
   * blkg_conf_finish - finish up per-blkg config update
   * @ctx: blkg_conf_ctx intiailized by blkg_conf_prep()
   *
   * Finish up after per-blkg config update.  This function must be paired
   * with blkg_conf_prep().
   */
829fdb500   Tejun Heo   blkcg: export con...
831
  void blkg_conf_finish(struct blkg_conf_ctx *ctx)
da8b06626   Tejun Heo   blkcg: make blkg_...
832
  	__releases(ctx->disk->queue->queue_lock) __releases(rcu)
34d0f179d   Gui Jianfeng   io-controller: Ad...
833
  {
39a169b62   Roman Pen   block: fix module...
834
  	struct module *owner;
da8b06626   Tejun Heo   blkcg: make blkg_...
835
  	spin_unlock_irq(ctx->disk->queue->queue_lock);
3a8b31d39   Tejun Heo   blkcg: restructur...
836
  	rcu_read_unlock();
39a169b62   Roman Pen   block: fix module...
837
  	owner = ctx->disk->fops->owner;
3a8b31d39   Tejun Heo   blkcg: restructur...
838
  	put_disk(ctx->disk);
39a169b62   Roman Pen   block: fix module...
839
  	module_put(owner);
34d0f179d   Gui Jianfeng   io-controller: Ad...
840
  }
829fdb500   Tejun Heo   blkcg: export con...
841
  EXPORT_SYMBOL_GPL(blkg_conf_finish);
34d0f179d   Gui Jianfeng   io-controller: Ad...
842

2ee867dcf   Tejun Heo   blkcg: implement ...
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
  static int blkcg_print_stat(struct seq_file *sf, void *v)
  {
  	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
  	struct blkcg_gq *blkg;
  
  	rcu_read_lock();
  
  	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
  		const char *dname;
  		struct blkg_rwstat rwstat;
  		u64 rbytes, wbytes, rios, wios;
  
  		dname = blkg_dev_name(blkg);
  		if (!dname)
  			continue;
  
  		spin_lock_irq(blkg->q->queue_lock);
  
  		rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
  					offsetof(struct blkcg_gq, stat_bytes));
  		rbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]);
  		wbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);
  
  		rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
  					offsetof(struct blkcg_gq, stat_ios));
  		rios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]);
  		wios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);
  
  		spin_unlock_irq(blkg->q->queue_lock);
  
  		if (rbytes || wbytes || rios || wios)
  			seq_printf(sf, "%s rbytes=%llu wbytes=%llu rios=%llu wios=%llu
  ",
  				   dname, rbytes, wbytes, rios, wios);
  	}
  
  	rcu_read_unlock();
  	return 0;
  }
e1f3b9412   Bart Van Assche   block/blk-cgroup....
882
  static struct cftype blkcg_files[] = {
2ee867dcf   Tejun Heo   blkcg: implement ...
883
884
  	{
  		.name = "stat",
ca0752c5e   Tejun Heo   blkcg: don't crea...
885
  		.flags = CFTYPE_NOT_ON_ROOT,
2ee867dcf   Tejun Heo   blkcg: implement ...
886
887
888
889
  		.seq_show = blkcg_print_stat,
  	},
  	{ }	/* terminate */
  };
e1f3b9412   Bart Van Assche   block/blk-cgroup....
890
  static struct cftype blkcg_legacy_files[] = {
31e4c28d9   Vivek Goyal   blkio: Introduce ...
891
  	{
84c124da9   Divyesh Shah   blkio: Changes to...
892
  		.name = "reset_stats",
3c798398e   Tejun Heo   blkcg: mass renam...
893
  		.write_u64 = blkcg_reset_stats,
220841906   Vivek Goyal   blkio: Export dis...
894
  	},
4baf6e332   Tejun Heo   cgroup: convert a...
895
  	{ }	/* terminate */
31e4c28d9   Vivek Goyal   blkio: Introduce ...
896
  };
9f13ef678   Tejun Heo   blkcg: use double...
897
  /**
92fb97487   Tejun Heo   cgroup: rename ->...
898
   * blkcg_css_offline - cgroup css_offline callback
eb95419b0   Tejun Heo   cgroup: pass arou...
899
   * @css: css of interest
9f13ef678   Tejun Heo   blkcg: use double...
900
   *
eb95419b0   Tejun Heo   cgroup: pass arou...
901
902
   * This function is called when @css is about to go away and responsible
   * for shooting down all blkgs associated with @css.  blkgs should be
9f13ef678   Tejun Heo   blkcg: use double...
903
904
905
906
907
   * removed while holding both q and blkcg locks.  As blkcg lock is nested
   * inside q lock, this function performs reverse double lock dancing.
   *
   * This is the blkcg counterpart of ioc_release_fn().
   */
eb95419b0   Tejun Heo   cgroup: pass arou...
908
  static void blkcg_css_offline(struct cgroup_subsys_state *css)
31e4c28d9   Vivek Goyal   blkio: Introduce ...
909
  {
eb95419b0   Tejun Heo   cgroup: pass arou...
910
  	struct blkcg *blkcg = css_to_blkcg(css);
b1c357696   Vivek Goyal   blkio: Take care ...
911

9f13ef678   Tejun Heo   blkcg: use double...
912
  	spin_lock_irq(&blkcg->lock);
7ee9c5620   Tejun Heo   blkcg: let blkio_...
913

9f13ef678   Tejun Heo   blkcg: use double...
914
  	while (!hlist_empty(&blkcg->blkg_list)) {
3c798398e   Tejun Heo   blkcg: mass renam...
915
916
  		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
  						struct blkcg_gq, blkcg_node);
c875f4d02   Tejun Heo   blkcg: drop unnec...
917
  		struct request_queue *q = blkg->q;
b1c357696   Vivek Goyal   blkio: Take care ...
918

9f13ef678   Tejun Heo   blkcg: use double...
919
920
921
922
923
  		if (spin_trylock(q->queue_lock)) {
  			blkg_destroy(blkg);
  			spin_unlock(q->queue_lock);
  		} else {
  			spin_unlock_irq(&blkcg->lock);
9f13ef678   Tejun Heo   blkcg: use double...
924
  			cpu_relax();
a5567932f   Dan Carpenter   blkcg: change a s...
925
  			spin_lock_irq(&blkcg->lock);
0f3942a39   Jens Axboe   block: kill some ...
926
  		}
9f13ef678   Tejun Heo   blkcg: use double...
927
  	}
b1c357696   Vivek Goyal   blkio: Take care ...
928

9f13ef678   Tejun Heo   blkcg: use double...
929
  	spin_unlock_irq(&blkcg->lock);
52ebea749   Tejun Heo   writeback: make b...
930
931
  
  	wb_blkcg_offline(blkcg);
7ee9c5620   Tejun Heo   blkcg: let blkio_...
932
  }
eb95419b0   Tejun Heo   cgroup: pass arou...
933
  static void blkcg_css_free(struct cgroup_subsys_state *css)
7ee9c5620   Tejun Heo   blkcg: let blkio_...
934
  {
eb95419b0   Tejun Heo   cgroup: pass arou...
935
  	struct blkcg *blkcg = css_to_blkcg(css);
bc915e61c   Tejun Heo   blkcg: remove unn...
936
  	int i;
7ee9c5620   Tejun Heo   blkcg: let blkio_...
937

7876f930d   Tejun Heo   blkcg: implement ...
938
  	mutex_lock(&blkcg_pol_mutex);
e4a9bde95   Tejun Heo   blkcg: replace bl...
939

7876f930d   Tejun Heo   blkcg: implement ...
940
  	list_del(&blkcg->all_blkcgs_node);
7876f930d   Tejun Heo   blkcg: implement ...
941

bc915e61c   Tejun Heo   blkcg: remove unn...
942
  	for (i = 0; i < BLKCG_MAX_POLS; i++)
e4a9bde95   Tejun Heo   blkcg: replace bl...
943
944
945
946
  		if (blkcg->cpd[i])
  			blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
  
  	mutex_unlock(&blkcg_pol_mutex);
bc915e61c   Tejun Heo   blkcg: remove unn...
947
  	kfree(blkcg);
31e4c28d9   Vivek Goyal   blkio: Introduce ...
948
  }
eb95419b0   Tejun Heo   cgroup: pass arou...
949
950
  static struct cgroup_subsys_state *
  blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
31e4c28d9   Vivek Goyal   blkio: Introduce ...
951
  {
3c798398e   Tejun Heo   blkcg: mass renam...
952
  	struct blkcg *blkcg;
e48453c38   Arianna Avanzini   block, cgroup: im...
953
954
  	struct cgroup_subsys_state *ret;
  	int i;
31e4c28d9   Vivek Goyal   blkio: Introduce ...
955

7876f930d   Tejun Heo   blkcg: implement ...
956
  	mutex_lock(&blkcg_pol_mutex);
eb95419b0   Tejun Heo   cgroup: pass arou...
957
  	if (!parent_css) {
3c798398e   Tejun Heo   blkcg: mass renam...
958
  		blkcg = &blkcg_root;
bc915e61c   Tejun Heo   blkcg: remove unn...
959
960
961
962
963
964
  	} else {
  		blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
  		if (!blkcg) {
  			ret = ERR_PTR(-ENOMEM);
  			goto free_blkcg;
  		}
e48453c38   Arianna Avanzini   block, cgroup: im...
965
966
967
968
969
970
971
972
973
974
975
976
  	}
  
  	for (i = 0; i < BLKCG_MAX_POLS ; i++) {
  		struct blkcg_policy *pol = blkcg_policy[i];
  		struct blkcg_policy_data *cpd;
  
  		/*
  		 * If the policy hasn't been attached yet, wait for it
  		 * to be attached before doing anything else. Otherwise,
  		 * check if the policy requires any specific per-cgroup
  		 * data: if it does, allocate and initialize it.
  		 */
e4a9bde95   Tejun Heo   blkcg: replace bl...
977
  		if (!pol || !pol->cpd_alloc_fn)
e48453c38   Arianna Avanzini   block, cgroup: im...
978
  			continue;
e4a9bde95   Tejun Heo   blkcg: replace bl...
979
  		cpd = pol->cpd_alloc_fn(GFP_KERNEL);
e48453c38   Arianna Avanzini   block, cgroup: im...
980
981
982
983
  		if (!cpd) {
  			ret = ERR_PTR(-ENOMEM);
  			goto free_pd_blkcg;
  		}
814376483   Tejun Heo   blkcg: minor upda...
984
985
  		blkcg->cpd[i] = cpd;
  		cpd->blkcg = blkcg;
e48453c38   Arianna Avanzini   block, cgroup: im...
986
  		cpd->plid = i;
e4a9bde95   Tejun Heo   blkcg: replace bl...
987
988
  		if (pol->cpd_init_fn)
  			pol->cpd_init_fn(cpd);
e48453c38   Arianna Avanzini   block, cgroup: im...
989
  	}
31e4c28d9   Vivek Goyal   blkio: Introduce ...
990

31e4c28d9   Vivek Goyal   blkio: Introduce ...
991
  	spin_lock_init(&blkcg->lock);
d93a11f1c   Tejun Heo   blkcg, cfq-iosche...
992
  	INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT);
31e4c28d9   Vivek Goyal   blkio: Introduce ...
993
  	INIT_HLIST_HEAD(&blkcg->blkg_list);
52ebea749   Tejun Heo   writeback: make b...
994
995
996
  #ifdef CONFIG_CGROUP_WRITEBACK
  	INIT_LIST_HEAD(&blkcg->cgwb_list);
  #endif
7876f930d   Tejun Heo   blkcg: implement ...
997
998
999
  	list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
  
  	mutex_unlock(&blkcg_pol_mutex);
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1000
  	return &blkcg->css;
e48453c38   Arianna Avanzini   block, cgroup: im...
1001
1002
1003
  
  free_pd_blkcg:
  	for (i--; i >= 0; i--)
e4a9bde95   Tejun Heo   blkcg: replace bl...
1004
1005
  		if (blkcg->cpd[i])
  			blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
e48453c38   Arianna Avanzini   block, cgroup: im...
1006
1007
  free_blkcg:
  	kfree(blkcg);
7876f930d   Tejun Heo   blkcg: implement ...
1008
  	mutex_unlock(&blkcg_pol_mutex);
e48453c38   Arianna Avanzini   block, cgroup: im...
1009
  	return ret;
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1010
  }
5efd61135   Tejun Heo   blkcg: add blkcg_...
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
  /**
   * blkcg_init_queue - initialize blkcg part of request queue
   * @q: request_queue to initialize
   *
   * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
   * part of new request_queue @q.
   *
   * RETURNS:
   * 0 on success, -errno on failure.
   */
  int blkcg_init_queue(struct request_queue *q)
  {
ec13b1d6f   Tejun Heo   blkcg: always cre...
1023
1024
1025
1026
1027
1028
1029
1030
1031
  	struct blkcg_gq *new_blkg, *blkg;
  	bool preloaded;
  	int ret;
  
  	new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
  	if (!new_blkg)
  		return -ENOMEM;
  
  	preloaded = !radix_tree_preload(GFP_KERNEL);
5efd61135   Tejun Heo   blkcg: add blkcg_...
1032

ec13b1d6f   Tejun Heo   blkcg: always cre...
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
  	/*
  	 * Make sure the root blkg exists and count the existing blkgs.  As
  	 * @q is bypassing at this point, blkg_lookup_create() can't be
  	 * used.  Open code insertion.
  	 */
  	rcu_read_lock();
  	spin_lock_irq(q->queue_lock);
  	blkg = blkg_create(&blkcg_root, q, new_blkg);
  	spin_unlock_irq(q->queue_lock);
  	rcu_read_unlock();
  
  	if (preloaded)
  		radix_tree_preload_end();
  
  	if (IS_ERR(blkg)) {
994b78327   Tejun Heo   blkcg: use blkg_f...
1048
  		blkg_free(new_blkg);
ec13b1d6f   Tejun Heo   blkcg: always cre...
1049
1050
1051
1052
1053
  		return PTR_ERR(blkg);
  	}
  
  	q->root_blkg = blkg;
  	q->root_rl.blkg = blkg;
5efd61135   Tejun Heo   blkcg: add blkcg_...
1054

ec13b1d6f   Tejun Heo   blkcg: always cre...
1055
1056
1057
1058
1059
1060
1061
  	ret = blk_throtl_init(q);
  	if (ret) {
  		spin_lock_irq(q->queue_lock);
  		blkg_destroy_all(q);
  		spin_unlock_irq(q->queue_lock);
  	}
  	return ret;
5efd61135   Tejun Heo   blkcg: add blkcg_...
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
  }
  
  /**
   * blkcg_drain_queue - drain blkcg part of request_queue
   * @q: request_queue to drain
   *
   * Called from blk_drain_queue().  Responsible for draining blkcg part.
   */
  void blkcg_drain_queue(struct request_queue *q)
  {
  	lockdep_assert_held(q->queue_lock);
0b462c89e   Tejun Heo   blkcg: don't call...
1073
1074
1075
1076
1077
1078
  	/*
  	 * @q could be exiting and already have destroyed all blkgs as
  	 * indicated by NULL root_blkg.  If so, don't confuse policies.
  	 */
  	if (!q->root_blkg)
  		return;
5efd61135   Tejun Heo   blkcg: add blkcg_...
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
  	blk_throtl_drain(q);
  }
  
  /**
   * blkcg_exit_queue - exit and release blkcg part of request_queue
   * @q: request_queue being released
   *
   * Called from blk_release_queue().  Responsible for exiting blkcg part.
   */
  void blkcg_exit_queue(struct request_queue *q)
  {
6d18b008d   Tejun Heo   blkcg: shoot down...
1090
  	spin_lock_irq(q->queue_lock);
3c96cb32d   Tejun Heo   blkcg: drop stuff...
1091
  	blkg_destroy_all(q);
6d18b008d   Tejun Heo   blkcg: shoot down...
1092
  	spin_unlock_irq(q->queue_lock);
5efd61135   Tejun Heo   blkcg: add blkcg_...
1093
1094
  	blk_throtl_exit(q);
  }
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1095
1096
1097
1098
1099
1100
  /*
   * We cannot support shared io contexts, as we have no mean to support
   * two tasks with the same ioc in two different groups without major rework
   * of the main cic data structures.  For now we allow a task to change
   * its cgroup only if it's the only owner of its ioc.
   */
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1101
  static int blkcg_can_attach(struct cgroup_taskset *tset)
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1102
  {
bb9d97b6d   Tejun Heo   cgroup: don't use...
1103
  	struct task_struct *task;
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1104
  	struct cgroup_subsys_state *dst_css;
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1105
1106
1107
1108
  	struct io_context *ioc;
  	int ret = 0;
  
  	/* task_lock() is needed to avoid races with exit_io_context() */
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1109
  	cgroup_taskset_for_each(task, dst_css, tset) {
bb9d97b6d   Tejun Heo   cgroup: don't use...
1110
1111
1112
1113
1114
1115
1116
1117
  		task_lock(task);
  		ioc = task->io_context;
  		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
  			ret = -EINVAL;
  		task_unlock(task);
  		if (ret)
  			break;
  	}
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1118
1119
  	return ret;
  }
69d7fde59   Tejun Heo   blkcg: use CGROUP...
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
/*
 * cgroup_subsys ->bind callback.  Walks every registered policy and, for
 * policies that define a cpd_bind_fn, invokes it on each existing blkcg's
 * per-policy data under blkcg_pol_mutex.
 */
static void blkcg_bind(struct cgroup_subsys_state *root_css)
{
	int i;

	mutex_lock(&blkcg_pol_mutex);

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkcg *blkcg;

		/* skip empty slots and policies without a bind hook */
		if (!pol || !pol->cpd_bind_fn)
			continue;

		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node)
			if (blkcg->cpd[pol->plid])
				pol->cpd_bind_fn(blkcg->cpd[pol->plid]);
	}
	mutex_unlock(&blkcg_pol_mutex);
}
c165b3e3c   Tejun Heo   blkcg: rename sub...
1139
  struct cgroup_subsys io_cgrp_subsys = {
92fb97487   Tejun Heo   cgroup: rename ->...
1140
1141
1142
  	.css_alloc = blkcg_css_alloc,
  	.css_offline = blkcg_css_offline,
  	.css_free = blkcg_css_free,
3c798398e   Tejun Heo   blkcg: mass renam...
1143
  	.can_attach = blkcg_can_attach,
69d7fde59   Tejun Heo   blkcg: use CGROUP...
1144
  	.bind = blkcg_bind,
2ee867dcf   Tejun Heo   blkcg: implement ...
1145
  	.dfl_cftypes = blkcg_files,
880f50e22   Tejun Heo   blkcg: mark exist...
1146
  	.legacy_cftypes = blkcg_legacy_files,
c165b3e3c   Tejun Heo   blkcg: rename sub...
1147
  	.legacy_name = "blkio",
1ced953b1   Tejun Heo   blkcg, memcg: mak...
1148
1149
1150
1151
1152
1153
1154
1155
  #ifdef CONFIG_MEMCG
  	/*
  	 * This ensures that, if available, memcg is automatically enabled
  	 * together on the default hierarchy so that the owner cgroup can
  	 * be retrieved from writeback pages.
  	 */
  	.depends_on = 1 << memory_cgrp_id,
  #endif
676f7c8f8   Tejun Heo   cgroup: relocate ...
1156
  };
c165b3e3c   Tejun Heo   blkcg: rename sub...
1157
  EXPORT_SYMBOL_GPL(io_cgrp_subsys);
676f7c8f8   Tejun Heo   cgroup: relocate ...
1158

8bd435b30   Tejun Heo   blkcg: remove sta...
1159
  /**
a2b1693ba   Tejun Heo   blkcg: implement ...
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
   * blkcg_activate_policy - activate a blkcg policy on a request_queue
   * @q: request_queue of interest
   * @pol: blkcg policy to activate
   *
   * Activate @pol on @q.  Requires %GFP_KERNEL context.  @q goes through
   * bypass mode to populate its blkgs with policy_data for @pol.
   *
   * Activation happens with @q bypassed, so nobody would be accessing blkgs
   * from IO path.  Update of each blkg is protected by both queue and blkcg
   * locks so that holding either lock and testing blkcg_policy_enabled() is
   * always enough for dereferencing policy data.
   *
   * The caller is responsible for synchronizing [de]activations and policy
   * [un]registerations.  Returns 0 on success, -errno on failure.
   */
  int blkcg_activate_policy(struct request_queue *q,
3c798398e   Tejun Heo   blkcg: mass renam...
1176
  			  const struct blkcg_policy *pol)
a2b1693ba   Tejun Heo   blkcg: implement ...
1177
  {
4c55f4f9a   Tejun Heo   blkcg: restructur...
1178
  	struct blkg_policy_data *pd_prealloc = NULL;
ec13b1d6f   Tejun Heo   blkcg: always cre...
1179
  	struct blkcg_gq *blkg;
4c55f4f9a   Tejun Heo   blkcg: restructur...
1180
  	int ret;
a2b1693ba   Tejun Heo   blkcg: implement ...
1181
1182
1183
1184
1185
  
  	if (blkcg_policy_enabled(q, pol))
  		return 0;
  
  	blk_queue_bypass_start(q);
4c55f4f9a   Tejun Heo   blkcg: restructur...
1186
1187
  pd_prealloc:
  	if (!pd_prealloc) {
001bea73e   Tejun Heo   blkcg: replace bl...
1188
  		pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
4c55f4f9a   Tejun Heo   blkcg: restructur...
1189
  		if (!pd_prealloc) {
a2b1693ba   Tejun Heo   blkcg: implement ...
1190
  			ret = -ENOMEM;
4c55f4f9a   Tejun Heo   blkcg: restructur...
1191
  			goto out_bypass_end;
a2b1693ba   Tejun Heo   blkcg: implement ...
1192
  		}
a2b1693ba   Tejun Heo   blkcg: implement ...
1193
  	}
a2b1693ba   Tejun Heo   blkcg: implement ...
1194
1195
1196
  	spin_lock_irq(q->queue_lock);
  
  	list_for_each_entry(blkg, &q->blkg_list, q_node) {
4c55f4f9a   Tejun Heo   blkcg: restructur...
1197
1198
1199
1200
  		struct blkg_policy_data *pd;
  
  		if (blkg->pd[pol->plid])
  			continue;
a2b1693ba   Tejun Heo   blkcg: implement ...
1201

001bea73e   Tejun Heo   blkcg: replace bl...
1202
  		pd = pol->pd_alloc_fn(GFP_NOWAIT, q->node);
4c55f4f9a   Tejun Heo   blkcg: restructur...
1203
1204
1205
1206
1207
1208
  		if (!pd)
  			swap(pd, pd_prealloc);
  		if (!pd) {
  			spin_unlock_irq(q->queue_lock);
  			goto pd_prealloc;
  		}
a2b1693ba   Tejun Heo   blkcg: implement ...
1209
1210
1211
  
  		blkg->pd[pol->plid] = pd;
  		pd->blkg = blkg;
b276a876a   Tejun Heo   blkcg: add blkg_p...
1212
  		pd->plid = pol->plid;
3e4187104   Tejun Heo   blkcg: make blkcg...
1213
  		if (pol->pd_init_fn)
a9520cd6f   Tejun Heo   blkcg: make blkcg...
1214
  			pol->pd_init_fn(pd);
a2b1693ba   Tejun Heo   blkcg: implement ...
1215
1216
1217
1218
  	}
  
  	__set_bit(pol->plid, q->blkcg_pols);
  	ret = 0;
4c55f4f9a   Tejun Heo   blkcg: restructur...
1219

a2b1693ba   Tejun Heo   blkcg: implement ...
1220
  	spin_unlock_irq(q->queue_lock);
4c55f4f9a   Tejun Heo   blkcg: restructur...
1221
  out_bypass_end:
a2b1693ba   Tejun Heo   blkcg: implement ...
1222
  	blk_queue_bypass_end(q);
001bea73e   Tejun Heo   blkcg: replace bl...
1223
1224
  	if (pd_prealloc)
  		pol->pd_free_fn(pd_prealloc);
a2b1693ba   Tejun Heo   blkcg: implement ...
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
  	return ret;
  }
  EXPORT_SYMBOL_GPL(blkcg_activate_policy);
  
/**
 * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue
 * @q: request_queue of interest
 * @pol: blkcg policy to deactivate
 *
 * Deactivate @pol on @q.  Follows the same synchronization rules as
 * blkcg_activate_policy().
 */
void blkcg_deactivate_policy(struct request_queue *q,
			     const struct blkcg_policy *pol)
{
	struct blkcg_gq *blkg;

	/* nothing to undo if @pol was never enabled on @q */
	if (!blkcg_policy_enabled(q, pol))
		return;

	/* bypass keeps the IO path away from blkgs during teardown */
	blk_queue_bypass_start(q);
	spin_lock_irq(q->queue_lock);

	/* clear the enabled bit first so blkcg_policy_enabled() fails */
	__clear_bit(pol->plid, q->blkcg_pols);

	list_for_each_entry(blkg, &q->blkg_list, q_node) {
		/* grab blkcg lock too while removing @pd from @blkg */
		spin_lock(&blkg->blkcg->lock);

		if (blkg->pd[pol->plid]) {
			/* offline (if provided) before freeing the pd */
			if (pol->pd_offline_fn)
				pol->pd_offline_fn(blkg->pd[pol->plid]);
			pol->pd_free_fn(blkg->pd[pol->plid]);
			blkg->pd[pol->plid] = NULL;
		}

		spin_unlock(&blkg->blkcg->lock);
	}

	spin_unlock_irq(q->queue_lock);
	blk_queue_bypass_end(q);
}
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
  
  /**
3c798398e   Tejun Heo   blkcg: mass renam...
1269
1270
   * blkcg_policy_register - register a blkcg policy
   * @pol: blkcg policy to register
8bd435b30   Tejun Heo   blkcg: remove sta...
1271
   *
3c798398e   Tejun Heo   blkcg: mass renam...
1272
1273
   * Register @pol with blkcg core.  Might sleep and @pol may be modified on
   * successful registration.  Returns 0 on success and -errno on failure.
8bd435b30   Tejun Heo   blkcg: remove sta...
1274
   */
d5bf02914   Jens Axboe   Revert "block: ad...
1275
  int blkcg_policy_register(struct blkcg_policy *pol)
3e2520668   Vivek Goyal   blkio: Implement ...
1276
  {
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1277
  	struct blkcg *blkcg;
8bd435b30   Tejun Heo   blkcg: remove sta...
1278
  	int i, ret;
e8989fae3   Tejun Heo   blkcg: unify blkg...
1279

838f13bf4   Tejun Heo   blkcg: allow blkc...
1280
  	mutex_lock(&blkcg_pol_register_mutex);
bc0d6501a   Tejun Heo   blkcg: kill blkio...
1281
  	mutex_lock(&blkcg_pol_mutex);
8bd435b30   Tejun Heo   blkcg: remove sta...
1282
1283
1284
  	/* find an empty slot */
  	ret = -ENOSPC;
  	for (i = 0; i < BLKCG_MAX_POLS; i++)
3c798398e   Tejun Heo   blkcg: mass renam...
1285
  		if (!blkcg_policy[i])
8bd435b30   Tejun Heo   blkcg: remove sta...
1286
1287
  			break;
  	if (i >= BLKCG_MAX_POLS)
838f13bf4   Tejun Heo   blkcg: allow blkc...
1288
  		goto err_unlock;
035d10b2f   Tejun Heo   blkcg: add blkio_...
1289

06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1290
  	/* register @pol */
3c798398e   Tejun Heo   blkcg: mass renam...
1291
  	pol->plid = i;
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1292
1293
1294
  	blkcg_policy[pol->plid] = pol;
  
  	/* allocate and install cpd's */
e4a9bde95   Tejun Heo   blkcg: replace bl...
1295
  	if (pol->cpd_alloc_fn) {
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1296
1297
  		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
  			struct blkcg_policy_data *cpd;
e4a9bde95   Tejun Heo   blkcg: replace bl...
1298
  			cpd = pol->cpd_alloc_fn(GFP_KERNEL);
bbb427e34   Bart Van Assche   blkcg: Unlock blk...
1299
  			if (!cpd)
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1300
  				goto err_free_cpds;
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1301

814376483   Tejun Heo   blkcg: minor upda...
1302
1303
  			blkcg->cpd[pol->plid] = cpd;
  			cpd->blkcg = blkcg;
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1304
  			cpd->plid = pol->plid;
814376483   Tejun Heo   blkcg: minor upda...
1305
  			pol->cpd_init_fn(cpd);
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1306
1307
  		}
  	}
838f13bf4   Tejun Heo   blkcg: allow blkc...
1308
  	mutex_unlock(&blkcg_pol_mutex);
8bd435b30   Tejun Heo   blkcg: remove sta...
1309

8bd435b30   Tejun Heo   blkcg: remove sta...
1310
  	/* everything is in place, add intf files for the new policy */
2ee867dcf   Tejun Heo   blkcg: implement ...
1311
1312
1313
  	if (pol->dfl_cftypes)
  		WARN_ON(cgroup_add_dfl_cftypes(&io_cgrp_subsys,
  					       pol->dfl_cftypes));
880f50e22   Tejun Heo   blkcg: mark exist...
1314
  	if (pol->legacy_cftypes)
c165b3e3c   Tejun Heo   blkcg: rename sub...
1315
  		WARN_ON(cgroup_add_legacy_cftypes(&io_cgrp_subsys,
880f50e22   Tejun Heo   blkcg: mark exist...
1316
  						  pol->legacy_cftypes));
838f13bf4   Tejun Heo   blkcg: allow blkc...
1317
1318
  	mutex_unlock(&blkcg_pol_register_mutex);
  	return 0;
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1319
  err_free_cpds:
e4a9bde95   Tejun Heo   blkcg: replace bl...
1320
  	if (pol->cpd_alloc_fn) {
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1321
  		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
e4a9bde95   Tejun Heo   blkcg: replace bl...
1322
1323
1324
1325
  			if (blkcg->cpd[pol->plid]) {
  				pol->cpd_free_fn(blkcg->cpd[pol->plid]);
  				blkcg->cpd[pol->plid] = NULL;
  			}
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1326
1327
1328
  		}
  	}
  	blkcg_policy[pol->plid] = NULL;
838f13bf4   Tejun Heo   blkcg: allow blkc...
1329
  err_unlock:
bc0d6501a   Tejun Heo   blkcg: kill blkio...
1330
  	mutex_unlock(&blkcg_pol_mutex);
838f13bf4   Tejun Heo   blkcg: allow blkc...
1331
  	mutex_unlock(&blkcg_pol_register_mutex);
8bd435b30   Tejun Heo   blkcg: remove sta...
1332
  	return ret;
3e2520668   Vivek Goyal   blkio: Implement ...
1333
  }
3c798398e   Tejun Heo   blkcg: mass renam...
1334
  EXPORT_SYMBOL_GPL(blkcg_policy_register);
3e2520668   Vivek Goyal   blkio: Implement ...
1335

8bd435b30   Tejun Heo   blkcg: remove sta...
1336
/**
 * blkcg_policy_unregister - unregister a blkcg policy
 * @pol: blkcg policy to unregister
 *
 * Undo blkcg_policy_register(@pol).  Might sleep.
 */
void blkcg_policy_unregister(struct blkcg_policy *pol)
{
	struct blkcg *blkcg;

	mutex_lock(&blkcg_pol_register_mutex);

	/* @pol must currently occupy its registered slot */
	if (WARN_ON(blkcg_policy[pol->plid] != pol))
		goto out_unlock;

	/* kill the intf files first */
	if (pol->dfl_cftypes)
		cgroup_rm_cftypes(pol->dfl_cftypes);
	if (pol->legacy_cftypes)
		cgroup_rm_cftypes(pol->legacy_cftypes);

	/* remove cpds and unregister */
	mutex_lock(&blkcg_pol_mutex);

	if (pol->cpd_alloc_fn) {
		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
			if (blkcg->cpd[pol->plid]) {
				pol->cpd_free_fn(blkcg->cpd[pol->plid]);
				blkcg->cpd[pol->plid] = NULL;
			}
		}
	}
	blkcg_policy[pol->plid] = NULL;

	mutex_unlock(&blkcg_pol_mutex);
out_unlock:
	mutex_unlock(&blkcg_pol_register_mutex);
}
EXPORT_SYMBOL_GPL(blkcg_policy_unregister);