block/blk-cgroup.c

  // SPDX-License-Identifier: GPL-2.0
  /*
   * Common Block IO controller cgroup interface
   *
   * Based on ideas and code from CFQ, CFS and BFQ:
   * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
   *
   * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
   *		      Paolo Valente <paolo.valente@unimore.it>
   *
   * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
   * 	              Nauman Rafique <nauman@google.com>
   *
   * For policy-specific per-blkcg data:
   * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
   *                    Arianna Avanzini <avanzini.arianna@gmail.com>
   */
  #include <linux/ioprio.h>
  #include <linux/kdev_t.h>
  #include <linux/module.h>
  #include <linux/sched/signal.h>
  #include <linux/err.h>
  #include <linux/blkdev.h>
  #include <linux/backing-dev.h>
  #include <linux/slab.h>
  #include <linux/genhd.h>
  #include <linux/delay.h>
  #include <linux/atomic.h>
  #include <linux/ctype.h>
  #include <linux/blk-cgroup.h>
  #include <linux/tracehook.h>
  #include <linux/psi.h>
  #include "blk.h"

  #define MAX_KEY_LEN 100
  /*
   * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
   * blkcg_pol_register_mutex nests outside of it and synchronizes entire
   * policy [un]register operations including cgroup file additions /
   * removals.  Putting cgroup file registration outside blkcg_pol_mutex
   * allows grabbing it from cgroup callbacks.
   */
  static DEFINE_MUTEX(blkcg_pol_register_mutex);
  static DEFINE_MUTEX(blkcg_pol_mutex);

  struct blkcg blkcg_root;
  EXPORT_SYMBOL_GPL(blkcg_root);

  struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css;
  EXPORT_SYMBOL_GPL(blkcg_root_css);

  static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];

  static LIST_HEAD(all_blkcgs);		/* protected by blkcg_pol_mutex */
  bool blkcg_debug_stats = false;
  static struct workqueue_struct *blkcg_punt_bio_wq;

  static bool blkcg_policy_enabled(struct request_queue *q,
  				 const struct blkcg_policy *pol)
  {
  	return pol && test_bit(pol->plid, q->blkcg_pols);
  }
  /**
   * blkg_free - free a blkg
   * @blkg: blkg to free
   *
   * Free @blkg which may be partially allocated.
   */
  static void blkg_free(struct blkcg_gq *blkg)
  {
  	int i;
  
  	if (!blkg)
  		return;
  	for (i = 0; i < BLKCG_MAX_POLS; i++)
  		if (blkg->pd[i])
  			blkcg_policy[i]->pd_free_fn(blkg->pd[i]);

  	free_percpu(blkg->iostat_cpu);
  	percpu_ref_exit(&blkg->refcnt);
  	kfree(blkg);
  }
  static void __blkg_release(struct rcu_head *rcu)
  {
  	struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
  	WARN_ON(!bio_list_empty(&blkg->async_bios));
  	/* release the blkcg and parent blkg refs this blkg has been holding */
  	css_put(&blkg->blkcg->css);
  	if (blkg->parent)
  		blkg_put(blkg->parent);
  	blkg_free(blkg);
  }
  
  /*
   * A group is RCU protected, but having an rcu lock does not mean that one
   * can access all the fields of blkg and assume these are valid.  For
   * example, don't try to follow throtl_data and request queue links.
   *
   * Having a reference to blkg under an rcu allows accesses to only values
   * local to groups like group stats and group rate limits.
   */
  static void blkg_release(struct percpu_ref *ref)
  {
  	struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
  
  	call_rcu(&blkg->rcu_head, __blkg_release);
  }
  static void blkg_async_bio_workfn(struct work_struct *work)
  {
  	struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
  					     async_bio_work);
  	struct bio_list bios = BIO_EMPTY_LIST;
  	struct bio *bio;
  	struct blk_plug plug;
  	bool need_plug = false;
  
  	/* as long as there are pending bios, @blkg can't go away */
  	spin_lock_bh(&blkg->async_bio_lock);
  	bio_list_merge(&bios, &blkg->async_bios);
  	bio_list_init(&blkg->async_bios);
  	spin_unlock_bh(&blkg->async_bio_lock);
  	/* start plug only when bio_list contains at least 2 bios */
  	if (bios.head && bios.head->bi_next) {
  		need_plug = true;
  		blk_start_plug(&plug);
  	}
  	while ((bio = bio_list_pop(&bios)))
  		submit_bio(bio);
  	if (need_plug)
  		blk_finish_plug(&plug);
  }
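  
  /*
   * Note: bios flagged with REQ_CGROUP_PUNT are queued on blkg->async_bios
   * by the punt submission path and resubmitted from the work item above on
   * blkcg_punt_bio_wq, so that the actual submission runs in a context
   * associated with the bio's blkcg rather than in whichever task originally
   * queued the bio.
   */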
  /**
   * blkg_alloc - allocate a blkg
   * @blkcg: block cgroup the new blkg is associated with
   * @q: request_queue the new blkg is associated with
   * @gfp_mask: allocation mask to use
   *
   * Allocate a new blkg associating @blkcg and @q.
   */
  static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
  				   gfp_t gfp_mask)
  {
  	struct blkcg_gq *blkg;
  	int i, cpu;
  
  	/* alloc and init base part */
  	blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
  	if (!blkg)
  		return NULL;
  	if (percpu_ref_init(&blkg->refcnt, blkg_release, 0, gfp_mask))
  		goto err_free;
  	blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask);
  	if (!blkg->iostat_cpu)
  		goto err_free;
  	blkg->q = q;
  	INIT_LIST_HEAD(&blkg->q_node);
  	spin_lock_init(&blkg->async_bio_lock);
  	bio_list_init(&blkg->async_bios);
  	INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
  	blkg->blkcg = blkcg;

  	u64_stats_init(&blkg->iostat.sync);
  	for_each_possible_cpu(cpu)
  		u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync);
  	for (i = 0; i < BLKCG_MAX_POLS; i++) {
  		struct blkcg_policy *pol = blkcg_policy[i];
  		struct blkg_policy_data *pd;

  		if (!blkcg_policy_enabled(q, pol))
  			continue;
  
  		/* alloc per-policy data and attach it to blkg */
  		pd = pol->pd_alloc_fn(gfp_mask, q, blkcg);
  		if (!pd)
  			goto err_free;

  		blkg->pd[i] = pd;
  		pd->blkg = blkg;
  		pd->plid = i;
  	}
  	return blkg;
  
  err_free:
  	blkg_free(blkg);
  	return NULL;
  }
  struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
  				      struct request_queue *q, bool update_hint)
  {
  	struct blkcg_gq *blkg;

  	/*
  	 * Hint didn't match.  Look up from the radix tree.  Note that the
  	 * hint can only be updated under queue_lock as otherwise @blkg
  	 * could have already been removed from blkg_tree.  The caller is
  	 * responsible for grabbing queue_lock if @update_hint.
  	 */
  	blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
  	if (blkg && blkg->q == q) {
  		if (update_hint) {
  			lockdep_assert_held(&q->queue_lock);
  			rcu_assign_pointer(blkcg->blkg_hint, blkg);
  		}
  		return blkg;
  	}

  	return NULL;
  }
  EXPORT_SYMBOL_GPL(blkg_lookup_slowpath);
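  
  /*
   * The fast path, blkg_lookup() in blk-cgroup.h, returns q->root_blkg for
   * blkcg_root and otherwise checks blkcg->blkg_hint under RCU; it falls
   * back to blkg_lookup_slowpath() above only when the hint misses.
   */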

  /*
   * If @new_blkg is %NULL, this function tries to allocate a new one as
   * necessary using %GFP_NOWAIT.  @new_blkg is always consumed on return.
   */
  static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
  				    struct request_queue *q,
  				    struct blkcg_gq *new_blkg)
  {
  	struct blkcg_gq *blkg;
  	int i, ret;

  	WARN_ON_ONCE(!rcu_read_lock_held());
  	lockdep_assert_held(&q->queue_lock);

  	/* request_queue is dying, do not create/recreate a blkg */
  	if (blk_queue_dying(q)) {
  		ret = -ENODEV;
  		goto err_free_blkg;
  	}
  	/* blkg holds a reference to blkcg */
  	if (!css_tryget_online(&blkcg->css)) {
  		ret = -ENODEV;
  		goto err_free_blkg;
  	}

  	/* allocate */
  	if (!new_blkg) {
  		new_blkg = blkg_alloc(blkcg, q, GFP_NOWAIT | __GFP_NOWARN);
  		if (unlikely(!new_blkg)) {
  			ret = -ENOMEM;
  			goto err_put_css;
  		}
  	}
  	blkg = new_blkg;

  	/* link parent */
  	if (blkcg_parent(blkcg)) {
  		blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
  		if (WARN_ON_ONCE(!blkg->parent)) {
  			ret = -ENODEV;
  			goto err_put_css;
  		}
  		blkg_get(blkg->parent);
  	}
  	/* invoke per-policy init */
  	for (i = 0; i < BLKCG_MAX_POLS; i++) {
  		struct blkcg_policy *pol = blkcg_policy[i];
  
  		if (blkg->pd[i] && pol->pd_init_fn)
  			pol->pd_init_fn(blkg->pd[i]);
  	}
  
  	/* insert */
  	spin_lock(&blkcg->lock);
  	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
  	if (likely(!ret)) {
  		hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
  		list_add(&blkg->q_node, &q->blkg_list);
  
  		for (i = 0; i < BLKCG_MAX_POLS; i++) {
  			struct blkcg_policy *pol = blkcg_policy[i];
  
  			if (blkg->pd[i] && pol->pd_online_fn)
  				pol->pd_online_fn(blkg->pd[i]);
  		}
  	}
  	blkg->online = true;
  	spin_unlock(&blkcg->lock);

  	if (!ret)
  		return blkg;

  	/* @blkg failed to be fully initialized, use the usual release path */
  	blkg_put(blkg);
  	return ERR_PTR(ret);
  err_put_css:
  	css_put(&blkcg->css);
  err_free_blkg:
  	blkg_free(new_blkg);
  	return ERR_PTR(ret);
  }

  /**
   * blkg_lookup_create - lookup blkg, try to create one if not there
   * @blkcg: blkcg of interest
   * @q: request_queue of interest
   *
   * Lookup blkg for the @blkcg - @q pair.  If it doesn't exist, try to
   * create one.  blkg creation is performed recursively from blkcg_root such
   * that all non-root blkg's have access to the parent blkg.  This function
   * should be called under RCU read lock and takes @q->queue_lock.
   *
   * Returns the blkg or the closest blkg if blkg_create() fails as it walks
   * down from root.
   */
  static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
  		struct request_queue *q)
  {
  	struct blkcg_gq *blkg;
  	unsigned long flags;
  
  	WARN_ON_ONCE(!rcu_read_lock_held());

  	blkg = blkg_lookup(blkcg, q);
  	if (blkg)
  		return blkg;
  	spin_lock_irqsave(&q->queue_lock, flags);
  	blkg = __blkg_lookup(blkcg, q, true);
  	if (blkg)
  		goto found;
  	/*
  	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
  	 * non-root blkgs have access to their parents.  Returns the closest
  	 * blkg to the intended blkg should blkg_create() fail.
  	 */
  	while (true) {
  		struct blkcg *pos = blkcg;
  		struct blkcg *parent = blkcg_parent(blkcg);
  		struct blkcg_gq *ret_blkg = q->root_blkg;
  
  		while (parent) {
  			blkg = __blkg_lookup(parent, q, false);
  			if (blkg) {
  				/* remember closest blkg */
  				ret_blkg = blkg;
  				break;
  			}
  			pos = parent;
  			parent = blkcg_parent(parent);
  		}
  		blkg = blkg_create(pos, q, NULL);
  		if (IS_ERR(blkg)) {
  			blkg = ret_blkg;
  			break;
  		}
  		if (pos == blkcg)
  			break;
  	}
  found:
  	spin_unlock_irqrestore(&q->queue_lock, flags);
  	return blkg;
  }
  static void blkg_destroy(struct blkcg_gq *blkg)
  {
  	struct blkcg *blkcg = blkg->blkcg;
  	int i;

  	lockdep_assert_held(&blkg->q->queue_lock);
  	lockdep_assert_held(&blkcg->lock);
  
  	/* Something wrong if we are trying to remove same group twice */
  	WARN_ON_ONCE(list_empty(&blkg->q_node));
  	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));

  	for (i = 0; i < BLKCG_MAX_POLS; i++) {
  		struct blkcg_policy *pol = blkcg_policy[i];
  
  		if (blkg->pd[i] && pol->pd_offline_fn)
  			pol->pd_offline_fn(blkg->pd[i]);
  	}
  	blkg->online = false;
  	radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
  	list_del_init(&blkg->q_node);
  	hlist_del_init_rcu(&blkg->blkcg_node);

  	/*
  	 * Both setting lookup hint to and clearing it from @blkg are done
  	 * under queue_lock.  If it's not pointing to @blkg now, it never
  	 * will.  Hint assignment itself can race safely.
  	 */
  	if (rcu_access_pointer(blkcg->blkg_hint) == blkg)
  		rcu_assign_pointer(blkcg->blkg_hint, NULL);
  
  	/*
  	 * Put the reference taken at the time of creation so that when all
  	 * queues are gone, group can be destroyed.
  	 */
  	percpu_ref_kill(&blkg->refcnt);
  }
  /**
   * blkg_destroy_all - destroy all blkgs associated with a request_queue
   * @q: request_queue of interest
   *
   * Destroy all blkgs associated with @q.
   */
  static void blkg_destroy_all(struct request_queue *q)
  {
  	struct blkcg_gq *blkg, *n;

  	spin_lock_irq(&q->queue_lock);
  	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
  		struct blkcg *blkcg = blkg->blkcg;

  		spin_lock(&blkcg->lock);
  		blkg_destroy(blkg);
  		spin_unlock(&blkcg->lock);
  	}
  
  	q->root_blkg = NULL;
  	spin_unlock_irq(&q->queue_lock);
  }
  static int blkcg_reset_stats(struct cgroup_subsys_state *css,
  			     struct cftype *cftype, u64 val)
  {
  	struct blkcg *blkcg = css_to_blkcg(css);
  	struct blkcg_gq *blkg;
  	int i, cpu;

  	mutex_lock(&blkcg_pol_mutex);
  	spin_lock_irq(&blkcg->lock);
  
  	/*
  	 * Note that stat reset is racy - it doesn't synchronize against
  	 * stat updates.  This is a debug feature which shouldn't exist
  	 * anyway.  If you get hit by a race, retry.
  	 */
  	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
  		for_each_possible_cpu(cpu) {
  			struct blkg_iostat_set *bis =
  				per_cpu_ptr(blkg->iostat_cpu, cpu);
  			memset(bis, 0, sizeof(*bis));
  		}
  		memset(&blkg->iostat, 0, sizeof(blkg->iostat));

  		for (i = 0; i < BLKCG_MAX_POLS; i++) {
  			struct blkcg_policy *pol = blkcg_policy[i];

  			if (blkg->pd[i] && pol->pd_reset_stats_fn)
  				pol->pd_reset_stats_fn(blkg->pd[i]);
  		}
  	}

  	spin_unlock_irq(&blkcg->lock);
  	mutex_unlock(&blkcg_pol_mutex);
  	return 0;
  }
  const char *blkg_dev_name(struct blkcg_gq *blkg)
  {
  	/* some drivers (floppy) instantiate a queue w/o disk registered */
  	if (blkg->q->backing_dev_info->dev)
  		return bdi_dev_name(blkg->q->backing_dev_info);
  	return NULL;
  }
  /**
   * blkcg_print_blkgs - helper for printing per-blkg data
   * @sf: seq_file to print to
   * @blkcg: blkcg of interest
   * @prfill: fill function to print out a blkg
   * @pol: policy in question
   * @data: data to be passed to @prfill
   * @show_total: to print out sum of prfill return values or not
   *
   * This function invokes @prfill on each blkg of @blkcg if pd for the
   * policy specified by @pol exists.  @prfill is invoked with @sf, the
   * policy data and @data and the matching queue lock held.  If @show_total
   * is %true, the sum of the return values from @prfill is printed with
   * "Total" label at the end.
   *
   * This is to be used to construct print functions for
   * cftype->read_seq_string method.
   */
  void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
  		       u64 (*prfill)(struct seq_file *,
  				     struct blkg_policy_data *, int),
  		       const struct blkcg_policy *pol, int data,
  		       bool show_total)
  {
  	struct blkcg_gq *blkg;
  	u64 total = 0;

  	rcu_read_lock();
  	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
  		spin_lock_irq(&blkg->q->queue_lock);
  		if (blkcg_policy_enabled(blkg->q, pol))
  			total += prfill(sf, blkg->pd[pol->plid], data);
  		spin_unlock_irq(&blkg->q->queue_lock);
  	}
  	rcu_read_unlock();
  
  	if (show_total)
  		seq_printf(sf, "Total %llu
  ", (unsigned long long)total);
  }
  EXPORT_SYMBOL_GPL(blkcg_print_blkgs);
  
  /**
   * __blkg_prfill_u64 - prfill helper for a single u64 value
   * @sf: seq_file to print to
   * @pd: policy private data of interest
   * @v: value to print
   *
   * Print @v to @sf for the device associated with @pd.
   */
  u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
  {
  	const char *dname = blkg_dev_name(pd->blkg);
  
  	if (!dname)
  		return 0;
  
  	seq_printf(sf, "%s %llu
  ", dname, (unsigned long long)v);
  	return v;
  }
  EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
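  
  /*
   * A policy typically wires blkcg_print_blkgs() and __blkg_prfill_u64()
   * together from its cftype ->seq_show callback.  Illustrative sketch
   * only; "example_prfill", "example_weight" and "blkcg_policy_example"
   * are hypothetical names:
   *
   *	static u64 example_prfill(struct seq_file *sf,
   *				  struct blkg_policy_data *pd, int off)
   *	{
   *		return __blkg_prfill_u64(sf, pd, example_weight(pd));
   *	}
   *
   *	static int example_seq_show(struct seq_file *sf, void *v)
   *	{
   *		blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
   *				  example_prfill, &blkcg_policy_example,
   *				  0, false);
   *		return 0;
   *	}
   */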

  /* Performs queue bypass and policy enabled checks then looks up blkg. */
  static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
  					  const struct blkcg_policy *pol,
  					  struct request_queue *q)
  {
  	WARN_ON_ONCE(!rcu_read_lock_held());
  	lockdep_assert_held(&q->queue_lock);
  
  	if (!blkcg_policy_enabled(q, pol))
  		return ERR_PTR(-EOPNOTSUPP);
  	return __blkg_lookup(blkcg, q, true /* update_hint */);
  }
  /**
   * blkcg_conf_get_disk - parse MAJ:MIN and get the matching gendisk
   * @inputp: input string pointer
   *
   * Parse the device node prefix part, MAJ:MIN, of per-blkg config update
   * from @input and get and return the matching gendisk.  *@inputp is
   * updated to point past the device node prefix.  Returns an ERR_PTR()
   * value on error.
   *
   * Use this function iff blkg_conf_prep() can't be used for some reason.
   */
  struct gendisk *blkcg_conf_get_disk(char **inputp)
  {
  	char *input = *inputp;
  	unsigned int major, minor;
  	struct gendisk *disk;
  	int key_len, part;
  
  	if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
  		return ERR_PTR(-EINVAL);
  
  	input += key_len;
  	if (!isspace(*input))
  		return ERR_PTR(-EINVAL);
  	input = skip_spaces(input);
  
  	disk = get_gendisk(MKDEV(major, minor), &part);
  	if (!disk)
  		return ERR_PTR(-ENODEV);
  	if (part) {
  		put_disk_and_module(disk);
  		return ERR_PTR(-ENODEV);
  	}
  
  	*inputp = input;
  	return disk;
  }
  
  /**
   * blkg_conf_prep - parse and prepare for per-blkg config update
   * @blkcg: target block cgroup
   * @pol: target policy
   * @input: input string
   * @ctx: blkg_conf_ctx to be filled
   *
   * Parse per-blkg config update from @input and initialize @ctx with the
   * result.  @ctx->blkg points to the blkg to be updated and @ctx->body the
   * part of @input following MAJ:MIN.  This function returns with RCU read
   * lock and queue lock held and must be paired with blkg_conf_finish().
   */
  int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
  		   char *input, struct blkg_conf_ctx *ctx)
  	__acquires(rcu) __acquires(&disk->queue->queue_lock)
  {
  	struct gendisk *disk;
  	struct request_queue *q;
  	struct blkcg_gq *blkg;
  	int ret;

  	disk = blkcg_conf_get_disk(&input);
  	if (IS_ERR(disk))
  		return PTR_ERR(disk);

  	q = disk->queue;

  	rcu_read_lock();
  	spin_lock_irq(&q->queue_lock);

  	blkg = blkg_lookup_check(blkcg, pol, q);
  	if (IS_ERR(blkg)) {
  		ret = PTR_ERR(blkg);
  		goto fail_unlock;
  	}
  
  	if (blkg)
  		goto success;
  
  	/*
  	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
  	 * non-root blkgs have access to their parents.
  	 */
  	while (true) {
  		struct blkcg *pos = blkcg;
  		struct blkcg *parent;
  		struct blkcg_gq *new_blkg;
  
  		parent = blkcg_parent(blkcg);
  		while (parent && !__blkg_lookup(parent, q, false)) {
  			pos = parent;
  			parent = blkcg_parent(parent);
  		}
  
  		/* Drop locks to do new blkg allocation with GFP_KERNEL. */
  		spin_unlock_irq(&q->queue_lock);
  		rcu_read_unlock();
  
  		new_blkg = blkg_alloc(pos, q, GFP_KERNEL);
  		if (unlikely(!new_blkg)) {
  			ret = -ENOMEM;
  			goto fail;
  		}

  		if (radix_tree_preload(GFP_KERNEL)) {
  			blkg_free(new_blkg);
  			ret = -ENOMEM;
  			goto fail;
  		}
  		rcu_read_lock();
  		spin_lock_irq(&q->queue_lock);
  
  		blkg = blkg_lookup_check(pos, pol, q);
  		if (IS_ERR(blkg)) {
  			ret = PTR_ERR(blkg);
  			blkg_free(new_blkg);
  			goto fail_preloaded;
  		}
  
  		if (blkg) {
  			blkg_free(new_blkg);
  		} else {
  			blkg = blkg_create(pos, q, new_blkg);
  			if (IS_ERR(blkg)) {
  				ret = PTR_ERR(blkg);
  				goto fail_preloaded;
  			}
  		}
  		radix_tree_preload_end();
  		if (pos == blkcg)
  			goto success;
  	}
  success:
  	ctx->disk = disk;
  	ctx->blkg = blkg;
  	ctx->body = input;
  	return 0;

  fail_preloaded:
  	radix_tree_preload_end();
  fail_unlock:
  	spin_unlock_irq(&q->queue_lock);
  	rcu_read_unlock();
  fail:
  	put_disk_and_module(disk);
  	/*
  	 * If queue was bypassing, we should retry.  Do so after a
  	 * short msleep().  It isn't strictly necessary but queue
  	 * can be bypassing for some time and it's always nice to
  	 * avoid busy looping.
  	 */
  	if (ret == -EBUSY) {
  		msleep(10);
  		ret = restart_syscall();
  	}
  	return ret;
  }
  EXPORT_SYMBOL_GPL(blkg_conf_prep);

  /**
   * blkg_conf_finish - finish up per-blkg config update
   * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
   *
   * Finish up after per-blkg config update.  This function must be paired
   * with blkg_conf_prep().
   */
  void blkg_conf_finish(struct blkg_conf_ctx *ctx)
  	__releases(&ctx->disk->queue->queue_lock) __releases(rcu)
  {
  	spin_unlock_irq(&ctx->disk->queue->queue_lock);
  	rcu_read_unlock();
  	put_disk_and_module(ctx->disk);
  }
  EXPORT_SYMBOL_GPL(blkg_conf_finish);
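  
  /*
   * Typical use of the pair above from a policy's cftype ->write handler
   * (illustrative sketch; "blkcg_policy_example" and the config parsing
   * are hypothetical):
   *
   *	struct blkg_conf_ctx ctx;
   *	int ret;
   *
   *	ret = blkg_conf_prep(blkcg, &blkcg_policy_example, buf, &ctx);
   *	if (ret)
   *		return ret;
   *	... parse ctx.body and apply the new config to ctx.blkg ...
   *	blkg_conf_finish(&ctx);
   */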

  static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
  {
  	int i;
  
  	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
  		dst->bytes[i] = src->bytes[i];
  		dst->ios[i] = src->ios[i];
  	}
  }
  
  static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
  {
  	int i;
  
  	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
  		dst->bytes[i] += src->bytes[i];
  		dst->ios[i] += src->ios[i];
  	}
  }
  
  static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
  {
  	int i;
  
  	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
  		dst->bytes[i] -= src->bytes[i];
  		dst->ios[i] -= src->ios[i];
  	}
  }
  
  static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
  {
  	struct blkcg *blkcg = css_to_blkcg(css);
  	struct blkcg_gq *blkg;
  
  	rcu_read_lock();
  
  	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
  		struct blkcg_gq *parent = blkg->parent;
  		struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
  		struct blkg_iostat cur, delta;
  		unsigned int seq;
  
  		/* fetch the current per-cpu values */
  		do {
  			seq = u64_stats_fetch_begin(&bisc->sync);
  			blkg_iostat_set(&cur, &bisc->cur);
  		} while (u64_stats_fetch_retry(&bisc->sync, seq));
  
  		/* propagate percpu delta to global */
  		u64_stats_update_begin(&blkg->iostat.sync);
  		blkg_iostat_set(&delta, &cur);
  		blkg_iostat_sub(&delta, &bisc->last);
  		blkg_iostat_add(&blkg->iostat.cur, &delta);
  		blkg_iostat_add(&bisc->last, &delta);
  		u64_stats_update_end(&blkg->iostat.sync);
  
  		/* propagate global delta to parent */
  		if (parent) {
  			u64_stats_update_begin(&parent->iostat.sync);
  			blkg_iostat_set(&delta, &blkg->iostat.cur);
  			blkg_iostat_sub(&delta, &blkg->iostat.last);
  			blkg_iostat_add(&parent->iostat.cur, &delta);
  			blkg_iostat_add(&blkg->iostat.last, &delta);
  			u64_stats_update_end(&parent->iostat.sync);
  		}
  	}
  
  	rcu_read_unlock();
  }
  /*
   * The rstat algorithms intentionally don't handle the root cgroup to avoid
   * incurring overhead when no cgroups are defined. For that reason,
   * cgroup_rstat_flush in blkcg_print_stat does not actually fill out the
   * iostat in the root cgroup's blkcg_gq.
   *
   * However, we would like to re-use the printing code between the root and
   * non-root cgroups to the extent possible. For that reason, we simulate
   * flushing the root cgroup's stats by explicitly filling in the iostat
   * with disk level statistics.
   */
  static void blkcg_fill_root_iostats(void)
  {
  	struct class_dev_iter iter;
  	struct device *dev;
  
  	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
  	while ((dev = class_dev_iter_next(&iter))) {
  		struct gendisk *disk = dev_to_disk(dev);
  		struct hd_struct *part = disk_get_part(disk, 0);
  		struct blkcg_gq *blkg = blk_queue_root_blkg(disk->queue);
  		struct blkg_iostat tmp;
  		int cpu;
  
  		memset(&tmp, 0, sizeof(tmp));
  		for_each_possible_cpu(cpu) {
  			struct disk_stats *cpu_dkstats;
  
  			cpu_dkstats = per_cpu_ptr(part->dkstats, cpu);
  			tmp.ios[BLKG_IOSTAT_READ] +=
  				cpu_dkstats->ios[STAT_READ];
  			tmp.ios[BLKG_IOSTAT_WRITE] +=
  				cpu_dkstats->ios[STAT_WRITE];
  			tmp.ios[BLKG_IOSTAT_DISCARD] +=
  				cpu_dkstats->ios[STAT_DISCARD];
  			// convert sectors to bytes
  			tmp.bytes[BLKG_IOSTAT_READ] +=
  				cpu_dkstats->sectors[STAT_READ] << 9;
  			tmp.bytes[BLKG_IOSTAT_WRITE] +=
  				cpu_dkstats->sectors[STAT_WRITE] << 9;
  			tmp.bytes[BLKG_IOSTAT_DISCARD] +=
  				cpu_dkstats->sectors[STAT_DISCARD] << 9;
  
  			u64_stats_update_begin(&blkg->iostat.sync);
  			blkg_iostat_set(&blkg->iostat.cur, &tmp);
  			u64_stats_update_end(&blkg->iostat.sync);
  		}
  		disk_put_part(part);
  	}
  }
  static int blkcg_print_stat(struct seq_file *sf, void *v)
  {
  	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
  	struct blkcg_gq *blkg;
  	if (!seq_css(sf)->parent)
  		blkcg_fill_root_iostats();
  	else
  		cgroup_rstat_flush(blkcg->css.cgroup);
  	rcu_read_lock();
  
  	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
  		struct blkg_iostat_set *bis = &blkg->iostat;
  		const char *dname;
  		char *buf;
  		u64 rbytes, wbytes, rios, wios, dbytes, dios;
  		size_t size = seq_get_buf(sf, &buf), off = 0;
  		int i;
  		bool has_stats = false;
  		unsigned seq;

  		spin_lock_irq(&blkg->q->queue_lock);
  
  		if (!blkg->online)
  			goto skip;
  		dname = blkg_dev_name(blkg);
  		if (!dname)
  			goto skip;

  		/*
  		 * Hooray string manipulation, count is the size written NOT
  		 * INCLUDING THE \0, so size is now count+1 less than what we
  		 * had before, but we want to start writing the next bit from
  		 * the \0 so we only add count to buf.
  		 */
  		off += scnprintf(buf+off, size-off, "%s ", dname);
  		do {
  			seq = u64_stats_fetch_begin(&bis->sync);

  			rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
  			wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
  			dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
  			rios = bis->cur.ios[BLKG_IOSTAT_READ];
  			wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
  			dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
  		} while (u64_stats_fetch_retry(&bis->sync, seq));

  		if (rbytes || wbytes || rios || wios) {
  			has_stats = true;
  			off += scnprintf(buf+off, size-off,
  					 "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
  					 rbytes, wbytes, rios, wios,
  					 dbytes, dios);
  		}
  		if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
  			has_stats = true;
  			off += scnprintf(buf+off, size-off,
  					 " use_delay=%d delay_nsec=%llu",
  					 atomic_read(&blkg->use_delay),
  					(unsigned long long)atomic64_read(&blkg->delay_nsec));
  		}
  		for (i = 0; i < BLKCG_MAX_POLS; i++) {
  			struct blkcg_policy *pol = blkcg_policy[i];
  			size_t written;
  
  			if (!blkg->pd[i] || !pol->pd_stat_fn)
  				continue;
  
  			written = pol->pd_stat_fn(blkg->pd[i], buf+off, size-off);
  			if (written)
  				has_stats = true;
  			off += written;
  		}

  		if (has_stats) {
  			if (off < size - 1) {
  				off += scnprintf(buf+off, size-off, "
  ");
  				seq_commit(sf, off);
  			} else {
  				seq_commit(sf, -1);
  			}
  		}
  	skip:
  		spin_unlock_irq(&blkg->q->queue_lock);
  	}
  
  	rcu_read_unlock();
  	return 0;
  }
  static struct cftype blkcg_files[] = {
  	{
  		.name = "stat",
  		.seq_show = blkcg_print_stat,
  	},
  	{ }	/* terminate */
  };
  static struct cftype blkcg_legacy_files[] = {
  	{
  		.name = "reset_stats",
  		.write_u64 = blkcg_reset_stats,
  	},
  	{ }	/* terminate */
  };
  /*
   * blkcg destruction is a three-stage process.
   *
   * 1. Destruction starts.  The blkcg_css_offline() callback is invoked
   *    which offlines writeback.  Here we tie the next stage of blkg destruction
   *    to the completion of writeback associated with the blkcg.  This lets us
   *    avoid punting potentially large amounts of outstanding writeback to root
   *    while maintaining any ongoing policies.  The next stage is triggered when
   *    the nr_cgwbs count goes to zero.
   *
   * 2. When the nr_cgwbs count goes to zero, blkcg_destroy_blkgs() is called
   *    and handles the destruction of blkgs.  Here the css reference held by
   *    the blkg is put back eventually allowing blkcg_css_free() to be called.
   *    This work may occur in cgwb_release_workfn() on the cgwb_release
   *    workqueue.  Any submitted ios that fail to get the blkg ref will be
   *    punted to the root_blkg.
   *
   * 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called.
   *    This finally frees the blkcg.
   */
  /**
   * blkcg_css_offline - cgroup css_offline callback
   * @css: css of interest
   *
   * This function is called when @css is about to go away.  Here the cgwbs are
   * offlined first and only once writeback associated with the blkcg has
   * finished do we start step 2 (see above).
   */
  static void blkcg_css_offline(struct cgroup_subsys_state *css)
  {
  	struct blkcg *blkcg = css_to_blkcg(css);

  	/* this prevents anyone from attaching or migrating to this blkcg */
  	wb_blkcg_offline(blkcg);
  	/* put the base online pin allowing step 2 to be triggered */
  	blkcg_unpin_online(blkcg);
  }
  
  /**
   * blkcg_destroy_blkgs - responsible for shooting down blkgs
   * @blkcg: blkcg of interest
   *
   * blkgs should be removed while holding both q and blkcg locks.  As blkcg lock
   * is nested inside q lock, this function performs reverse double lock dancing.
   * Destroying the blkgs releases the reference held on the blkcg's css allowing
   * blkcg_css_free to eventually be called.
   *
   * This is the blkcg counterpart of ioc_release_fn().
   */
  void blkcg_destroy_blkgs(struct blkcg *blkcg)
  {
  	spin_lock_irq(&blkcg->lock);

  	while (!hlist_empty(&blkcg->blkg_list)) {
  		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
  						struct blkcg_gq, blkcg_node);
  		struct request_queue *q = blkg->q;
  		if (spin_trylock(&q->queue_lock)) {
  			blkg_destroy(blkg);
  			spin_unlock(&q->queue_lock);
  		} else {
  			spin_unlock_irq(&blkcg->lock);
  			cpu_relax();
  			spin_lock_irq(&blkcg->lock);
  		}
  	}

  	spin_unlock_irq(&blkcg->lock);
  }
  static void blkcg_css_free(struct cgroup_subsys_state *css)
  {
  	struct blkcg *blkcg = css_to_blkcg(css);
  	int i;

  	mutex_lock(&blkcg_pol_mutex);

  	list_del(&blkcg->all_blkcgs_node);

  	for (i = 0; i < BLKCG_MAX_POLS; i++)
  		if (blkcg->cpd[i])
  			blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
  
  	mutex_unlock(&blkcg_pol_mutex);
  	kfree(blkcg);
  }
  static struct cgroup_subsys_state *
  blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
  {
  	struct blkcg *blkcg;
  	struct cgroup_subsys_state *ret;
  	int i;

  	mutex_lock(&blkcg_pol_mutex);
  	if (!parent_css) {
  		blkcg = &blkcg_root;
  	} else {
  		blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
  		if (!blkcg) {
  			ret = ERR_PTR(-ENOMEM);
  			goto unlock;
  		}
  	}
  
  	for (i = 0; i < BLKCG_MAX_POLS ; i++) {
  		struct blkcg_policy *pol = blkcg_policy[i];
  		struct blkcg_policy_data *cpd;
  
  		/*
  		 * If the policy hasn't been attached yet, wait for it
  		 * to be attached before doing anything else. Otherwise,
  		 * check if the policy requires any specific per-cgroup
  		 * data: if it does, allocate and initialize it.
  		 */
  		if (!pol || !pol->cpd_alloc_fn)
  			continue;
  		cpd = pol->cpd_alloc_fn(GFP_KERNEL);
  		if (!cpd) {
  			ret = ERR_PTR(-ENOMEM);
  			goto free_pd_blkcg;
  		}
  		blkcg->cpd[i] = cpd;
  		cpd->blkcg = blkcg;
  		cpd->plid = i;
  		if (pol->cpd_init_fn)
  			pol->cpd_init_fn(cpd);
  	}
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1051

31e4c28d9   Vivek Goyal   blkio: Introduce ...
1052
  	spin_lock_init(&blkcg->lock);
d866dbf61   Tejun Heo   blkcg: rename blk...
1053
  	refcount_set(&blkcg->online_pin, 1);
e00f4f4d0   Tejun Heo   block,blkcg: use ...
1054
  	INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN);
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1055
  	INIT_HLIST_HEAD(&blkcg->blkg_list);
52ebea749   Tejun Heo   writeback: make b...
1056
1057
1058
  #ifdef CONFIG_CGROUP_WRITEBACK
  	INIT_LIST_HEAD(&blkcg->cgwb_list);
  #endif
7876f930d   Tejun Heo   blkcg: implement ...
1059
1060
1061
  	list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
  
  	mutex_unlock(&blkcg_pol_mutex);
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1062
  	return &blkcg->css;
e48453c38   Arianna Avanzini   block, cgroup: im...
1063
1064
1065
  
  free_pd_blkcg:
  	for (i--; i >= 0; i--)
e4a9bde95   Tejun Heo   blkcg: replace bl...
1066
1067
  		if (blkcg->cpd[i])
  			blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
4c18c9e96   weiping zhang   blkcg: avoid free...
1068
1069
1070
1071
  
  	if (blkcg != &blkcg_root)
  		kfree(blkcg);
  unlock:
7876f930d   Tejun Heo   blkcg: implement ...
1072
  	mutex_unlock(&blkcg_pol_mutex);
e48453c38   Arianna Avanzini   block, cgroup: im...
1073
  	return ret;
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1074
  }
4308a434e   Tejun Heo   blkcg: don't offl...
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
  static int blkcg_css_online(struct cgroup_subsys_state *css)
  {
  	struct blkcg *blkcg = css_to_blkcg(css);
  	struct blkcg *parent = blkcg_parent(blkcg);
  
  	/*
  	 * blkcg_pin_online() is used to delay blkcg offline so that blkgs
  	 * don't go offline while cgwbs are still active on them.  Pin the
  	 * parent so that offline always happens towards the root.
  	 */
  	if (parent)
  		blkcg_pin_online(parent);
  	return 0;
  }
5efd61135   Tejun Heo   blkcg: add blkcg_...
1089
1090
1091
1092
  /**
   * blkcg_init_queue - initialize blkcg part of request queue
   * @q: request_queue to initialize
   *
c62b37d96   Christoph Hellwig   block: move ->mak...
1093
   * Called from blk_alloc_queue(). Responsible for initializing blkcg
5efd61135   Tejun Heo   blkcg: add blkcg_...
1094
1095
1096
1097
1098
1099
1100
   * part of new request_queue @q.
   *
   * RETURNS:
   * 0 on success, -errno on failure.
   */
  int blkcg_init_queue(struct request_queue *q)
  {
d708f0d50   Jens Axboe   Revert "blkcg: al...
1101
1102
  	struct blkcg_gq *new_blkg, *blkg;
  	bool preloaded;
ec13b1d6f   Tejun Heo   blkcg: always cre...
1103
  	int ret;
d708f0d50   Jens Axboe   Revert "blkcg: al...
1104
1105
1106
1107
1108
  	new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
  	if (!new_blkg)
  		return -ENOMEM;
  
  	preloaded = !radix_tree_preload(GFP_KERNEL);
bea548831   Jiang Biao   blkcg: small fix ...
1109
  	/* Make sure the root blkg exists. */
ec13b1d6f   Tejun Heo   blkcg: always cre...
1110
  	rcu_read_lock();
0d945c1f9   Christoph Hellwig   block: remove the...
1111
  	spin_lock_irq(&q->queue_lock);
d708f0d50   Jens Axboe   Revert "blkcg: al...
1112
  	blkg = blkg_create(&blkcg_root, q, new_blkg);
901932a3f   Jiang Biao   blkcg: init root ...
1113
1114
1115
  	if (IS_ERR(blkg))
  		goto err_unlock;
  	q->root_blkg = blkg;
0d945c1f9   Christoph Hellwig   block: remove the...
1116
  	spin_unlock_irq(&q->queue_lock);
ec13b1d6f   Tejun Heo   blkcg: always cre...
1117
  	rcu_read_unlock();
d708f0d50   Jens Axboe   Revert "blkcg: al...
1118
1119
  	if (preloaded)
  		radix_tree_preload_end();
27029b4b1   Yufen Yu   blkcg: fix memlea...
1120
  	ret = blk_throtl_init(q);
04be60b5e   Christoph Hellwig   blk-cgroup: conso...
1121
1122
  	if (ret)
  		goto err_destroy_all;
d70675121   Josef Bacik   block: introduce ...
1123

27029b4b1   Yufen Yu   blkcg: fix memlea...
1124
1125
1126
  	ret = blk_iolatency_init(q);
  	if (ret) {
  		blk_throtl_exit(q);
04be60b5e   Christoph Hellwig   blk-cgroup: conso...
1127
  		goto err_destroy_all;
27029b4b1   Yufen Yu   blkcg: fix memlea...
1128
  	}
04be60b5e   Christoph Hellwig   blk-cgroup: conso...
1129
  	return 0;
901932a3f   Jiang Biao   blkcg: init root ...
1130

04be60b5e   Christoph Hellwig   blk-cgroup: conso...
1131
  err_destroy_all:
04be60b5e   Christoph Hellwig   blk-cgroup: conso...
1132
  	blkg_destroy_all(q);
04be60b5e   Christoph Hellwig   blk-cgroup: conso...
1133
  	return ret;
901932a3f   Jiang Biao   blkcg: init root ...
1134
  err_unlock:
0d945c1f9   Christoph Hellwig   block: remove the...
1135
  	spin_unlock_irq(&q->queue_lock);
901932a3f   Jiang Biao   blkcg: init root ...
1136
1137
1138
1139
  	rcu_read_unlock();
  	if (preloaded)
  		radix_tree_preload_end();
  	return PTR_ERR(blkg);
5efd61135   Tejun Heo   blkcg: add blkcg_...
1140
1141
1142
  }
  
  /**
5efd61135   Tejun Heo   blkcg: add blkcg_...
1143
1144
1145
   * blkcg_exit_queue - exit and release blkcg part of request_queue
   * @q: request_queue being released
   *
7585d5082   Marcos Paulo de Souza   blk-cgroup: Fix d...
1146
   * Called from blk_exit_queue().  Responsible for exiting blkcg part.
5efd61135   Tejun Heo   blkcg: add blkcg_...
1147
1148
1149
   */
  void blkcg_exit_queue(struct request_queue *q)
  {
3c96cb32d   Tejun Heo   blkcg: drop stuff...
1150
  	blkg_destroy_all(q);
5efd61135   Tejun Heo   blkcg: add blkcg_...
1151
1152
  	blk_throtl_exit(q);
  }
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1153
1154
1155
1156
1157
1158
  /*
   * We cannot support shared io contexts, as we have no means to support
   * two tasks with the same ioc in two different groups without major rework
   * of the main cic data structures.  For now we allow a task to change
   * its cgroup only if it's the only owner of its ioc.
   */
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1159
  static int blkcg_can_attach(struct cgroup_taskset *tset)
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1160
  {
bb9d97b6d   Tejun Heo   cgroup: don't use...
1161
  	struct task_struct *task;
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1162
  	struct cgroup_subsys_state *dst_css;
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1163
1164
1165
1166
  	struct io_context *ioc;
  	int ret = 0;
  
  	/* task_lock() is needed to avoid races with exit_io_context() */
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1167
  	cgroup_taskset_for_each(task, dst_css, tset) {
bb9d97b6d   Tejun Heo   cgroup: don't use...
1168
1169
1170
1171
1172
1173
1174
1175
  		task_lock(task);
  		ioc = task->io_context;
  		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
  			ret = -EINVAL;
  		task_unlock(task);
  		if (ret)
  			break;
  	}
31e4c28d9   Vivek Goyal   blkio: Introduce ...
1176
1177
  	return ret;
  }
69d7fde59   Tejun Heo   blkcg: use CGROUP...
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
  static void blkcg_bind(struct cgroup_subsys_state *root_css)
  {
  	int i;
  
  	mutex_lock(&blkcg_pol_mutex);
  
  	for (i = 0; i < BLKCG_MAX_POLS; i++) {
  		struct blkcg_policy *pol = blkcg_policy[i];
  		struct blkcg *blkcg;
  
  		if (!pol || !pol->cpd_bind_fn)
  			continue;
  
  		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node)
  			if (blkcg->cpd[pol->plid])
  				pol->cpd_bind_fn(blkcg->cpd[pol->plid]);
  	}
  	mutex_unlock(&blkcg_pol_mutex);
  }
d09d8df3a   Josef Bacik   blkcg: add generi...
1197
1198
1199
1200
1201
1202
  static void blkcg_exit(struct task_struct *tsk)
  {
  	if (tsk->throttle_queue)
  		blk_put_queue(tsk->throttle_queue);
  	tsk->throttle_queue = NULL;
  }
c165b3e3c   Tejun Heo   blkcg: rename sub...
1203
  struct cgroup_subsys io_cgrp_subsys = {
92fb97487   Tejun Heo   cgroup: rename ->...
1204
  	.css_alloc = blkcg_css_alloc,
4308a434e   Tejun Heo   blkcg: don't offl...
1205
  	.css_online = blkcg_css_online,
92fb97487   Tejun Heo   cgroup: rename ->...
1206
1207
  	.css_offline = blkcg_css_offline,
  	.css_free = blkcg_css_free,
3c798398e   Tejun Heo   blkcg: mass renam...
1208
  	.can_attach = blkcg_can_attach,
f73316482   Tejun Heo   blk-cgroup: reimp...
1209
  	.css_rstat_flush = blkcg_rstat_flush,
69d7fde59   Tejun Heo   blkcg: use CGROUP...
1210
  	.bind = blkcg_bind,
2ee867dcf   Tejun Heo   blkcg: implement ...
1211
  	.dfl_cftypes = blkcg_files,
880f50e22   Tejun Heo   blkcg: mark exist...
1212
  	.legacy_cftypes = blkcg_legacy_files,
c165b3e3c   Tejun Heo   blkcg: rename sub...
1213
  	.legacy_name = "blkio",
d09d8df3a   Josef Bacik   blkcg: add generi...
1214
  	.exit = blkcg_exit,
1ced953b1   Tejun Heo   blkcg, memcg: mak...
1215
1216
1217
1218
1219
1220
1221
1222
  #ifdef CONFIG_MEMCG
  	/*
  	 * This ensures that, if available, memcg is automatically enabled
  	 * together on the default hierarchy so that the owner cgroup can
  	 * be retrieved from writeback pages.
  	 */
  	.depends_on = 1 << memory_cgrp_id,
  #endif
676f7c8f8   Tejun Heo   cgroup: relocate ...
1223
  };
c165b3e3c   Tejun Heo   blkcg: rename sub...
1224
  EXPORT_SYMBOL_GPL(io_cgrp_subsys);
676f7c8f8   Tejun Heo   cgroup: relocate ...
1225

8bd435b30   Tejun Heo   blkcg: remove sta...
1226
  /**
a2b1693ba   Tejun Heo   blkcg: implement ...
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
   * blkcg_activate_policy - activate a blkcg policy on a request_queue
   * @q: request_queue of interest
   * @pol: blkcg policy to activate
   *
   * Activate @pol on @q.  Requires %GFP_KERNEL context.  @q goes through
   * bypass mode to populate its blkgs with policy_data for @pol.
   *
   * Activation happens with @q bypassed, so nobody would be accessing blkgs
   * from IO path.  Update of each blkg is protected by both queue and blkcg
   * locks so that holding either lock and testing blkcg_policy_enabled() is
   * always enough for dereferencing policy data.
   *
   * The caller is responsible for synchronizing [de]activations and policy
   * [un]registerations.  Returns 0 on success, -errno on failure.
   */
  int blkcg_activate_policy(struct request_queue *q,
3c798398e   Tejun Heo   blkcg: mass renam...
1243
  			  const struct blkcg_policy *pol)
a2b1693ba   Tejun Heo   blkcg: implement ...
1244
  {
4c55f4f9a   Tejun Heo   blkcg: restructur...
1245
  	struct blkg_policy_data *pd_prealloc = NULL;
9d179b865   Tejun Heo   blkcg: Fix multip...
1246
  	struct blkcg_gq *blkg, *pinned_blkg = NULL;
4c55f4f9a   Tejun Heo   blkcg: restructur...
1247
  	int ret;
a2b1693ba   Tejun Heo   blkcg: implement ...
1248
1249
1250
  
  	if (blkcg_policy_enabled(q, pol))
  		return 0;
344e9ffcb   Jens Axboe   block: add queue_...
1251
  	if (queue_is_mq(q))
bd166ef18   Jens Axboe   blk-mq-sched: add...
1252
  		blk_mq_freeze_queue(q);
9d179b865   Tejun Heo   blkcg: Fix multip...
1253
  retry:
0d945c1f9   Christoph Hellwig   block: remove the...
1254
  	spin_lock_irq(&q->queue_lock);
a2b1693ba   Tejun Heo   blkcg: implement ...
1255

9d179b865   Tejun Heo   blkcg: Fix multip...
1256
  	/* blkg_list is pushed at the head, reverse walk to allocate parents first */
71c814077   Tejun Heo   blkcg: blkcg_acti...
1257
  	list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
4c55f4f9a   Tejun Heo   blkcg: restructur...
1258
1259
1260
1261
  		struct blkg_policy_data *pd;
  
  		if (blkg->pd[pol->plid])
  			continue;
a2b1693ba   Tejun Heo   blkcg: implement ...
1262

9d179b865   Tejun Heo   blkcg: Fix multip...
1263
1264
1265
1266
1267
1268
1269
1270
  		/* If prealloc matches, use it; otherwise try GFP_NOWAIT */
  		if (blkg == pinned_blkg) {
  			pd = pd_prealloc;
  			pd_prealloc = NULL;
  		} else {
  			pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
  					      blkg->blkcg);
  		}
4c55f4f9a   Tejun Heo   blkcg: restructur...
1271
  		if (!pd) {
9d179b865   Tejun Heo   blkcg: Fix multip...
1272
1273
1274
1275
1276
1277
1278
1279
  			/*
  			 * GFP_NOWAIT failed.  Free the existing one and
  			 * prealloc for @blkg w/ GFP_KERNEL.
  			 */
  			if (pinned_blkg)
  				blkg_put(pinned_blkg);
  			blkg_get(blkg);
  			pinned_blkg = blkg;
0d945c1f9   Christoph Hellwig   block: remove the...
1280
  			spin_unlock_irq(&q->queue_lock);
9d179b865   Tejun Heo   blkcg: Fix multip...
1281
1282
1283
1284
1285
1286
1287
1288
1289
  
  			if (pd_prealloc)
  				pol->pd_free_fn(pd_prealloc);
  			pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
  						       blkg->blkcg);
  			if (pd_prealloc)
  				goto retry;
  			else
  				goto enomem;
4c55f4f9a   Tejun Heo   blkcg: restructur...
1290
  		}
a2b1693ba   Tejun Heo   blkcg: implement ...
1291
1292
1293
  
  		blkg->pd[pol->plid] = pd;
  		pd->blkg = blkg;
b276a876a   Tejun Heo   blkcg: add blkg_p...
1294
  		pd->plid = pol->plid;
a2b1693ba   Tejun Heo   blkcg: implement ...
1295
  	}
9d179b865   Tejun Heo   blkcg: Fix multip...
1296
1297
1298
1299
  	/* all allocated, init in the same order */
  	if (pol->pd_init_fn)
  		list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
  			pol->pd_init_fn(blkg->pd[pol->plid]);
a2b1693ba   Tejun Heo   blkcg: implement ...
1300
1301
  	__set_bit(pol->plid, q->blkcg_pols);
  	ret = 0;
4c55f4f9a   Tejun Heo   blkcg: restructur...
1302

0d945c1f9   Christoph Hellwig   block: remove the...
1303
  	spin_unlock_irq(&q->queue_lock);
9d179b865   Tejun Heo   blkcg: Fix multip...
1304
  out:
344e9ffcb   Jens Axboe   block: add queue_...
1305
  	if (queue_is_mq(q))
bd166ef18   Jens Axboe   blk-mq-sched: add...
1306
  		blk_mq_unfreeze_queue(q);
9d179b865   Tejun Heo   blkcg: Fix multip...
1307
1308
  	if (pinned_blkg)
  		blkg_put(pinned_blkg);
001bea73e   Tejun Heo   blkcg: replace bl...
1309
1310
  	if (pd_prealloc)
  		pol->pd_free_fn(pd_prealloc);
a2b1693ba   Tejun Heo   blkcg: implement ...
1311
  	return ret;
9d179b865   Tejun Heo   blkcg: Fix multip...
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
  
  enomem:
  	/* alloc failed, nothing's initialized yet, free everything */
  	spin_lock_irq(&q->queue_lock);
  	list_for_each_entry(blkg, &q->blkg_list, q_node) {
  		if (blkg->pd[pol->plid]) {
  			pol->pd_free_fn(blkg->pd[pol->plid]);
  			blkg->pd[pol->plid] = NULL;
  		}
  	}
  	spin_unlock_irq(&q->queue_lock);
  	ret = -ENOMEM;
  	goto out;
a2b1693ba   Tejun Heo   blkcg: implement ...
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
  }
  EXPORT_SYMBOL_GPL(blkcg_activate_policy);
  
  /**
   * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue
   * @q: request_queue of interest
   * @pol: blkcg policy to deactivate
   *
   * Deactivate @pol on @q.  Follows the same synchronization rules as
   * blkcg_activate_policy().
   */
  void blkcg_deactivate_policy(struct request_queue *q,
3c798398e   Tejun Heo   blkcg: mass renam...
1337
  			     const struct blkcg_policy *pol)
a2b1693ba   Tejun Heo   blkcg: implement ...
1338
  {
3c798398e   Tejun Heo   blkcg: mass renam...
1339
  	struct blkcg_gq *blkg;
a2b1693ba   Tejun Heo   blkcg: implement ...
1340
1341
1342
  
  	if (!blkcg_policy_enabled(q, pol))
  		return;
344e9ffcb   Jens Axboe   block: add queue_...
1343
  	if (queue_is_mq(q))
bd166ef18   Jens Axboe   blk-mq-sched: add...
1344
  		blk_mq_freeze_queue(q);
bd166ef18   Jens Axboe   blk-mq-sched: add...
1345

0d945c1f9   Christoph Hellwig   block: remove the...
1346
  	spin_lock_irq(&q->queue_lock);
a2b1693ba   Tejun Heo   blkcg: implement ...
1347
1348
1349
1350
  
  	__clear_bit(pol->plid, q->blkcg_pols);
  
  	list_for_each_entry(blkg, &q->blkg_list, q_node) {
001bea73e   Tejun Heo   blkcg: replace bl...
1351
  		if (blkg->pd[pol->plid]) {
6b0654620   Dennis Zhou (Facebook)   Revert "blk-throt...
1352
  			if (pol->pd_offline_fn)
a9520cd6f   Tejun Heo   blkcg: make blkcg...
1353
  				pol->pd_offline_fn(blkg->pd[pol->plid]);
001bea73e   Tejun Heo   blkcg: replace bl...
1354
1355
1356
  			pol->pd_free_fn(blkg->pd[pol->plid]);
  			blkg->pd[pol->plid] = NULL;
  		}
a2b1693ba   Tejun Heo   blkcg: implement ...
1357
  	}
0d945c1f9   Christoph Hellwig   block: remove the...
1358
  	spin_unlock_irq(&q->queue_lock);
bd166ef18   Jens Axboe   blk-mq-sched: add...
1359

344e9ffcb   Jens Axboe   block: add queue_...
1360
  	if (queue_is_mq(q))
bd166ef18   Jens Axboe   blk-mq-sched: add...
1361
  		blk_mq_unfreeze_queue(q);
a2b1693ba   Tejun Heo   blkcg: implement ...
1362
1363
1364
1365
  }
  EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
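
  /*
   * Editor's note: hedged usage sketch, not part of blk-cgroup.c.  It shows
   * how a policy might pair blkcg_activate_policy()/blkcg_deactivate_policy()
   * from its per-queue setup and teardown paths, per the rules documented
   * above (process context, caller serializes against [un]registration).
   * The foo_* names are hypothetical placeholders, not a real policy.
   */
  #if 0	/* illustrative only */
  static struct blkcg_policy foo_policy;

  static int foo_init_queue(struct request_queue *q)
  {
  	/* may freeze an mq queue; needs GFP_KERNEL context */
  	return blkcg_activate_policy(q, &foo_policy);
  }

  static void foo_exit_queue(struct request_queue *q)
  {
  	/* undoes activation and frees every blkg's pd for this policy */
  	blkcg_deactivate_policy(q, &foo_policy);
  }
  #endif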
  
  /**
3c798398e   Tejun Heo   blkcg: mass renam...
1366
1367
   * blkcg_policy_register - register a blkcg policy
   * @pol: blkcg policy to register
8bd435b30   Tejun Heo   blkcg: remove sta...
1368
   *
3c798398e   Tejun Heo   blkcg: mass renam...
1369
1370
   * Register @pol with blkcg core.  Might sleep and @pol may be modified on
   * successful registration.  Returns 0 on success and -errno on failure.
8bd435b30   Tejun Heo   blkcg: remove sta...
1371
   */
d5bf02914   Jens Axboe   Revert "block: ad...
1372
  int blkcg_policy_register(struct blkcg_policy *pol)
3e2520668   Vivek Goyal   blkio: Implement ...
1373
  {
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1374
  	struct blkcg *blkcg;
8bd435b30   Tejun Heo   blkcg: remove sta...
1375
  	int i, ret;
e8989fae3   Tejun Heo   blkcg: unify blkg...
1376

838f13bf4   Tejun Heo   blkcg: allow blkc...
1377
  	mutex_lock(&blkcg_pol_register_mutex);
bc0d6501a   Tejun Heo   blkcg: kill blkio...
1378
  	mutex_lock(&blkcg_pol_mutex);
8bd435b30   Tejun Heo   blkcg: remove sta...
1379
1380
1381
  	/* find an empty slot */
  	ret = -ENOSPC;
  	for (i = 0; i < BLKCG_MAX_POLS; i++)
3c798398e   Tejun Heo   blkcg: mass renam...
1382
  		if (!blkcg_policy[i])
8bd435b30   Tejun Heo   blkcg: remove sta...
1383
  			break;
01c5f85ae   Jens Axboe   blk-cgroup: incre...
1384
1385
1386
  	if (i >= BLKCG_MAX_POLS) {
  		pr_warn("blkcg_policy_register: BLKCG_MAX_POLS too small
  ");
838f13bf4   Tejun Heo   blkcg: allow blkc...
1387
  		goto err_unlock;
01c5f85ae   Jens Axboe   blk-cgroup: incre...
1388
  	}
035d10b2f   Tejun Heo   blkcg: add blkio_...
1389

e84010732   weiping zhang   blkcg: add sanity...
1390
1391
1392
1393
  	/* Make sure cpd/pd_alloc_fn and cpd/pd_free_fn come in pairs */
  	if ((!pol->cpd_alloc_fn ^ !pol->cpd_free_fn) ||
  		(!pol->pd_alloc_fn ^ !pol->pd_free_fn))
  		goto err_unlock;
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1394
  	/* register @pol */
3c798398e   Tejun Heo   blkcg: mass renam...
1395
  	pol->plid = i;
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1396
1397
1398
  	blkcg_policy[pol->plid] = pol;
  
  	/* allocate and install cpd's */
e4a9bde95   Tejun Heo   blkcg: replace bl...
1399
  	if (pol->cpd_alloc_fn) {
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1400
1401
  		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
  			struct blkcg_policy_data *cpd;
e4a9bde95   Tejun Heo   blkcg: replace bl...
1402
  			cpd = pol->cpd_alloc_fn(GFP_KERNEL);
bbb427e34   Bart Van Assche   blkcg: Unlock blk...
1403
  			if (!cpd)
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1404
  				goto err_free_cpds;
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1405

814376483   Tejun Heo   blkcg: minor upda...
1406
1407
  			blkcg->cpd[pol->plid] = cpd;
  			cpd->blkcg = blkcg;
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1408
  			cpd->plid = pol->plid;
86a5bba5c   Tejun Heo   blkcg: make ->cpd...
1409
1410
  			if (pol->cpd_init_fn)
  				pol->cpd_init_fn(cpd);
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1411
1412
  		}
  	}
838f13bf4   Tejun Heo   blkcg: allow blkc...
1413
  	mutex_unlock(&blkcg_pol_mutex);
8bd435b30   Tejun Heo   blkcg: remove sta...
1414

8bd435b30   Tejun Heo   blkcg: remove sta...
1415
  	/* everything is in place, add intf files for the new policy */
2ee867dcf   Tejun Heo   blkcg: implement ...
1416
1417
1418
  	if (pol->dfl_cftypes)
  		WARN_ON(cgroup_add_dfl_cftypes(&io_cgrp_subsys,
  					       pol->dfl_cftypes));
880f50e22   Tejun Heo   blkcg: mark exist...
1419
  	if (pol->legacy_cftypes)
c165b3e3c   Tejun Heo   blkcg: rename sub...
1420
  		WARN_ON(cgroup_add_legacy_cftypes(&io_cgrp_subsys,
880f50e22   Tejun Heo   blkcg: mark exist...
1421
  						  pol->legacy_cftypes));
838f13bf4   Tejun Heo   blkcg: allow blkc...
1422
1423
  	mutex_unlock(&blkcg_pol_register_mutex);
  	return 0;
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1424
  err_free_cpds:
58a9edce0   weiping zhang   blkcg: check pol-...
1425
  	if (pol->cpd_free_fn) {
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1426
  		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
e4a9bde95   Tejun Heo   blkcg: replace bl...
1427
1428
1429
1430
  			if (blkcg->cpd[pol->plid]) {
  				pol->cpd_free_fn(blkcg->cpd[pol->plid]);
  				blkcg->cpd[pol->plid] = NULL;
  			}
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1431
1432
1433
  		}
  	}
  	blkcg_policy[pol->plid] = NULL;
838f13bf4   Tejun Heo   blkcg: allow blkc...
1434
  err_unlock:
bc0d6501a   Tejun Heo   blkcg: kill blkio...
1435
  	mutex_unlock(&blkcg_pol_mutex);
838f13bf4   Tejun Heo   blkcg: allow blkc...
1436
  	mutex_unlock(&blkcg_pol_register_mutex);
8bd435b30   Tejun Heo   blkcg: remove sta...
1437
  	return ret;
3e2520668   Vivek Goyal   blkio: Implement ...
1438
  }
3c798398e   Tejun Heo   blkcg: mass renam...
1439
  EXPORT_SYMBOL_GPL(blkcg_policy_register);
3e2520668   Vivek Goyal   blkio: Implement ...
1440

8bd435b30   Tejun Heo   blkcg: remove sta...
1441
  /**
3c798398e   Tejun Heo   blkcg: mass renam...
1442
1443
   * blkcg_policy_unregister - unregister a blkcg policy
   * @pol: blkcg policy to unregister
8bd435b30   Tejun Heo   blkcg: remove sta...
1444
   *
3c798398e   Tejun Heo   blkcg: mass renam...
1445
   * Undo blkcg_policy_register(@pol).  Might sleep.
8bd435b30   Tejun Heo   blkcg: remove sta...
1446
   */
3c798398e   Tejun Heo   blkcg: mass renam...
1447
  void blkcg_policy_unregister(struct blkcg_policy *pol)
3e2520668   Vivek Goyal   blkio: Implement ...
1448
  {
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1449
  	struct blkcg *blkcg;
838f13bf4   Tejun Heo   blkcg: allow blkc...
1450
  	mutex_lock(&blkcg_pol_register_mutex);
bc0d6501a   Tejun Heo   blkcg: kill blkio...
1451

3c798398e   Tejun Heo   blkcg: mass renam...
1452
  	if (WARN_ON(blkcg_policy[pol->plid] != pol))
8bd435b30   Tejun Heo   blkcg: remove sta...
1453
1454
1455
  		goto out_unlock;
  
  	/* kill the intf files first */
2ee867dcf   Tejun Heo   blkcg: implement ...
1456
1457
  	if (pol->dfl_cftypes)
  		cgroup_rm_cftypes(pol->dfl_cftypes);
880f50e22   Tejun Heo   blkcg: mark exist...
1458
1459
  	if (pol->legacy_cftypes)
  		cgroup_rm_cftypes(pol->legacy_cftypes);
44ea53de4   Tejun Heo   blkcg: implement ...
1460

06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1461
  	/* remove cpds and unregister */
838f13bf4   Tejun Heo   blkcg: allow blkc...
1462
  	mutex_lock(&blkcg_pol_mutex);
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1463

58a9edce0   weiping zhang   blkcg: check pol-...
1464
  	if (pol->cpd_free_fn) {
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1465
  		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
e4a9bde95   Tejun Heo   blkcg: replace bl...
1466
1467
1468
1469
  			if (blkcg->cpd[pol->plid]) {
  				pol->cpd_free_fn(blkcg->cpd[pol->plid]);
  				blkcg->cpd[pol->plid] = NULL;
  			}
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1470
1471
  		}
  	}
3c798398e   Tejun Heo   blkcg: mass renam...
1472
  	blkcg_policy[pol->plid] = NULL;
06b285bd1   Tejun Heo   blkcg: fix blkcg_...
1473

bc0d6501a   Tejun Heo   blkcg: kill blkio...
1474
  	mutex_unlock(&blkcg_pol_mutex);
838f13bf4   Tejun Heo   blkcg: allow blkc...
1475
1476
  out_unlock:
  	mutex_unlock(&blkcg_pol_register_mutex);
3e2520668   Vivek Goyal   blkio: Implement ...
1477
  }
3c798398e   Tejun Heo   blkcg: mass renam...
1478
  EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
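
  /*
   * Editor's note: hedged sketch, not part of blk-cgroup.c.  A minimal policy
   * registration as it would look from a module, honoring the pairing rule
   * checked in blkcg_policy_register(): cpd_alloc_fn/cpd_free_fn and
   * pd_alloc_fn/pd_free_fn must both be set or both be NULL.  All foo_*
   * callbacks and cftype arrays are hypothetical.
   */
  #if 0	/* illustrative only */
  static struct blkcg_policy foo_policy = {
  	.dfl_cftypes	= foo_files,		/* cgroup v2 interface files */
  	.legacy_cftypes	= foo_legacy_files,	/* cgroup v1 ("blkio") files */
  	.cpd_alloc_fn	= foo_cpd_alloc,
  	.cpd_init_fn	= foo_cpd_init,
  	.cpd_free_fn	= foo_cpd_free,
  	.pd_alloc_fn	= foo_pd_alloc,
  	.pd_init_fn	= foo_pd_init,
  	.pd_offline_fn	= foo_pd_offline,
  	.pd_free_fn	= foo_pd_free,
  };

  static int __init foo_init(void)
  {
  	return blkcg_policy_register(&foo_policy);
  }

  static void __exit foo_exit(void)
  {
  	blkcg_policy_unregister(&foo_policy);
  }
  module_init(foo_init);
  module_exit(foo_exit);
  #endif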
903d23f0a   Josef Bacik   blk-cgroup: allow...
1479

d3f77dfdc   Tejun Heo   blkcg: implement ...
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
  bool __blkcg_punt_bio_submit(struct bio *bio)
  {
  	struct blkcg_gq *blkg = bio->bi_blkg;
  
  	/* consume the flag first */
  	bio->bi_opf &= ~REQ_CGROUP_PUNT;
  
  	/* never bounce for the root cgroup */
  	if (!blkg->parent)
  		return false;
  
  	spin_lock_bh(&blkg->async_bio_lock);
  	bio_list_add(&blkg->async_bios, bio);
  	spin_unlock_bh(&blkg->async_bio_lock);
  
  	queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
  	return true;
  }
d09d8df3a   Josef Bacik   blkcg: add generi...
1498
1499
1500
1501
1502
1503
1504
1505
1506
  /*
   * Scale the accumulated delay based on how long it has been since we updated
   * the delay.  We only call this when we are adding delay, in case it's been a
   * while since we added delay, and when we are checking to see if we need to
   * delay a task, to account for any delays that may have occurred.
   */
  static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
  {
  	u64 old = atomic64_read(&blkg->delay_start);
54c52e10d   Tejun Heo   blk-iocost: switc...
1507
1508
1509
  	/* negative use_delay means no scaling, see blkcg_set_delay() */
  	if (atomic_read(&blkg->use_delay) < 0)
  		return;
d09d8df3a   Josef Bacik   blkcg: add generi...
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
  	/*
  	 * We only want to scale down every second.  The idea here is that we
  	 * want to delay people for min(delay_nsec, NSEC_PER_SEC) in a certain
  	 * time window.  We only want to throttle tasks for recent delay that
  	 * has occurred, in 1 second time windows since that's the maximum
  	 * things can be throttled.  We save the current delay window in
  	 * blkg->last_delay so we know what amount is still left to be charged
  	 * to the blkg from this point onward.  blkg->last_use keeps track of
  	 * the use_delay counter.  The idea is if we're unthrottling the blkg we
  	 * are ok with whatever is happening now, and we can take away more of
  	 * the accumulated delay as we've already throttled enough that
  	 * everybody is happy with their IO latencies.
  	 */
  	if (time_before64(old + NSEC_PER_SEC, now) &&
  	    atomic64_cmpxchg(&blkg->delay_start, old, now) == old) {
  		u64 cur = atomic64_read(&blkg->delay_nsec);
  		u64 sub = min_t(u64, blkg->last_delay, now - old);
  		int cur_use = atomic_read(&blkg->use_delay);
  
  		/*
  		 * We've been unthrottled, subtract a larger chunk of our
  		 * accumulated delay.
  		 */
  		if (cur_use < blkg->last_use)
  			sub = max_t(u64, sub, blkg->last_delay >> 1);
  
  		/*
  		 * This shouldn't happen, but handle it anyway.  Our delay_nsec
  		 * should only ever be growing except here where we subtract out
  		 * min(last_delay, 1 second), but lord knows bugs happen and I'd
  		 * rather not end up with negative numbers.
  		 */
  		if (unlikely(cur < sub)) {
  			atomic64_set(&blkg->delay_nsec, 0);
  			blkg->last_delay = 0;
  		} else {
  			atomic64_sub(sub, &blkg->delay_nsec);
  			blkg->last_delay = cur - sub;
  		}
  		blkg->last_use = cur_use;
  	}
  }
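
  /*
   * Editor's note: illustrative numbers only, not part of blk-cgroup.c.
   * Suppose delay_nsec = last_delay = 3s of accumulated delay and 1.2s has
   * passed since delay_start.  Normally sub = min(last_delay, now - old) =
   * 1.2s; if use_delay dropped since last time (we were unthrottled), sub is
   * bumped to at least last_delay >> 1 = 1.5s.  delay_nsec then drops to
   * 1.8s (or 1.5s) and last_delay records what is still chargeable, so old
   * delay decays over time instead of throttling tasks forever.
   */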
  
  /*
   * This is called when we want to actually walk up the hierarchy and check to
   * see if we need to throttle, and then actually throttle if there is some
   * accumulated delay.  This should only be called upon return to user space so
   * we're not holding some lock that would induce a priority inversion.
   */
  static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
  {
fd112c746   Josef Bacik   blk-cgroup: turn ...
1561
  	unsigned long pflags;
5160a5a53   Tejun Heo   blk-iocost: imple...
1562
  	bool clamp;
d09d8df3a   Josef Bacik   blkcg: add generi...
1563
1564
1565
1566
1567
1568
  	u64 now = ktime_to_ns(ktime_get());
  	u64 exp;
  	u64 delay_nsec = 0;
  	int tok;
  
  	while (blkg->parent) {
5160a5a53   Tejun Heo   blk-iocost: imple...
1569
1570
1571
1572
  		int use_delay = atomic_read(&blkg->use_delay);
  
  		if (use_delay) {
  			u64 this_delay;
d09d8df3a   Josef Bacik   blkcg: add generi...
1573
  			blkcg_scale_delay(blkg, now);
5160a5a53   Tejun Heo   blk-iocost: imple...
1574
1575
1576
1577
1578
  			this_delay = atomic64_read(&blkg->delay_nsec);
  			if (this_delay > delay_nsec) {
  				delay_nsec = this_delay;
  				clamp = use_delay > 0;
  			}
d09d8df3a   Josef Bacik   blkcg: add generi...
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
  		}
  		blkg = blkg->parent;
  	}
  
  	if (!delay_nsec)
  		return;
  
  	/*
  	 * Let's not sleep for all eternity if we've amassed a huge delay.
  	 * Swapping or metadata IO can accumulate 10's of seconds worth of
  	 * delay, and we want userspace to be able to do _something_ so cap the
5160a5a53   Tejun Heo   blk-iocost: imple...
1590
1591
1592
1593
  	 * delays at 0.25s. If there's 10's of seconds worth of delay then the
  	 * tasks will be delayed for 0.25 second for every syscall. If
  	 * blkcg_set_delay() was used as indicated by negative use_delay, the
  	 * caller is responsible for regulating the range.
d09d8df3a   Josef Bacik   blkcg: add generi...
1594
  	 */
5160a5a53   Tejun Heo   blk-iocost: imple...
1595
1596
  	if (clamp)
  		delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC);
d09d8df3a   Josef Bacik   blkcg: add generi...
1597

fd112c746   Josef Bacik   blk-cgroup: turn ...
1598
1599
  	if (use_memdelay)
  		psi_memstall_enter(&pflags);
d09d8df3a   Josef Bacik   blkcg: add generi...
1600
1601
1602
1603
1604
1605
1606
1607
1608
  
  	exp = ktime_add_ns(now, delay_nsec);
  	tok = io_schedule_prepare();
  	do {
  		__set_current_state(TASK_KILLABLE);
  		if (!schedule_hrtimeout(&exp, HRTIMER_MODE_ABS))
  			break;
  	} while (!fatal_signal_pending(current));
  	io_schedule_finish(tok);
fd112c746   Josef Bacik   blk-cgroup: turn ...
1609
1610
1611
  
  	if (use_memdelay)
  		psi_memstall_leave(&pflags);
d09d8df3a   Josef Bacik   blkcg: add generi...
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
  }
  
  /**
   * blkcg_maybe_throttle_current - throttle the current task if it has been marked
   *
   * This is only called if we've been marked with set_notify_resume().  Obviously
   * we can be set_notify_resume() for reasons other than blkcg throttling, so we
   * check to see if current->throttle_queue is set and if not this doesn't do
   * anything.  This should only ever be called by the resume code; it's not meant
   * to be called willy-nilly, as it will actually do the work to throttle the
   * task if it is set up for throttling.
   */
  void blkcg_maybe_throttle_current(void)
  {
  	struct request_queue *q = current->throttle_queue;
  	struct cgroup_subsys_state *css;
  	struct blkcg *blkcg;
  	struct blkcg_gq *blkg;
  	bool use_memdelay = current->use_memdelay;
  
  	if (!q)
  		return;
  
  	current->throttle_queue = NULL;
  	current->use_memdelay = false;
  
  	rcu_read_lock();
  	css = kthread_blkcg();
  	if (css)
  		blkcg = css_to_blkcg(css);
  	else
  		blkcg = css_to_blkcg(task_css(current, io_cgrp_id));
  
  	if (!blkcg)
  		goto out;
  	blkg = blkg_lookup(blkcg, q);
  	if (!blkg)
  		goto out;
7754f669f   Dennis Zhou   blkcg: rename blk...
1650
  	if (!blkg_tryget(blkg))
d09d8df3a   Josef Bacik   blkcg: add generi...
1651
1652
  		goto out;
  	rcu_read_unlock();
d09d8df3a   Josef Bacik   blkcg: add generi...
1653
1654
1655
  
  	blkcg_maybe_throttle_blkg(blkg, use_memdelay);
  	blkg_put(blkg);
cc7ecc258   Josef Bacik   blk-cgroup: hold ...
1656
  	blk_put_queue(q);
d09d8df3a   Josef Bacik   blkcg: add generi...
1657
1658
1659
1660
1661
  	return;
  out:
  	rcu_read_unlock();
  	blk_put_queue(q);
  }
d09d8df3a   Josef Bacik   blkcg: add generi...
1662
1663
1664
  
  /**
   * blkcg_schedule_throttle - this task needs to check for throttling
537d71b3f   Bart Van Assche   blkcg: Fix kernel...
1665
1666
   * @q: the request queue IO was submitted on
   * @use_memdelay: do we charge this to memory delay for PSI
d09d8df3a   Josef Bacik   blkcg: add generi...
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
   *
   * This is called by the IO controller when we know there's delay accumulated
   * for the blkg for this task.  We do not pass the blkg because there are places
   * we call this that may not have that information; the swapping code, for
   * instance, will only have a request_queue at that point.  This sets the
   * notify_resume for the task to check and see if it requires throttling before
   * returning to user space.
   *
   * We will only schedule once per syscall.  You can call this over and over
   * again and it will only do the check once upon return to user space, and only
   * throttle once.  If the task needs to be throttled again it'll need to be
   * re-set at the next time we see the task.
   */
  void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay)
  {
  	if (unlikely(current->flags & PF_KTHREAD))
  		return;
  
  	if (!blk_get_queue(q))
  		return;
  
  	if (current->throttle_queue)
  		blk_put_queue(current->throttle_queue);
  	current->throttle_queue = q;
  	if (use_memdelay)
  		current->use_memdelay = use_memdelay;
  	set_notify_resume(current);
  }
d09d8df3a   Josef Bacik   blkcg: add generi...
1695
1696
1697
  
  /**
   * blkcg_add_delay - add delay to this blkg
537d71b3f   Bart Van Assche   blkcg: Fix kernel...
1698
1699
1700
   * @blkg: blkg of interest
   * @now: the current time in nanoseconds
   * @delta: how many nanoseconds of delay to add
d09d8df3a   Josef Bacik   blkcg: add generi...
1701
1702
1703
1704
1705
1706
   *
   * Charge @delta to the blkg's current delay accumulation.  This is used to
   * throttle tasks if an IO controller thinks we need more throttling.
   */
  void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
  {
54c52e10d   Tejun Heo   blk-iocost: switc...
1707
1708
  	if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
  		return;
d09d8df3a   Josef Bacik   blkcg: add generi...
1709
1710
1711
  	blkcg_scale_delay(blkg, now);
  	atomic64_add(delta, &blkg->delay_nsec);
  }
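
  /*
   * Editor's note: hedged sketch, not part of blk-cgroup.c.  It shows the
   * kind of caller the comments above describe (a delay-based IO controller
   * such as blk-iolatency): charge delay to the blkg, then ask the offending
   * task to throttle itself on its next return to user space.  The function
   * name and 'penalty_ns' are hypothetical.
   */
  #if 0	/* illustrative only */
  static void foo_account_latency(struct blkcg_gq *blkg, u64 penalty_ns)
  {
  	u64 now = ktime_to_ns(ktime_get());

  	blkcg_add_delay(blkg, now, penalty_ns);
  	/* current will sleep in blkcg_maybe_throttle_current() on resume */
  	blkcg_schedule_throttle(blkg->q, true);
  }
  #endif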
d09d8df3a   Josef Bacik   blkcg: add generi...
1712

28fc591ff   Christoph Hellwig   block: move the b...
1713
1714
  /**
   * blkg_tryget_closest - try and get a blkg ref on the closest blkg
13c7863d4   Christoph Hellwig   block: move the i...
1715
1716
   * @bio: target bio
   * @css: target css
28fc591ff   Christoph Hellwig   block: move the b...
1717
   *
13c7863d4   Christoph Hellwig   block: move the i...
1718
1719
1720
   * As the failure mode here is to walk up the blkg tree, this ensures that the
   * blkg->parent pointers are always valid.  This returns the blkg that it ended
   * up taking a reference on or %NULL if no reference was taken.
28fc591ff   Christoph Hellwig   block: move the b...
1721
   */
13c7863d4   Christoph Hellwig   block: move the i...
1722
1723
  static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio,
  		struct cgroup_subsys_state *css)
28fc591ff   Christoph Hellwig   block: move the b...
1724
  {
13c7863d4   Christoph Hellwig   block: move the i...
1725
  	struct blkcg_gq *blkg, *ret_blkg = NULL;
28fc591ff   Christoph Hellwig   block: move the b...
1726

13c7863d4   Christoph Hellwig   block: move the i...
1727
1728
  	rcu_read_lock();
  	blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_disk->queue);
28fc591ff   Christoph Hellwig   block: move the b...
1729
1730
1731
1732
1733
1734
1735
  	while (blkg) {
  		if (blkg_tryget(blkg)) {
  			ret_blkg = blkg;
  			break;
  		}
  		blkg = blkg->parent;
  	}
13c7863d4   Christoph Hellwig   block: move the i...
1736
  	rcu_read_unlock();
28fc591ff   Christoph Hellwig   block: move the b...
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
  
  	return ret_blkg;
  }
  
  /**
   * bio_associate_blkg_from_css - associate a bio with a specified css
   * @bio: target bio
   * @css: target css
   *
   * Associate @bio with the blkg found by combining the css's blkg and the
   * request_queue of the @bio.  An association failure is handled by walking up
   * the blkg tree.  Therefore, the blkg associated can be anything between @blkg
   * and q->root_blkg.  This situation only happens when a cgroup is dying and
   * then the remaining bios will spill to the closest alive blkg.
   *
   * A reference will be taken on the blkg and will be released when @bio is
   * freed.
   */
  void bio_associate_blkg_from_css(struct bio *bio,
  				 struct cgroup_subsys_state *css)
  {
28fc591ff   Christoph Hellwig   block: move the b...
1758
1759
  	if (bio->bi_blkg)
  		blkg_put(bio->bi_blkg);
a5b97526b   Christoph Hellwig   block: bypass blk...
1760
  	if (css && css->parent) {
13c7863d4   Christoph Hellwig   block: move the i...
1761
  		bio->bi_blkg = blkg_tryget_closest(bio, css);
a5b97526b   Christoph Hellwig   block: bypass blk...
1762
  	} else {
13c7863d4   Christoph Hellwig   block: move the i...
1763
1764
  		blkg_get(bio->bi_disk->queue->root_blkg);
  		bio->bi_blkg = bio->bi_disk->queue->root_blkg;
a5b97526b   Christoph Hellwig   block: bypass blk...
1765
  	}
28fc591ff   Christoph Hellwig   block: move the b...
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
  }
  EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
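
  /*
   * Editor's note: hedged sketch, not part of blk-cgroup.c.  Callers that
   * already know which cgroup should own an I/O (cgroup writeback is the
   * typical case) can tag the bio explicitly before submission.  'owner_css'
   * stands in for a css obtained and pinned by the caller.
   */
  #if 0	/* illustrative only */
  static void foo_submit_for_css(struct bio *bio,
  			       struct cgroup_subsys_state *owner_css)
  {
  	bio_associate_blkg_from_css(bio, owner_css);	/* takes a blkg ref */
  	submit_bio(bio);				/* ref dropped when bio is freed */
  }
  #endif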
  
  /**
   * bio_associate_blkg - associate a bio with a blkg
   * @bio: target bio
   *
   * Associate @bio with the blkg found from the bio's css and request_queue.
   * If one is not found, bio_lookup_blkg() creates the blkg.  If a blkg is
   * already associated, the css is reused and association redone as the
   * request_queue may have changed.
   */
  void bio_associate_blkg(struct bio *bio)
  {
  	struct cgroup_subsys_state *css;
  
  	rcu_read_lock();
  
  	if (bio->bi_blkg)
  		css = &bio_blkcg(bio)->css;
  	else
  		css = blkcg_css();
  
  	bio_associate_blkg_from_css(bio, css);
  
  	rcu_read_unlock();
  }
  EXPORT_SYMBOL_GPL(bio_associate_blkg);
  
  /**
   * bio_clone_blkg_association - clone blkg association from src to dst bio
   * @dst: destination bio
   * @src: source bio
   */
  void bio_clone_blkg_association(struct bio *dst, struct bio *src)
  {
  	if (src->bi_blkg) {
  		if (dst->bi_blkg)
  			blkg_put(dst->bi_blkg);
  		blkg_get(src->bi_blkg);
  		dst->bi_blkg = src->bi_blkg;
  	}
  }
  EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
db18a53e5   Christoph Hellwig   blk-cgroup: remov...
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
  static int blk_cgroup_io_type(struct bio *bio)
  {
  	if (op_is_discard(bio->bi_opf))
  		return BLKG_IOSTAT_DISCARD;
  	if (op_is_write(bio->bi_opf))
  		return BLKG_IOSTAT_WRITE;
  	return BLKG_IOSTAT_READ;
  }
  
  void blk_cgroup_bio_start(struct bio *bio)
  {
  	int rwd = blk_cgroup_io_type(bio), cpu;
  	struct blkg_iostat_set *bis;
  
  	cpu = get_cpu();
  	bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu);
  	u64_stats_update_begin(&bis->sync);
  
  	/*
  	 * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split
  	 * bio and we would have already accounted for the size of the bio.
  	 */
  	if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
  		bio_set_flag(bio, BIO_CGROUP_ACCT);
0b8cc25d9   Colin Ian King   blk-cgroup: clean...
1834
  		bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
db18a53e5   Christoph Hellwig   blk-cgroup: remov...
1835
1836
1837
1838
1839
1840
1841
1842
  	}
  	bis->cur.ios[rwd]++;
  
  	u64_stats_update_end(&bis->sync);
  	if (cgroup_subsys_on_dfl(io_cgrp_subsys))
  		cgroup_rstat_updated(bio->bi_blkg->blkcg->css.cgroup, cpu);
  	put_cpu();
  }
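
  /*
   * Editor's note: hedged sketch, not part of blk-cgroup.c.  The per-cpu
   * counters updated above are meant to be read under the same u64_stats
   * seqcount; blkcg_rstat_flush() does the real aggregation.  This sketch
   * reads one CPU's READ-byte counter only, for illustration.
   */
  #if 0	/* illustrative only */
  static u64 foo_read_bytes_on_cpu(struct blkcg_gq *blkg, int cpu)
  {
  	struct blkg_iostat_set *bis = per_cpu_ptr(blkg->iostat_cpu, cpu);
  	unsigned int seq;
  	u64 bytes;

  	do {
  		seq = u64_stats_fetch_begin(&bis->sync);
  		bytes = bis->cur.bytes[BLKG_IOSTAT_READ];
  	} while (u64_stats_fetch_retry(&bis->sync, seq));

  	return bytes;
  }
  #endif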
d3f77dfdc   Tejun Heo   blkcg: implement ...
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
  static int __init blkcg_init(void)
  {
  	blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
  					    WQ_MEM_RECLAIM | WQ_FREEZABLE |
  					    WQ_UNBOUND | WQ_SYSFS, 0);
  	if (!blkcg_punt_bio_wq)
  		return -ENOMEM;
  	return 0;
  }
  subsys_initcall(blkcg_init);
903d23f0a   Josef Bacik   blk-cgroup: allow...
1853
1854
  module_param(blkcg_debug_stats, bool, 0644);
  MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");