Commit 2a4fd070ee8561d918a3776388331bb7e92ea59e

Authored by Tejun Heo
1 parent db61367038

blkcg: move bulk of blkcg_gq release operations to the RCU callback

Currently, when the last reference of a blkcg_gq is put, all the
release operations sans the actual freeing happen directly in
blkg_put().  As blkg_put() may be called under queue_lock, all
pd_exit_fn()s may be too.  This makes it impossible for pd_exit_fn()s
to use del_timer_sync() on timers which grab the queue_lock which is
an irq-safe lock due to the deadlock possibility described in the
comment on top of del_timer_sync().
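
For illustration only (none of the names below appear in this patch):
consider a policy whose per-blkg timer handler grabs the irq-safe
queue_lock.  With the release path running synchronously from
blkg_put(), its pd_exit_fn() cannot safely del_timer_sync() that
timer.  pol_timer_fn(), pol_pd and pol_pd_from_blkg() are hypothetical.

  /* hypothetical policy timer; the handler grabs the irq-safe queue_lock */
  static void pol_timer_fn(unsigned long arg)
  {
          struct blkcg_gq *blkg = (struct blkcg_gq *)arg;

          spin_lock_irq(blkg->q->queue_lock);
          /* ... dispatch deferred work, possibly re-arm the timer ... */
          spin_unlock_irq(blkg->q->queue_lock);
  }

  static void pol_pd_exit_fn(struct blkcg_gq *blkg)
  {
          struct pol_pd *pd = pol_pd_from_blkg(blkg);     /* hypothetical */

          /*
           * May be reached via blkg_put() with queue_lock already held.
           * del_timer_sync() spins until pol_timer_fn() finishes, while
           * pol_timer_fn() spins on the queue_lock we hold: deadlock.
           */
          del_timer_sync(&pd->timer);
  }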

This can be easily avoided by performing the release operations in the
RCU callback instead of directly from blkg_put().  This patch moves
the blkcg_gq release operations to the RCU callback.
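
Before the patch, the header-side blkg_put() (the second changed file,
whose hunk is not shown below) invoked __blkg_release() synchronously.
A sketch of the pre-patch helper, assuming blkg->refcnt is manipulated
under queue_lock:

  static inline void blkg_put(struct blkcg_gq *blkg)
  {
          lockdep_assert_held(blkg->q->queue_lock);
          WARN_ON_ONCE(blkg->refcnt <= 0);
          if (!--blkg->refcnt)
                  __blkg_release(blkg);   /* runs the pd_exit_fn()s right
                                           * here, possibly under queue_lock */
  }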

As this leaves __blkg_release() with only the call_rcu() invocation,
blkg_rcu_free() is renamed to __blkg_release_rcu() and exported, and
the call_rcu() invocation is now done directly from blkg_put() instead
of going through __blkg_release(), which is removed.
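
After the patch the same header-side helper presumably reduces to
dropping the reference and queueing the RCU callback directly; a
sketch under the same assumption about refcnt locking:

  static inline void blkg_put(struct blkcg_gq *blkg)
  {
          lockdep_assert_held(blkg->q->queue_lock);
          WARN_ON_ONCE(blkg->refcnt <= 0);
          if (!--blkg->refcnt)
                  call_rcu(&blkg->rcu_head, __blkg_release_rcu);
  }

Since __blkg_release_rcu() then runs from RCU callback context without
the queue lock, the blkg_put() it does on the parent must take the
lock itself; this is why the new code wraps blkg_put(blkg->parent) in
spin_lock_irq()/spin_unlock_irq() in the hunk below.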

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Vivek Goyal <vgoyal@redhat.com>

Showing 2 changed files with 18 additions and 20 deletions

1 /* 1 /*
2 * Common Block IO controller cgroup interface 2 * Common Block IO controller cgroup interface
3 * 3 *
4 * Based on ideas and code from CFQ, CFS and BFQ: 4 * Based on ideas and code from CFQ, CFS and BFQ:
5 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> 5 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
6 * 6 *
7 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> 7 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
8 * Paolo Valente <paolo.valente@unimore.it> 8 * Paolo Valente <paolo.valente@unimore.it>
9 * 9 *
10 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> 10 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
11 * Nauman Rafique <nauman@google.com> 11 * Nauman Rafique <nauman@google.com>
12 */ 12 */
13 #include <linux/ioprio.h> 13 #include <linux/ioprio.h>
14 #include <linux/kdev_t.h> 14 #include <linux/kdev_t.h>
15 #include <linux/module.h> 15 #include <linux/module.h>
16 #include <linux/err.h> 16 #include <linux/err.h>
17 #include <linux/blkdev.h> 17 #include <linux/blkdev.h>
18 #include <linux/slab.h> 18 #include <linux/slab.h>
19 #include <linux/genhd.h> 19 #include <linux/genhd.h>
20 #include <linux/delay.h> 20 #include <linux/delay.h>
21 #include <linux/atomic.h> 21 #include <linux/atomic.h>
22 #include "blk-cgroup.h" 22 #include "blk-cgroup.h"
23 #include "blk.h" 23 #include "blk.h"
24 24
25 #define MAX_KEY_LEN 100 25 #define MAX_KEY_LEN 100
26 26
27 static DEFINE_MUTEX(blkcg_pol_mutex); 27 static DEFINE_MUTEX(blkcg_pol_mutex);
28 28
29 struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT, 29 struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT,
30 .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, }; 30 .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, };
31 EXPORT_SYMBOL_GPL(blkcg_root); 31 EXPORT_SYMBOL_GPL(blkcg_root);
32 32
33 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; 33 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
34 34
35 static bool blkcg_policy_enabled(struct request_queue *q, 35 static bool blkcg_policy_enabled(struct request_queue *q,
36 const struct blkcg_policy *pol) 36 const struct blkcg_policy *pol)
37 { 37 {
38 return pol && test_bit(pol->plid, q->blkcg_pols); 38 return pol && test_bit(pol->plid, q->blkcg_pols);
39 } 39 }
40 40
41 /** 41 /**
42 * blkg_free - free a blkg 42 * blkg_free - free a blkg
43 * @blkg: blkg to free 43 * @blkg: blkg to free
44 * 44 *
45 * Free @blkg which may be partially allocated. 45 * Free @blkg which may be partially allocated.
46 */ 46 */
47 static void blkg_free(struct blkcg_gq *blkg) 47 static void blkg_free(struct blkcg_gq *blkg)
48 { 48 {
49 int i; 49 int i;
50 50
51 if (!blkg) 51 if (!blkg)
52 return; 52 return;
53 53
54 for (i = 0; i < BLKCG_MAX_POLS; i++) 54 for (i = 0; i < BLKCG_MAX_POLS; i++)
55 kfree(blkg->pd[i]); 55 kfree(blkg->pd[i]);
56 56
57 blk_exit_rl(&blkg->rl); 57 blk_exit_rl(&blkg->rl);
58 kfree(blkg); 58 kfree(blkg);
59 } 59 }
60 60
61 /** 61 /**
62 * blkg_alloc - allocate a blkg 62 * blkg_alloc - allocate a blkg
63 * @blkcg: block cgroup the new blkg is associated with 63 * @blkcg: block cgroup the new blkg is associated with
64 * @q: request_queue the new blkg is associated with 64 * @q: request_queue the new blkg is associated with
65 * @gfp_mask: allocation mask to use 65 * @gfp_mask: allocation mask to use
66 * 66 *
67 * Allocate a new blkg assocating @blkcg and @q. 67 * Allocate a new blkg assocating @blkcg and @q.
68 */ 68 */
69 static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, 69 static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
70 gfp_t gfp_mask) 70 gfp_t gfp_mask)
71 { 71 {
72 struct blkcg_gq *blkg; 72 struct blkcg_gq *blkg;
73 int i; 73 int i;
74 74
75 /* alloc and init base part */ 75 /* alloc and init base part */
76 blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node); 76 blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
77 if (!blkg) 77 if (!blkg)
78 return NULL; 78 return NULL;
79 79
80 blkg->q = q; 80 blkg->q = q;
81 INIT_LIST_HEAD(&blkg->q_node); 81 INIT_LIST_HEAD(&blkg->q_node);
82 blkg->blkcg = blkcg; 82 blkg->blkcg = blkcg;
83 blkg->refcnt = 1; 83 blkg->refcnt = 1;
84 84
85 /* root blkg uses @q->root_rl, init rl only for !root blkgs */ 85 /* root blkg uses @q->root_rl, init rl only for !root blkgs */
86 if (blkcg != &blkcg_root) { 86 if (blkcg != &blkcg_root) {
87 if (blk_init_rl(&blkg->rl, q, gfp_mask)) 87 if (blk_init_rl(&blkg->rl, q, gfp_mask))
88 goto err_free; 88 goto err_free;
89 blkg->rl.blkg = blkg; 89 blkg->rl.blkg = blkg;
90 } 90 }
91 91
92 for (i = 0; i < BLKCG_MAX_POLS; i++) { 92 for (i = 0; i < BLKCG_MAX_POLS; i++) {
93 struct blkcg_policy *pol = blkcg_policy[i]; 93 struct blkcg_policy *pol = blkcg_policy[i];
94 struct blkg_policy_data *pd; 94 struct blkg_policy_data *pd;
95 95
96 if (!blkcg_policy_enabled(q, pol)) 96 if (!blkcg_policy_enabled(q, pol))
97 continue; 97 continue;
98 98
99 /* alloc per-policy data and attach it to blkg */ 99 /* alloc per-policy data and attach it to blkg */
100 pd = kzalloc_node(pol->pd_size, gfp_mask, q->node); 100 pd = kzalloc_node(pol->pd_size, gfp_mask, q->node);
101 if (!pd) 101 if (!pd)
102 goto err_free; 102 goto err_free;
103 103
104 blkg->pd[i] = pd; 104 blkg->pd[i] = pd;
105 pd->blkg = blkg; 105 pd->blkg = blkg;
106 pd->plid = i; 106 pd->plid = i;
107 } 107 }
108 108
109 return blkg; 109 return blkg;
110 110
111 err_free: 111 err_free:
112 blkg_free(blkg); 112 blkg_free(blkg);
113 return NULL; 113 return NULL;
114 } 114 }
115 115
116 /** 116 /**
117 * __blkg_lookup - internal version of blkg_lookup() 117 * __blkg_lookup - internal version of blkg_lookup()
118 * @blkcg: blkcg of interest 118 * @blkcg: blkcg of interest
119 * @q: request_queue of interest 119 * @q: request_queue of interest
120 * @update_hint: whether to update lookup hint with the result or not 120 * @update_hint: whether to update lookup hint with the result or not
121 * 121 *
122 * This is internal version and shouldn't be used by policy 122 * This is internal version and shouldn't be used by policy
123 * implementations. Looks up blkgs for the @blkcg - @q pair regardless of 123 * implementations. Looks up blkgs for the @blkcg - @q pair regardless of
124 * @q's bypass state. If @update_hint is %true, the caller should be 124 * @q's bypass state. If @update_hint is %true, the caller should be
125 * holding @q->queue_lock and lookup hint is updated on success. 125 * holding @q->queue_lock and lookup hint is updated on success.
126 */ 126 */
127 struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, 127 struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
128 bool update_hint) 128 bool update_hint)
129 { 129 {
130 struct blkcg_gq *blkg; 130 struct blkcg_gq *blkg;
131 131
132 blkg = rcu_dereference(blkcg->blkg_hint); 132 blkg = rcu_dereference(blkcg->blkg_hint);
133 if (blkg && blkg->q == q) 133 if (blkg && blkg->q == q)
134 return blkg; 134 return blkg;
135 135
136 /* 136 /*
137 * Hint didn't match. Look up from the radix tree. Note that the 137 * Hint didn't match. Look up from the radix tree. Note that the
138 * hint can only be updated under queue_lock as otherwise @blkg 138 * hint can only be updated under queue_lock as otherwise @blkg
139 * could have already been removed from blkg_tree. The caller is 139 * could have already been removed from blkg_tree. The caller is
140 * responsible for grabbing queue_lock if @update_hint. 140 * responsible for grabbing queue_lock if @update_hint.
141 */ 141 */
142 blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); 142 blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
143 if (blkg && blkg->q == q) { 143 if (blkg && blkg->q == q) {
144 if (update_hint) { 144 if (update_hint) {
145 lockdep_assert_held(q->queue_lock); 145 lockdep_assert_held(q->queue_lock);
146 rcu_assign_pointer(blkcg->blkg_hint, blkg); 146 rcu_assign_pointer(blkcg->blkg_hint, blkg);
147 } 147 }
148 return blkg; 148 return blkg;
149 } 149 }
150 150
151 return NULL; 151 return NULL;
152 } 152 }
153 153
154 /** 154 /**
155 * blkg_lookup - lookup blkg for the specified blkcg - q pair 155 * blkg_lookup - lookup blkg for the specified blkcg - q pair
156 * @blkcg: blkcg of interest 156 * @blkcg: blkcg of interest
157 * @q: request_queue of interest 157 * @q: request_queue of interest
158 * 158 *
159 * Lookup blkg for the @blkcg - @q pair. This function should be called 159 * Lookup blkg for the @blkcg - @q pair. This function should be called
160 * under RCU read lock and is guaranteed to return %NULL if @q is bypassing 160 * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
161 * - see blk_queue_bypass_start() for details. 161 * - see blk_queue_bypass_start() for details.
162 */ 162 */
163 struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) 163 struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q)
164 { 164 {
165 WARN_ON_ONCE(!rcu_read_lock_held()); 165 WARN_ON_ONCE(!rcu_read_lock_held());
166 166
167 if (unlikely(blk_queue_bypass(q))) 167 if (unlikely(blk_queue_bypass(q)))
168 return NULL; 168 return NULL;
169 return __blkg_lookup(blkcg, q, false); 169 return __blkg_lookup(blkcg, q, false);
170 } 170 }
171 EXPORT_SYMBOL_GPL(blkg_lookup); 171 EXPORT_SYMBOL_GPL(blkg_lookup);
172 172
173 /* 173 /*
174 * If @new_blkg is %NULL, this function tries to allocate a new one as 174 * If @new_blkg is %NULL, this function tries to allocate a new one as
175 * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return. 175 * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return.
176 */ 176 */
177 static struct blkcg_gq *blkg_create(struct blkcg *blkcg, 177 static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
178 struct request_queue *q, 178 struct request_queue *q,
179 struct blkcg_gq *new_blkg) 179 struct blkcg_gq *new_blkg)
180 { 180 {
181 struct blkcg_gq *blkg; 181 struct blkcg_gq *blkg;
182 int i, ret; 182 int i, ret;
183 183
184 WARN_ON_ONCE(!rcu_read_lock_held()); 184 WARN_ON_ONCE(!rcu_read_lock_held());
185 lockdep_assert_held(q->queue_lock); 185 lockdep_assert_held(q->queue_lock);
186 186
187 /* blkg holds a reference to blkcg */ 187 /* blkg holds a reference to blkcg */
188 if (!css_tryget(&blkcg->css)) { 188 if (!css_tryget(&blkcg->css)) {
189 ret = -EINVAL; 189 ret = -EINVAL;
190 goto err_free_blkg; 190 goto err_free_blkg;
191 } 191 }
192 192
193 /* allocate */ 193 /* allocate */
194 if (!new_blkg) { 194 if (!new_blkg) {
195 new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); 195 new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
196 if (unlikely(!new_blkg)) { 196 if (unlikely(!new_blkg)) {
197 ret = -ENOMEM; 197 ret = -ENOMEM;
198 goto err_put_css; 198 goto err_put_css;
199 } 199 }
200 } 200 }
201 blkg = new_blkg; 201 blkg = new_blkg;
202 202
203 /* link parent */ 203 /* link parent */
204 if (blkcg_parent(blkcg)) { 204 if (blkcg_parent(blkcg)) {
205 blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false); 205 blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
206 if (WARN_ON_ONCE(!blkg->parent)) { 206 if (WARN_ON_ONCE(!blkg->parent)) {
207 ret = -EINVAL; 207 ret = -EINVAL;
208 goto err_put_css; 208 goto err_put_css;
209 } 209 }
210 blkg_get(blkg->parent); 210 blkg_get(blkg->parent);
211 } 211 }
212 212
213 /* invoke per-policy init */ 213 /* invoke per-policy init */
214 for (i = 0; i < BLKCG_MAX_POLS; i++) { 214 for (i = 0; i < BLKCG_MAX_POLS; i++) {
215 struct blkcg_policy *pol = blkcg_policy[i]; 215 struct blkcg_policy *pol = blkcg_policy[i];
216 216
217 if (blkg->pd[i] && pol->pd_init_fn) 217 if (blkg->pd[i] && pol->pd_init_fn)
218 pol->pd_init_fn(blkg); 218 pol->pd_init_fn(blkg);
219 } 219 }
220 220
221 /* insert */ 221 /* insert */
222 spin_lock(&blkcg->lock); 222 spin_lock(&blkcg->lock);
223 ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); 223 ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
224 if (likely(!ret)) { 224 if (likely(!ret)) {
225 hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); 225 hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
226 list_add(&blkg->q_node, &q->blkg_list); 226 list_add(&blkg->q_node, &q->blkg_list);
227 227
228 for (i = 0; i < BLKCG_MAX_POLS; i++) { 228 for (i = 0; i < BLKCG_MAX_POLS; i++) {
229 struct blkcg_policy *pol = blkcg_policy[i]; 229 struct blkcg_policy *pol = blkcg_policy[i];
230 230
231 if (blkg->pd[i] && pol->pd_online_fn) 231 if (blkg->pd[i] && pol->pd_online_fn)
232 pol->pd_online_fn(blkg); 232 pol->pd_online_fn(blkg);
233 } 233 }
234 } 234 }
235 blkg->online = true; 235 blkg->online = true;
236 spin_unlock(&blkcg->lock); 236 spin_unlock(&blkcg->lock);
237 237
238 if (!ret) 238 if (!ret)
239 return blkg; 239 return blkg;
240 240
241 /* @blkg failed fully initialized, use the usual release path */ 241 /* @blkg failed fully initialized, use the usual release path */
242 blkg_put(blkg); 242 blkg_put(blkg);
243 return ERR_PTR(ret); 243 return ERR_PTR(ret);
244 244
245 err_put_css: 245 err_put_css:
246 css_put(&blkcg->css); 246 css_put(&blkcg->css);
247 err_free_blkg: 247 err_free_blkg:
248 blkg_free(new_blkg); 248 blkg_free(new_blkg);
249 return ERR_PTR(ret); 249 return ERR_PTR(ret);
250 } 250 }
251 251
252 /** 252 /**
253 * blkg_lookup_create - lookup blkg, try to create one if not there 253 * blkg_lookup_create - lookup blkg, try to create one if not there
254 * @blkcg: blkcg of interest 254 * @blkcg: blkcg of interest
255 * @q: request_queue of interest 255 * @q: request_queue of interest
256 * 256 *
257 * Lookup blkg for the @blkcg - @q pair. If it doesn't exist, try to 257 * Lookup blkg for the @blkcg - @q pair. If it doesn't exist, try to
258 * create one. blkg creation is performed recursively from blkcg_root such 258 * create one. blkg creation is performed recursively from blkcg_root such
259 * that all non-root blkg's have access to the parent blkg. This function 259 * that all non-root blkg's have access to the parent blkg. This function
260 * should be called under RCU read lock and @q->queue_lock. 260 * should be called under RCU read lock and @q->queue_lock.
261 * 261 *
262 * Returns pointer to the looked up or created blkg on success, ERR_PTR() 262 * Returns pointer to the looked up or created blkg on success, ERR_PTR()
263 * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not 263 * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not
264 * dead and bypassing, returns ERR_PTR(-EBUSY). 264 * dead and bypassing, returns ERR_PTR(-EBUSY).
265 */ 265 */
266 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, 266 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
267 struct request_queue *q) 267 struct request_queue *q)
268 { 268 {
269 struct blkcg_gq *blkg; 269 struct blkcg_gq *blkg;
270 270
271 WARN_ON_ONCE(!rcu_read_lock_held()); 271 WARN_ON_ONCE(!rcu_read_lock_held());
272 lockdep_assert_held(q->queue_lock); 272 lockdep_assert_held(q->queue_lock);
273 273
274 /* 274 /*
275 * This could be the first entry point of blkcg implementation and 275 * This could be the first entry point of blkcg implementation and
276 * we shouldn't allow anything to go through for a bypassing queue. 276 * we shouldn't allow anything to go through for a bypassing queue.
277 */ 277 */
278 if (unlikely(blk_queue_bypass(q))) 278 if (unlikely(blk_queue_bypass(q)))
279 return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY); 279 return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY);
280 280
281 blkg = __blkg_lookup(blkcg, q, true); 281 blkg = __blkg_lookup(blkcg, q, true);
282 if (blkg) 282 if (blkg)
283 return blkg; 283 return blkg;
284 284
285 /* 285 /*
286 * Create blkgs walking down from blkcg_root to @blkcg, so that all 286 * Create blkgs walking down from blkcg_root to @blkcg, so that all
287 * non-root blkgs have access to their parents. 287 * non-root blkgs have access to their parents.
288 */ 288 */
289 while (true) { 289 while (true) {
290 struct blkcg *pos = blkcg; 290 struct blkcg *pos = blkcg;
291 struct blkcg *parent = blkcg_parent(blkcg); 291 struct blkcg *parent = blkcg_parent(blkcg);
292 292
293 while (parent && !__blkg_lookup(parent, q, false)) { 293 while (parent && !__blkg_lookup(parent, q, false)) {
294 pos = parent; 294 pos = parent;
295 parent = blkcg_parent(parent); 295 parent = blkcg_parent(parent);
296 } 296 }
297 297
298 blkg = blkg_create(pos, q, NULL); 298 blkg = blkg_create(pos, q, NULL);
299 if (pos == blkcg || IS_ERR(blkg)) 299 if (pos == blkcg || IS_ERR(blkg))
300 return blkg; 300 return blkg;
301 } 301 }
302 } 302 }
303 EXPORT_SYMBOL_GPL(blkg_lookup_create); 303 EXPORT_SYMBOL_GPL(blkg_lookup_create);
304 304
305 static void blkg_destroy(struct blkcg_gq *blkg) 305 static void blkg_destroy(struct blkcg_gq *blkg)
306 { 306 {
307 struct blkcg *blkcg = blkg->blkcg; 307 struct blkcg *blkcg = blkg->blkcg;
308 int i; 308 int i;
309 309
310 lockdep_assert_held(blkg->q->queue_lock); 310 lockdep_assert_held(blkg->q->queue_lock);
311 lockdep_assert_held(&blkcg->lock); 311 lockdep_assert_held(&blkcg->lock);
312 312
313 /* Something wrong if we are trying to remove same group twice */ 313 /* Something wrong if we are trying to remove same group twice */
314 WARN_ON_ONCE(list_empty(&blkg->q_node)); 314 WARN_ON_ONCE(list_empty(&blkg->q_node));
315 WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); 315 WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
316 316
317 for (i = 0; i < BLKCG_MAX_POLS; i++) { 317 for (i = 0; i < BLKCG_MAX_POLS; i++) {
318 struct blkcg_policy *pol = blkcg_policy[i]; 318 struct blkcg_policy *pol = blkcg_policy[i];
319 319
320 if (blkg->pd[i] && pol->pd_offline_fn) 320 if (blkg->pd[i] && pol->pd_offline_fn)
321 pol->pd_offline_fn(blkg); 321 pol->pd_offline_fn(blkg);
322 } 322 }
323 blkg->online = false; 323 blkg->online = false;
324 324
325 radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); 325 radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
326 list_del_init(&blkg->q_node); 326 list_del_init(&blkg->q_node);
327 hlist_del_init_rcu(&blkg->blkcg_node); 327 hlist_del_init_rcu(&blkg->blkcg_node);
328 328
329 /* 329 /*
330 * Both setting lookup hint to and clearing it from @blkg are done 330 * Both setting lookup hint to and clearing it from @blkg are done
331 * under queue_lock. If it's not pointing to @blkg now, it never 331 * under queue_lock. If it's not pointing to @blkg now, it never
332 * will. Hint assignment itself can race safely. 332 * will. Hint assignment itself can race safely.
333 */ 333 */
334 if (rcu_dereference_raw(blkcg->blkg_hint) == blkg) 334 if (rcu_dereference_raw(blkcg->blkg_hint) == blkg)
335 rcu_assign_pointer(blkcg->blkg_hint, NULL); 335 rcu_assign_pointer(blkcg->blkg_hint, NULL);
336 336
337 /* 337 /*
338 * Put the reference taken at the time of creation so that when all 338 * Put the reference taken at the time of creation so that when all
339 * queues are gone, group can be destroyed. 339 * queues are gone, group can be destroyed.
340 */ 340 */
341 blkg_put(blkg); 341 blkg_put(blkg);
342 } 342 }
343 343
344 /** 344 /**
345 * blkg_destroy_all - destroy all blkgs associated with a request_queue 345 * blkg_destroy_all - destroy all blkgs associated with a request_queue
346 * @q: request_queue of interest 346 * @q: request_queue of interest
347 * 347 *
348 * Destroy all blkgs associated with @q. 348 * Destroy all blkgs associated with @q.
349 */ 349 */
350 static void blkg_destroy_all(struct request_queue *q) 350 static void blkg_destroy_all(struct request_queue *q)
351 { 351 {
352 struct blkcg_gq *blkg, *n; 352 struct blkcg_gq *blkg, *n;
353 353
354 lockdep_assert_held(q->queue_lock); 354 lockdep_assert_held(q->queue_lock);
355 355
356 list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { 356 list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
357 struct blkcg *blkcg = blkg->blkcg; 357 struct blkcg *blkcg = blkg->blkcg;
358 358
359 spin_lock(&blkcg->lock); 359 spin_lock(&blkcg->lock);
360 blkg_destroy(blkg); 360 blkg_destroy(blkg);
361 spin_unlock(&blkcg->lock); 361 spin_unlock(&blkcg->lock);
362 } 362 }
363 363
364 /* 364 /*
365 * root blkg is destroyed. Just clear the pointer since 365 * root blkg is destroyed. Just clear the pointer since
366 * root_rl does not take reference on root blkg. 366 * root_rl does not take reference on root blkg.
367 */ 367 */
368 q->root_blkg = NULL; 368 q->root_blkg = NULL;
369 q->root_rl.blkg = NULL; 369 q->root_rl.blkg = NULL;
370 } 370 }
371 371
@@ -372,34 +372,32 @@
-static void blkg_rcu_free(struct rcu_head *rcu_head)
-{
-        blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head));
-}
-
-void __blkg_release(struct blkcg_gq *blkg)
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid. For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+void __blkg_release_rcu(struct rcu_head *rcu_head)
 {
+        struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
         int i;
 
         /* tell policies that this one is being freed */
         for (i = 0; i < BLKCG_MAX_POLS; i++) {
                 struct blkcg_policy *pol = blkcg_policy[i];
 
                 if (blkg->pd[i] && pol->pd_exit_fn)
                         pol->pd_exit_fn(blkg);
         }
 
         /* release the blkcg and parent blkg refs this blkg has been holding */
         css_put(&blkg->blkcg->css);
-        if (blkg->parent)
+        if (blkg->parent) {
+                spin_lock_irq(blkg->q->queue_lock);
                 blkg_put(blkg->parent);
+                spin_unlock_irq(blkg->q->queue_lock);
+        }
 
-        /*
-         * A group is freed in rcu manner. But having an rcu lock does not
-         * mean that one can access all the fields of blkg and assume these
-         * are valid. For example, don't try to follow throtl_data and
-         * request queue links.
-         *
-         * Having a reference to blkg under an rcu allows acess to only
-         * values local to groups like group stats and group rate limits
-         */
-        call_rcu(&blkg->rcu_head, blkg_rcu_free);
+        blkg_free(blkg);
 }
-EXPORT_SYMBOL_GPL(__blkg_release);
+EXPORT_SYMBOL_GPL(__blkg_release_rcu);
406 404
407 /* 405 /*
408 * The next function used by blk_queue_for_each_rl(). It's a bit tricky 406 * The next function used by blk_queue_for_each_rl(). It's a bit tricky
409 * because the root blkg uses @q->root_rl instead of its own rl. 407 * because the root blkg uses @q->root_rl instead of its own rl.
410 */ 408 */
411 struct request_list *__blk_queue_next_rl(struct request_list *rl, 409 struct request_list *__blk_queue_next_rl(struct request_list *rl,
412 struct request_queue *q) 410 struct request_queue *q)
413 { 411 {
414 struct list_head *ent; 412 struct list_head *ent;
415 struct blkcg_gq *blkg; 413 struct blkcg_gq *blkg;
416 414
417 /* 415 /*
418 * Determine the current blkg list_head. The first entry is 416 * Determine the current blkg list_head. The first entry is
419 * root_rl which is off @q->blkg_list and mapped to the head. 417 * root_rl which is off @q->blkg_list and mapped to the head.
420 */ 418 */
421 if (rl == &q->root_rl) { 419 if (rl == &q->root_rl) {
422 ent = &q->blkg_list; 420 ent = &q->blkg_list;
423 /* There are no more block groups, hence no request lists */ 421 /* There are no more block groups, hence no request lists */
424 if (list_empty(ent)) 422 if (list_empty(ent))
425 return NULL; 423 return NULL;
426 } else { 424 } else {
427 blkg = container_of(rl, struct blkcg_gq, rl); 425 blkg = container_of(rl, struct blkcg_gq, rl);
428 ent = &blkg->q_node; 426 ent = &blkg->q_node;
429 } 427 }
430 428
431 /* walk to the next list_head, skip root blkcg */ 429 /* walk to the next list_head, skip root blkcg */
432 ent = ent->next; 430 ent = ent->next;
433 if (ent == &q->root_blkg->q_node) 431 if (ent == &q->root_blkg->q_node)
434 ent = ent->next; 432 ent = ent->next;
435 if (ent == &q->blkg_list) 433 if (ent == &q->blkg_list)
436 return NULL; 434 return NULL;
437 435
438 blkg = container_of(ent, struct blkcg_gq, q_node); 436 blkg = container_of(ent, struct blkcg_gq, q_node);
439 return &blkg->rl; 437 return &blkg->rl;
440 } 438 }
441 439
442 static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, 440 static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
443 u64 val) 441 u64 val)
444 { 442 {
445 struct blkcg *blkcg = cgroup_to_blkcg(cgroup); 443 struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
446 struct blkcg_gq *blkg; 444 struct blkcg_gq *blkg;
447 int i; 445 int i;
448 446
449 mutex_lock(&blkcg_pol_mutex); 447 mutex_lock(&blkcg_pol_mutex);
450 spin_lock_irq(&blkcg->lock); 448 spin_lock_irq(&blkcg->lock);
451 449
452 /* 450 /*
453 * Note that stat reset is racy - it doesn't synchronize against 451 * Note that stat reset is racy - it doesn't synchronize against
454 * stat updates. This is a debug feature which shouldn't exist 452 * stat updates. This is a debug feature which shouldn't exist
455 * anyway. If you get hit by a race, retry. 453 * anyway. If you get hit by a race, retry.
456 */ 454 */
457 hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { 455 hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
458 for (i = 0; i < BLKCG_MAX_POLS; i++) { 456 for (i = 0; i < BLKCG_MAX_POLS; i++) {
459 struct blkcg_policy *pol = blkcg_policy[i]; 457 struct blkcg_policy *pol = blkcg_policy[i];
460 458
461 if (blkcg_policy_enabled(blkg->q, pol) && 459 if (blkcg_policy_enabled(blkg->q, pol) &&
462 pol->pd_reset_stats_fn) 460 pol->pd_reset_stats_fn)
463 pol->pd_reset_stats_fn(blkg); 461 pol->pd_reset_stats_fn(blkg);
464 } 462 }
465 } 463 }
466 464
467 spin_unlock_irq(&blkcg->lock); 465 spin_unlock_irq(&blkcg->lock);
468 mutex_unlock(&blkcg_pol_mutex); 466 mutex_unlock(&blkcg_pol_mutex);
469 return 0; 467 return 0;
470 } 468 }
471 469
472 static const char *blkg_dev_name(struct blkcg_gq *blkg) 470 static const char *blkg_dev_name(struct blkcg_gq *blkg)
473 { 471 {
474 /* some drivers (floppy) instantiate a queue w/o disk registered */ 472 /* some drivers (floppy) instantiate a queue w/o disk registered */
475 if (blkg->q->backing_dev_info.dev) 473 if (blkg->q->backing_dev_info.dev)
476 return dev_name(blkg->q->backing_dev_info.dev); 474 return dev_name(blkg->q->backing_dev_info.dev);
477 return NULL; 475 return NULL;
478 } 476 }
479 477
480 /** 478 /**
481 * blkcg_print_blkgs - helper for printing per-blkg data 479 * blkcg_print_blkgs - helper for printing per-blkg data
482 * @sf: seq_file to print to 480 * @sf: seq_file to print to
483 * @blkcg: blkcg of interest 481 * @blkcg: blkcg of interest
484 * @prfill: fill function to print out a blkg 482 * @prfill: fill function to print out a blkg
485 * @pol: policy in question 483 * @pol: policy in question
486 * @data: data to be passed to @prfill 484 * @data: data to be passed to @prfill
487 * @show_total: to print out sum of prfill return values or not 485 * @show_total: to print out sum of prfill return values or not
488 * 486 *
489 * This function invokes @prfill on each blkg of @blkcg if pd for the 487 * This function invokes @prfill on each blkg of @blkcg if pd for the
490 * policy specified by @pol exists. @prfill is invoked with @sf, the 488 * policy specified by @pol exists. @prfill is invoked with @sf, the
491 * policy data and @data and the matching queue lock held. If @show_total 489 * policy data and @data and the matching queue lock held. If @show_total
492 * is %true, the sum of the return values from @prfill is printed with 490 * is %true, the sum of the return values from @prfill is printed with
493 * "Total" label at the end. 491 * "Total" label at the end.
494 * 492 *
495 * This is to be used to construct print functions for 493 * This is to be used to construct print functions for
496 * cftype->read_seq_string method. 494 * cftype->read_seq_string method.
497 */ 495 */
498 void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, 496 void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
499 u64 (*prfill)(struct seq_file *, 497 u64 (*prfill)(struct seq_file *,
500 struct blkg_policy_data *, int), 498 struct blkg_policy_data *, int),
501 const struct blkcg_policy *pol, int data, 499 const struct blkcg_policy *pol, int data,
502 bool show_total) 500 bool show_total)
503 { 501 {
504 struct blkcg_gq *blkg; 502 struct blkcg_gq *blkg;
505 u64 total = 0; 503 u64 total = 0;
506 504
507 rcu_read_lock(); 505 rcu_read_lock();
508 hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { 506 hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
509 spin_lock_irq(blkg->q->queue_lock); 507 spin_lock_irq(blkg->q->queue_lock);
510 if (blkcg_policy_enabled(blkg->q, pol)) 508 if (blkcg_policy_enabled(blkg->q, pol))
511 total += prfill(sf, blkg->pd[pol->plid], data); 509 total += prfill(sf, blkg->pd[pol->plid], data);
512 spin_unlock_irq(blkg->q->queue_lock); 510 spin_unlock_irq(blkg->q->queue_lock);
513 } 511 }
514 rcu_read_unlock(); 512 rcu_read_unlock();
515 513
516 if (show_total) 514 if (show_total)
517 seq_printf(sf, "Total %llu\n", (unsigned long long)total); 515 seq_printf(sf, "Total %llu\n", (unsigned long long)total);
518 } 516 }
519 EXPORT_SYMBOL_GPL(blkcg_print_blkgs); 517 EXPORT_SYMBOL_GPL(blkcg_print_blkgs);
520 518
521 /** 519 /**
522 * __blkg_prfill_u64 - prfill helper for a single u64 value 520 * __blkg_prfill_u64 - prfill helper for a single u64 value
523 * @sf: seq_file to print to 521 * @sf: seq_file to print to
524 * @pd: policy private data of interest 522 * @pd: policy private data of interest
525 * @v: value to print 523 * @v: value to print
526 * 524 *
527 * Print @v to @sf for the device assocaited with @pd. 525 * Print @v to @sf for the device assocaited with @pd.
528 */ 526 */
529 u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v) 527 u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
530 { 528 {
531 const char *dname = blkg_dev_name(pd->blkg); 529 const char *dname = blkg_dev_name(pd->blkg);
532 530
533 if (!dname) 531 if (!dname)
534 return 0; 532 return 0;
535 533
536 seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v); 534 seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
537 return v; 535 return v;
538 } 536 }
539 EXPORT_SYMBOL_GPL(__blkg_prfill_u64); 537 EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
540 538
541 /** 539 /**
542 * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat 540 * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
543 * @sf: seq_file to print to 541 * @sf: seq_file to print to
544 * @pd: policy private data of interest 542 * @pd: policy private data of interest
545 * @rwstat: rwstat to print 543 * @rwstat: rwstat to print
546 * 544 *
547 * Print @rwstat to @sf for the device assocaited with @pd. 545 * Print @rwstat to @sf for the device assocaited with @pd.
548 */ 546 */
549 u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, 547 u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
550 const struct blkg_rwstat *rwstat) 548 const struct blkg_rwstat *rwstat)
551 { 549 {
552 static const char *rwstr[] = { 550 static const char *rwstr[] = {
553 [BLKG_RWSTAT_READ] = "Read", 551 [BLKG_RWSTAT_READ] = "Read",
554 [BLKG_RWSTAT_WRITE] = "Write", 552 [BLKG_RWSTAT_WRITE] = "Write",
555 [BLKG_RWSTAT_SYNC] = "Sync", 553 [BLKG_RWSTAT_SYNC] = "Sync",
556 [BLKG_RWSTAT_ASYNC] = "Async", 554 [BLKG_RWSTAT_ASYNC] = "Async",
557 }; 555 };
558 const char *dname = blkg_dev_name(pd->blkg); 556 const char *dname = blkg_dev_name(pd->blkg);
559 u64 v; 557 u64 v;
560 int i; 558 int i;
561 559
562 if (!dname) 560 if (!dname)
563 return 0; 561 return 0;
564 562
565 for (i = 0; i < BLKG_RWSTAT_NR; i++) 563 for (i = 0; i < BLKG_RWSTAT_NR; i++)
566 seq_printf(sf, "%s %s %llu\n", dname, rwstr[i], 564 seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
567 (unsigned long long)rwstat->cnt[i]); 565 (unsigned long long)rwstat->cnt[i]);
568 566
569 v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE]; 567 v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
570 seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v); 568 seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
571 return v; 569 return v;
572 } 570 }
573 EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat); 571 EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);
574 572
575 /** 573 /**
576 * blkg_prfill_stat - prfill callback for blkg_stat 574 * blkg_prfill_stat - prfill callback for blkg_stat
577 * @sf: seq_file to print to 575 * @sf: seq_file to print to
578 * @pd: policy private data of interest 576 * @pd: policy private data of interest
579 * @off: offset to the blkg_stat in @pd 577 * @off: offset to the blkg_stat in @pd
580 * 578 *
581 * prfill callback for printing a blkg_stat. 579 * prfill callback for printing a blkg_stat.
582 */ 580 */
583 u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off) 581 u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off)
584 { 582 {
585 return __blkg_prfill_u64(sf, pd, blkg_stat_read((void *)pd + off)); 583 return __blkg_prfill_u64(sf, pd, blkg_stat_read((void *)pd + off));
586 } 584 }
587 EXPORT_SYMBOL_GPL(blkg_prfill_stat); 585 EXPORT_SYMBOL_GPL(blkg_prfill_stat);
588 586
589 /** 587 /**
590 * blkg_prfill_rwstat - prfill callback for blkg_rwstat 588 * blkg_prfill_rwstat - prfill callback for blkg_rwstat
591 * @sf: seq_file to print to 589 * @sf: seq_file to print to
592 * @pd: policy private data of interest 590 * @pd: policy private data of interest
593 * @off: offset to the blkg_rwstat in @pd 591 * @off: offset to the blkg_rwstat in @pd
594 * 592 *
595 * prfill callback for printing a blkg_rwstat. 593 * prfill callback for printing a blkg_rwstat.
596 */ 594 */
597 u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, 595 u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
598 int off) 596 int off)
599 { 597 {
600 struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd + off); 598 struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd + off);
601 599
602 return __blkg_prfill_rwstat(sf, pd, &rwstat); 600 return __blkg_prfill_rwstat(sf, pd, &rwstat);
603 } 601 }
604 EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); 602 EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
605 603
606 /** 604 /**
607 * blkg_stat_recursive_sum - collect hierarchical blkg_stat 605 * blkg_stat_recursive_sum - collect hierarchical blkg_stat
608 * @pd: policy private data of interest 606 * @pd: policy private data of interest
609 * @off: offset to the blkg_stat in @pd 607 * @off: offset to the blkg_stat in @pd
610 * 608 *
611 * Collect the blkg_stat specified by @off from @pd and all its online 609 * Collect the blkg_stat specified by @off from @pd and all its online
612 * descendants and return the sum. The caller must be holding the queue 610 * descendants and return the sum. The caller must be holding the queue
613 * lock for online tests. 611 * lock for online tests.
614 */ 612 */
615 u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off) 613 u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
616 { 614 {
617 struct blkcg_policy *pol = blkcg_policy[pd->plid]; 615 struct blkcg_policy *pol = blkcg_policy[pd->plid];
618 struct blkcg_gq *pos_blkg; 616 struct blkcg_gq *pos_blkg;
619 struct cgroup *pos_cgrp; 617 struct cgroup *pos_cgrp;
620 u64 sum; 618 u64 sum;
621 619
622 lockdep_assert_held(pd->blkg->q->queue_lock); 620 lockdep_assert_held(pd->blkg->q->queue_lock);
623 621
624 sum = blkg_stat_read((void *)pd + off); 622 sum = blkg_stat_read((void *)pd + off);
625 623
626 rcu_read_lock(); 624 rcu_read_lock();
627 blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) { 625 blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
628 struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); 626 struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
629 struct blkg_stat *stat = (void *)pos_pd + off; 627 struct blkg_stat *stat = (void *)pos_pd + off;
630 628
631 if (pos_blkg->online) 629 if (pos_blkg->online)
632 sum += blkg_stat_read(stat); 630 sum += blkg_stat_read(stat);
633 } 631 }
634 rcu_read_unlock(); 632 rcu_read_unlock();
635 633
636 return sum; 634 return sum;
637 } 635 }
638 EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); 636 EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum);
639 637
640 /** 638 /**
641 * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat 639 * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
642 * @pd: policy private data of interest 640 * @pd: policy private data of interest
643 * @off: offset to the blkg_stat in @pd 641 * @off: offset to the blkg_stat in @pd
644 * 642 *
645 * Collect the blkg_rwstat specified by @off from @pd and all its online 643 * Collect the blkg_rwstat specified by @off from @pd and all its online
646 * descendants and return the sum. The caller must be holding the queue 644 * descendants and return the sum. The caller must be holding the queue
647 * lock for online tests. 645 * lock for online tests.
648 */ 646 */
649 struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, 647 struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
650 int off) 648 int off)
651 { 649 {
652 struct blkcg_policy *pol = blkcg_policy[pd->plid]; 650 struct blkcg_policy *pol = blkcg_policy[pd->plid];
653 struct blkcg_gq *pos_blkg; 651 struct blkcg_gq *pos_blkg;
654 struct cgroup *pos_cgrp; 652 struct cgroup *pos_cgrp;
655 struct blkg_rwstat sum; 653 struct blkg_rwstat sum;
656 int i; 654 int i;
657 655
658 lockdep_assert_held(pd->blkg->q->queue_lock); 656 lockdep_assert_held(pd->blkg->q->queue_lock);
659 657
660 sum = blkg_rwstat_read((void *)pd + off); 658 sum = blkg_rwstat_read((void *)pd + off);
661 659
662 rcu_read_lock(); 660 rcu_read_lock();
663 blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) { 661 blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
664 struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); 662 struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
665 struct blkg_rwstat *rwstat = (void *)pos_pd + off; 663 struct blkg_rwstat *rwstat = (void *)pos_pd + off;
666 struct blkg_rwstat tmp; 664 struct blkg_rwstat tmp;
667 665
668 if (!pos_blkg->online) 666 if (!pos_blkg->online)
669 continue; 667 continue;
670 668
671 tmp = blkg_rwstat_read(rwstat); 669 tmp = blkg_rwstat_read(rwstat);
672 670
673 for (i = 0; i < BLKG_RWSTAT_NR; i++) 671 for (i = 0; i < BLKG_RWSTAT_NR; i++)
674 sum.cnt[i] += tmp.cnt[i]; 672 sum.cnt[i] += tmp.cnt[i];
675 } 673 }
676 rcu_read_unlock(); 674 rcu_read_unlock();
677 675
678 return sum; 676 return sum;
679 } 677 }
680 EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum); 678 EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
681 679
682 /** 680 /**
683 * blkg_conf_prep - parse and prepare for per-blkg config update 681 * blkg_conf_prep - parse and prepare for per-blkg config update
684 * @blkcg: target block cgroup 682 * @blkcg: target block cgroup
685 * @pol: target policy 683 * @pol: target policy
686 * @input: input string 684 * @input: input string
687 * @ctx: blkg_conf_ctx to be filled 685 * @ctx: blkg_conf_ctx to be filled
688 * 686 *
689 * Parse per-blkg config update from @input and initialize @ctx with the 687 * Parse per-blkg config update from @input and initialize @ctx with the
690 * result. @ctx->blkg points to the blkg to be updated and @ctx->v the new 688 * result. @ctx->blkg points to the blkg to be updated and @ctx->v the new
691 * value. This function returns with RCU read lock and queue lock held and 689 * value. This function returns with RCU read lock and queue lock held and
692 * must be paired with blkg_conf_finish(). 690 * must be paired with blkg_conf_finish().
693 */ 691 */
694 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, 692 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
695 const char *input, struct blkg_conf_ctx *ctx) 693 const char *input, struct blkg_conf_ctx *ctx)
696 __acquires(rcu) __acquires(disk->queue->queue_lock) 694 __acquires(rcu) __acquires(disk->queue->queue_lock)
697 { 695 {
698 struct gendisk *disk; 696 struct gendisk *disk;
699 struct blkcg_gq *blkg; 697 struct blkcg_gq *blkg;
700 unsigned int major, minor; 698 unsigned int major, minor;
701 unsigned long long v; 699 unsigned long long v;
702 int part, ret; 700 int part, ret;
703 701
704 if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3) 702 if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3)
705 return -EINVAL; 703 return -EINVAL;
706 704
707 disk = get_gendisk(MKDEV(major, minor), &part); 705 disk = get_gendisk(MKDEV(major, minor), &part);
708 if (!disk || part) 706 if (!disk || part)
709 return -EINVAL; 707 return -EINVAL;
710 708
711 rcu_read_lock(); 709 rcu_read_lock();
712 spin_lock_irq(disk->queue->queue_lock); 710 spin_lock_irq(disk->queue->queue_lock);
713 711
714 if (blkcg_policy_enabled(disk->queue, pol)) 712 if (blkcg_policy_enabled(disk->queue, pol))
715 blkg = blkg_lookup_create(blkcg, disk->queue); 713 blkg = blkg_lookup_create(blkcg, disk->queue);
716 else 714 else
717 blkg = ERR_PTR(-EINVAL); 715 blkg = ERR_PTR(-EINVAL);
718 716
719 if (IS_ERR(blkg)) { 717 if (IS_ERR(blkg)) {
720 ret = PTR_ERR(blkg); 718 ret = PTR_ERR(blkg);
721 rcu_read_unlock(); 719 rcu_read_unlock();
722 spin_unlock_irq(disk->queue->queue_lock); 720 spin_unlock_irq(disk->queue->queue_lock);
723 put_disk(disk); 721 put_disk(disk);
724 /* 722 /*
725 * If queue was bypassing, we should retry. Do so after a 723 * If queue was bypassing, we should retry. Do so after a
726 * short msleep(). It isn't strictly necessary but queue 724 * short msleep(). It isn't strictly necessary but queue
727 * can be bypassing for some time and it's always nice to 725 * can be bypassing for some time and it's always nice to
728 * avoid busy looping. 726 * avoid busy looping.
729 */ 727 */
730 if (ret == -EBUSY) { 728 if (ret == -EBUSY) {
731 msleep(10); 729 msleep(10);
732 ret = restart_syscall(); 730 ret = restart_syscall();
733 } 731 }
734 return ret; 732 return ret;
735 } 733 }
736 734
737 ctx->disk = disk; 735 ctx->disk = disk;
738 ctx->blkg = blkg; 736 ctx->blkg = blkg;
739 ctx->v = v; 737 ctx->v = v;
740 return 0; 738 return 0;
741 } 739 }
742 EXPORT_SYMBOL_GPL(blkg_conf_prep); 740 EXPORT_SYMBOL_GPL(blkg_conf_prep);
743 741
744 /** 742 /**
745 * blkg_conf_finish - finish up per-blkg config update 743 * blkg_conf_finish - finish up per-blkg config update
746 * @ctx: blkg_conf_ctx intiailized by blkg_conf_prep() 744 * @ctx: blkg_conf_ctx intiailized by blkg_conf_prep()
747 * 745 *
748 * Finish up after per-blkg config update. This function must be paired 746 * Finish up after per-blkg config update. This function must be paired
749 * with blkg_conf_prep(). 747 * with blkg_conf_prep().
750 */ 748 */
751 void blkg_conf_finish(struct blkg_conf_ctx *ctx) 749 void blkg_conf_finish(struct blkg_conf_ctx *ctx)
752 __releases(ctx->disk->queue->queue_lock) __releases(rcu) 750 __releases(ctx->disk->queue->queue_lock) __releases(rcu)
753 { 751 {
754 spin_unlock_irq(ctx->disk->queue->queue_lock); 752 spin_unlock_irq(ctx->disk->queue->queue_lock);
755 rcu_read_unlock(); 753 rcu_read_unlock();
756 put_disk(ctx->disk); 754 put_disk(ctx->disk);
757 } 755 }
758 EXPORT_SYMBOL_GPL(blkg_conf_finish); 756 EXPORT_SYMBOL_GPL(blkg_conf_finish);
759 757
760 struct cftype blkcg_files[] = { 758 struct cftype blkcg_files[] = {
761 { 759 {
762 .name = "reset_stats", 760 .name = "reset_stats",
763 .write_u64 = blkcg_reset_stats, 761 .write_u64 = blkcg_reset_stats,
764 }, 762 },
765 { } /* terminate */ 763 { } /* terminate */
766 }; 764 };
767 765
768 /** 766 /**
769 * blkcg_css_offline - cgroup css_offline callback 767 * blkcg_css_offline - cgroup css_offline callback
770 * @cgroup: cgroup of interest 768 * @cgroup: cgroup of interest
771 * 769 *
772 * This function is called when @cgroup is about to go away and responsible 770 * This function is called when @cgroup is about to go away and responsible
773 * for shooting down all blkgs associated with @cgroup. blkgs should be 771 * for shooting down all blkgs associated with @cgroup. blkgs should be
774 * removed while holding both q and blkcg locks. As blkcg lock is nested 772 * removed while holding both q and blkcg locks. As blkcg lock is nested
775 * inside q lock, this function performs reverse double lock dancing. 773 * inside q lock, this function performs reverse double lock dancing.
776 * 774 *
777 * This is the blkcg counterpart of ioc_release_fn(). 775 * This is the blkcg counterpart of ioc_release_fn().
778 */ 776 */
779 static void blkcg_css_offline(struct cgroup *cgroup) 777 static void blkcg_css_offline(struct cgroup *cgroup)
780 { 778 {
781 struct blkcg *blkcg = cgroup_to_blkcg(cgroup); 779 struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
782 780
783 spin_lock_irq(&blkcg->lock); 781 spin_lock_irq(&blkcg->lock);
784 782
785 while (!hlist_empty(&blkcg->blkg_list)) { 783 while (!hlist_empty(&blkcg->blkg_list)) {
786 struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first, 784 struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
787 struct blkcg_gq, blkcg_node); 785 struct blkcg_gq, blkcg_node);
788 struct request_queue *q = blkg->q; 786 struct request_queue *q = blkg->q;
789 787
790 if (spin_trylock(q->queue_lock)) { 788 if (spin_trylock(q->queue_lock)) {
791 blkg_destroy(blkg); 789 blkg_destroy(blkg);
792 spin_unlock(q->queue_lock); 790 spin_unlock(q->queue_lock);
793 } else { 791 } else {
794 spin_unlock_irq(&blkcg->lock); 792 spin_unlock_irq(&blkcg->lock);
795 cpu_relax(); 793 cpu_relax();
796 spin_lock_irq(&blkcg->lock); 794 spin_lock_irq(&blkcg->lock);
797 } 795 }
798 } 796 }
799 797
800 spin_unlock_irq(&blkcg->lock); 798 spin_unlock_irq(&blkcg->lock);
801 } 799 }
802 800
803 static void blkcg_css_free(struct cgroup *cgroup) 801 static void blkcg_css_free(struct cgroup *cgroup)
804 { 802 {
805 struct blkcg *blkcg = cgroup_to_blkcg(cgroup); 803 struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
806 804
807 if (blkcg != &blkcg_root) 805 if (blkcg != &blkcg_root)
808 kfree(blkcg); 806 kfree(blkcg);
809 } 807 }
810 808
811 static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup) 809 static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup)
812 { 810 {
813 static atomic64_t id_seq = ATOMIC64_INIT(0); 811 static atomic64_t id_seq = ATOMIC64_INIT(0);
814 struct blkcg *blkcg; 812 struct blkcg *blkcg;
815 struct cgroup *parent = cgroup->parent; 813 struct cgroup *parent = cgroup->parent;
816 814
817 if (!parent) { 815 if (!parent) {
818 blkcg = &blkcg_root; 816 blkcg = &blkcg_root;
819 goto done; 817 goto done;
820 } 818 }
821 819
822 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); 820 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
823 if (!blkcg) 821 if (!blkcg)
824 return ERR_PTR(-ENOMEM); 822 return ERR_PTR(-ENOMEM);
825 823
826 blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; 824 blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
827 blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT; 825 blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT;
828 blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */ 826 blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
829 done: 827 done:
830 spin_lock_init(&blkcg->lock); 828 spin_lock_init(&blkcg->lock);
831 INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); 829 INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
832 INIT_HLIST_HEAD(&blkcg->blkg_list); 830 INIT_HLIST_HEAD(&blkcg->blkg_list);
833 831
834 return &blkcg->css; 832 return &blkcg->css;
835 } 833 }
836 834
837 /** 835 /**
838 * blkcg_init_queue - initialize blkcg part of request queue 836 * blkcg_init_queue - initialize blkcg part of request queue
839 * @q: request_queue to initialize 837 * @q: request_queue to initialize
840 * 838 *
841 * Called from blk_alloc_queue_node(). Responsible for initializing blkcg 839 * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
842 * part of new request_queue @q. 840 * part of new request_queue @q.
843 * 841 *
844 * RETURNS: 842 * RETURNS:
845 * 0 on success, -errno on failure. 843 * 0 on success, -errno on failure.
846 */ 844 */
847 int blkcg_init_queue(struct request_queue *q) 845 int blkcg_init_queue(struct request_queue *q)
848 { 846 {
849 might_sleep(); 847 might_sleep();
850 848
851 return blk_throtl_init(q); 849 return blk_throtl_init(q);
852 } 850 }
853 851
854 /** 852 /**
855 * blkcg_drain_queue - drain blkcg part of request_queue 853 * blkcg_drain_queue - drain blkcg part of request_queue
856 * @q: request_queue to drain 854 * @q: request_queue to drain
857 * 855 *
858 * Called from blk_drain_queue(). Responsible for draining blkcg part. 856 * Called from blk_drain_queue(). Responsible for draining blkcg part.
859 */ 857 */
860 void blkcg_drain_queue(struct request_queue *q) 858 void blkcg_drain_queue(struct request_queue *q)
861 { 859 {
862 lockdep_assert_held(q->queue_lock); 860 lockdep_assert_held(q->queue_lock);
863 861
864 blk_throtl_drain(q); 862 blk_throtl_drain(q);
865 } 863 }
866 864
867 /** 865 /**
868 * blkcg_exit_queue - exit and release blkcg part of request_queue 866 * blkcg_exit_queue - exit and release blkcg part of request_queue
869 * @q: request_queue being released 867 * @q: request_queue being released
870 * 868 *
871 * Called from blk_release_queue(). Responsible for exiting blkcg part. 869 * Called from blk_release_queue(). Responsible for exiting blkcg part.
872 */ 870 */
873 void blkcg_exit_queue(struct request_queue *q) 871 void blkcg_exit_queue(struct request_queue *q)
874 { 872 {
875 spin_lock_irq(q->queue_lock); 873 spin_lock_irq(q->queue_lock);
876 blkg_destroy_all(q); 874 blkg_destroy_all(q);
877 spin_unlock_irq(q->queue_lock); 875 spin_unlock_irq(q->queue_lock);
878 876
879 blk_throtl_exit(q); 877 blk_throtl_exit(q);
880 } 878 }
881 879
882 /* 880 /*
883 * We cannot support shared io contexts, as we have no mean to support 881 * We cannot support shared io contexts, as we have no mean to support
884 * two tasks with the same ioc in two different groups without major rework 882 * two tasks with the same ioc in two different groups without major rework
885 * of the main cic data structures. For now we allow a task to change 883 * of the main cic data structures. For now we allow a task to change
886 * its cgroup only if it's the only owner of its ioc. 884 * its cgroup only if it's the only owner of its ioc.
887 */ 885 */
888 static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) 886 static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
889 { 887 {
890 struct task_struct *task; 888 struct task_struct *task;
891 struct io_context *ioc; 889 struct io_context *ioc;
892 int ret = 0; 890 int ret = 0;
893 891
894 /* task_lock() is needed to avoid races with exit_io_context() */ 892 /* task_lock() is needed to avoid races with exit_io_context() */
895 cgroup_taskset_for_each(task, cgrp, tset) { 893 cgroup_taskset_for_each(task, cgrp, tset) {
896 task_lock(task); 894 task_lock(task);
897 ioc = task->io_context; 895 ioc = task->io_context;
898 if (ioc && atomic_read(&ioc->nr_tasks) > 1) 896 if (ioc && atomic_read(&ioc->nr_tasks) > 1)
899 ret = -EINVAL; 897 ret = -EINVAL;
900 task_unlock(task); 898 task_unlock(task);
901 if (ret) 899 if (ret)
902 break; 900 break;
903 } 901 }
904 return ret; 902 return ret;
905 } 903 }
906 904
907 struct cgroup_subsys blkio_subsys = { 905 struct cgroup_subsys blkio_subsys = {
908 .name = "blkio", 906 .name = "blkio",
909 .css_alloc = blkcg_css_alloc, 907 .css_alloc = blkcg_css_alloc,
910 .css_offline = blkcg_css_offline, 908 .css_offline = blkcg_css_offline,
911 .css_free = blkcg_css_free, 909 .css_free = blkcg_css_free,
912 .can_attach = blkcg_can_attach, 910 .can_attach = blkcg_can_attach,
913 .subsys_id = blkio_subsys_id, 911 .subsys_id = blkio_subsys_id,
914 .base_cftypes = blkcg_files, 912 .base_cftypes = blkcg_files,
915 .module = THIS_MODULE, 913 .module = THIS_MODULE,
916 914
917 /* 915 /*
918 * blkio subsystem is utterly broken in terms of hierarchy support. 916 * blkio subsystem is utterly broken in terms of hierarchy support.
919 * It treats all cgroups equally regardless of where they're 917 * It treats all cgroups equally regardless of where they're
920 * located in the hierarchy - all cgroups are treated as if they're 918 * located in the hierarchy - all cgroups are treated as if they're
921 * right below the root. Fix it and remove the following. 919 * right below the root. Fix it and remove the following.
922 */ 920 */
923 .broken_hierarchy = true, 921 .broken_hierarchy = true,
924 }; 922 };
925 EXPORT_SYMBOL_GPL(blkio_subsys); 923 EXPORT_SYMBOL_GPL(blkio_subsys);
926 924
927 /** 925 /**
928 * blkcg_activate_policy - activate a blkcg policy on a request_queue 926 * blkcg_activate_policy - activate a blkcg policy on a request_queue
929 * @q: request_queue of interest 927 * @q: request_queue of interest
930 * @pol: blkcg policy to activate 928 * @pol: blkcg policy to activate
931 * 929 *
932 * Activate @pol on @q. Requires %GFP_KERNEL context. @q goes through 930 * Activate @pol on @q. Requires %GFP_KERNEL context. @q goes through
933 * bypass mode to populate its blkgs with policy_data for @pol. 931 * bypass mode to populate its blkgs with policy_data for @pol.
934 * 932 *
935 * Activation happens with @q bypassed, so nobody would be accessing blkgs 933 * Activation happens with @q bypassed, so nobody would be accessing blkgs
936 * from IO path. Update of each blkg is protected by both queue and blkcg 934 * from IO path. Update of each blkg is protected by both queue and blkcg
937 * locks so that holding either lock and testing blkcg_policy_enabled() is 935 * locks so that holding either lock and testing blkcg_policy_enabled() is
938 * always enough for dereferencing policy data. 936 * always enough for dereferencing policy data.
939 * 937 *
940 * The caller is responsible for synchronizing [de]activations and policy 938 * The caller is responsible for synchronizing [de]activations and policy
941 * [un]registerations. Returns 0 on success, -errno on failure. 939 * [un]registerations. Returns 0 on success, -errno on failure.
942 */ 940 */
943 int blkcg_activate_policy(struct request_queue *q, 941 int blkcg_activate_policy(struct request_queue *q,
944 const struct blkcg_policy *pol) 942 const struct blkcg_policy *pol)
945 { 943 {
946 LIST_HEAD(pds); 944 LIST_HEAD(pds);
947 struct blkcg_gq *blkg, *new_blkg; 945 struct blkcg_gq *blkg, *new_blkg;
948 struct blkg_policy_data *pd, *n; 946 struct blkg_policy_data *pd, *n;
949 int cnt = 0, ret; 947 int cnt = 0, ret;
950 bool preloaded; 948 bool preloaded;
951 949
952 if (blkcg_policy_enabled(q, pol)) 950 if (blkcg_policy_enabled(q, pol))
953 return 0; 951 return 0;
954 952
955 /* preallocations for root blkg */ 953 /* preallocations for root blkg */
956 new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); 954 new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
957 if (!new_blkg) 955 if (!new_blkg)
958 return -ENOMEM; 956 return -ENOMEM;
959 957
960 blk_queue_bypass_start(q); 958 blk_queue_bypass_start(q);
961 959
962 preloaded = !radix_tree_preload(GFP_KERNEL); 960 preloaded = !radix_tree_preload(GFP_KERNEL);
963 961
964 /* 962 /*
965 * Make sure the root blkg exists and count the existing blkgs. As 963 * Make sure the root blkg exists and count the existing blkgs. As
966 * @q is bypassing at this point, blkg_lookup_create() can't be 964 * @q is bypassing at this point, blkg_lookup_create() can't be
967 * used. Open code it. 965 * used. Open code it.
968 */ 966 */
969 spin_lock_irq(q->queue_lock); 967 spin_lock_irq(q->queue_lock);
970 968
971 rcu_read_lock(); 969 rcu_read_lock();
972 blkg = __blkg_lookup(&blkcg_root, q, false); 970 blkg = __blkg_lookup(&blkcg_root, q, false);
973 if (blkg) 971 if (blkg)
974 blkg_free(new_blkg); 972 blkg_free(new_blkg);
975 else 973 else
976 blkg = blkg_create(&blkcg_root, q, new_blkg); 974 blkg = blkg_create(&blkcg_root, q, new_blkg);
977 rcu_read_unlock(); 975 rcu_read_unlock();
978 976
979 if (preloaded) 977 if (preloaded)
980 radix_tree_preload_end(); 978 radix_tree_preload_end();
981 979
982 if (IS_ERR(blkg)) { 980 if (IS_ERR(blkg)) {
983 ret = PTR_ERR(blkg); 981 ret = PTR_ERR(blkg);
984 goto out_unlock; 982 goto out_unlock;
985 } 983 }
986 q->root_blkg = blkg; 984 q->root_blkg = blkg;
987 q->root_rl.blkg = blkg; 985 q->root_rl.blkg = blkg;
988 986
989 list_for_each_entry(blkg, &q->blkg_list, q_node) 987 list_for_each_entry(blkg, &q->blkg_list, q_node)
990 cnt++; 988 cnt++;
991 989
992 spin_unlock_irq(q->queue_lock); 990 spin_unlock_irq(q->queue_lock);
993 991
994 /* allocate policy_data for all existing blkgs */ 992 /* allocate policy_data for all existing blkgs */
995 while (cnt--) { 993 while (cnt--) {
996 pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); 994 pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
997 if (!pd) { 995 if (!pd) {
998 ret = -ENOMEM; 996 ret = -ENOMEM;
999 goto out_free; 997 goto out_free;
1000 } 998 }
1001 list_add_tail(&pd->alloc_node, &pds); 999 list_add_tail(&pd->alloc_node, &pds);
1002 } 1000 }
1003 1001
1004 /* 1002 /*
1005 * Install the allocated pds. With @q bypassing, no new blkg 1003 * Install the allocated pds. With @q bypassing, no new blkg
1006 * should have been created while the queue lock was dropped. 1004 * should have been created while the queue lock was dropped.
1007 */ 1005 */
1008 spin_lock_irq(q->queue_lock); 1006 spin_lock_irq(q->queue_lock);
1009 1007
1010 list_for_each_entry(blkg, &q->blkg_list, q_node) { 1008 list_for_each_entry(blkg, &q->blkg_list, q_node) {
1011 if (WARN_ON(list_empty(&pds))) { 1009 if (WARN_ON(list_empty(&pds))) {
1012 /* umm... this shouldn't happen, just abort */ 1010 /* umm... this shouldn't happen, just abort */
1013 ret = -ENOMEM; 1011 ret = -ENOMEM;
1014 goto out_unlock; 1012 goto out_unlock;
1015 } 1013 }
1016 pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node); 1014 pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
1017 list_del_init(&pd->alloc_node); 1015 list_del_init(&pd->alloc_node);
1018 1016
1019 /* grab blkcg lock too while installing @pd on @blkg */ 1017 /* grab blkcg lock too while installing @pd on @blkg */
1020 spin_lock(&blkg->blkcg->lock); 1018 spin_lock(&blkg->blkcg->lock);
1021 1019
1022 blkg->pd[pol->plid] = pd; 1020 blkg->pd[pol->plid] = pd;
1023 pd->blkg = blkg; 1021 pd->blkg = blkg;
1024 pd->plid = pol->plid; 1022 pd->plid = pol->plid;
1025 pol->pd_init_fn(blkg); 1023 pol->pd_init_fn(blkg);
1026 1024
1027 spin_unlock(&blkg->blkcg->lock); 1025 spin_unlock(&blkg->blkcg->lock);
1028 } 1026 }
1029 1027
1030 __set_bit(pol->plid, q->blkcg_pols); 1028 __set_bit(pol->plid, q->blkcg_pols);
1031 ret = 0; 1029 ret = 0;
1032 out_unlock: 1030 out_unlock:
1033 spin_unlock_irq(q->queue_lock); 1031 spin_unlock_irq(q->queue_lock);
1034 out_free: 1032 out_free:
1035 blk_queue_bypass_end(q); 1033 blk_queue_bypass_end(q);
1036 list_for_each_entry_safe(pd, n, &pds, alloc_node) 1034 list_for_each_entry_safe(pd, n, &pds, alloc_node)
1037 kfree(pd); 1035 kfree(pd);
1038 return ret; 1036 return ret;
1039 } 1037 }
1040 EXPORT_SYMBOL_GPL(blkcg_activate_policy); 1038 EXPORT_SYMBOL_GPL(blkcg_activate_policy);
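For orientation, a minimal sketch of how a policy might activate itself on a queue from its per-queue init path; blkcg_policy_foo and foo_init_queue are hypothetical names, not part of this commit. Deactivation is the mirror image: the policy's exit path would call blkcg_deactivate_policy(q, &blkcg_policy_foo).

static struct blkcg_policy blkcg_policy_foo;	/* filled in at registration */

static int foo_init_queue(struct request_queue *q)
{
	int ret;

	/* walks @q in bypass mode and attaches foo's policy data to every blkg */
	ret = blkcg_activate_policy(q, &blkcg_policy_foo);
	if (ret)
		return ret;

	/* q->root_blkg now exists and carries foo's policy data */
	return 0;
}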
1041 1039
1042 /** 1040 /**
1043 * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue 1041 * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue
1044 * @q: request_queue of interest 1042 * @q: request_queue of interest
1045 * @pol: blkcg policy to deactivate 1043 * @pol: blkcg policy to deactivate
1046 * 1044 *
1047 * Deactivate @pol on @q. Follows the same synchronization rules as 1045 * Deactivate @pol on @q. Follows the same synchronization rules as
1048 * blkcg_activate_policy(). 1046 * blkcg_activate_policy().
1049 */ 1047 */
1050 void blkcg_deactivate_policy(struct request_queue *q, 1048 void blkcg_deactivate_policy(struct request_queue *q,
1051 const struct blkcg_policy *pol) 1049 const struct blkcg_policy *pol)
1052 { 1050 {
1053 struct blkcg_gq *blkg; 1051 struct blkcg_gq *blkg;
1054 1052
1055 if (!blkcg_policy_enabled(q, pol)) 1053 if (!blkcg_policy_enabled(q, pol))
1056 return; 1054 return;
1057 1055
1058 blk_queue_bypass_start(q); 1056 blk_queue_bypass_start(q);
1059 spin_lock_irq(q->queue_lock); 1057 spin_lock_irq(q->queue_lock);
1060 1058
1061 __clear_bit(pol->plid, q->blkcg_pols); 1059 __clear_bit(pol->plid, q->blkcg_pols);
1062 1060
1063 /* if no policy is left, no need for blkgs - shoot them down */ 1061 /* if no policy is left, no need for blkgs - shoot them down */
1064 if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS)) 1062 if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS))
1065 blkg_destroy_all(q); 1063 blkg_destroy_all(q);
1066 1064
1067 list_for_each_entry(blkg, &q->blkg_list, q_node) { 1065 list_for_each_entry(blkg, &q->blkg_list, q_node) {
1068 /* grab blkcg lock too while removing @pd from @blkg */ 1066 /* grab blkcg lock too while removing @pd from @blkg */
1069 spin_lock(&blkg->blkcg->lock); 1067 spin_lock(&blkg->blkcg->lock);
1070 1068
1071 if (pol->pd_offline_fn) 1069 if (pol->pd_offline_fn)
1072 pol->pd_offline_fn(blkg); 1070 pol->pd_offline_fn(blkg);
1073 if (pol->pd_exit_fn) 1071 if (pol->pd_exit_fn)
1074 pol->pd_exit_fn(blkg); 1072 pol->pd_exit_fn(blkg);
1075 1073
1076 kfree(blkg->pd[pol->plid]); 1074 kfree(blkg->pd[pol->plid]);
1077 blkg->pd[pol->plid] = NULL; 1075 blkg->pd[pol->plid] = NULL;
1078 1076
1079 spin_unlock(&blkg->blkcg->lock); 1077 spin_unlock(&blkg->blkcg->lock);
1080 } 1078 }
1081 1079
1082 spin_unlock_irq(q->queue_lock); 1080 spin_unlock_irq(q->queue_lock);
1083 blk_queue_bypass_end(q); 1081 blk_queue_bypass_end(q);
1084 } 1082 }
1085 EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); 1083 EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
1086 1084
1087 /** 1085 /**
1088 * blkcg_policy_register - register a blkcg policy 1086 * blkcg_policy_register - register a blkcg policy
1089 * @pol: blkcg policy to register 1087 * @pol: blkcg policy to register
1090 * 1088 *
1091 * Register @pol with blkcg core. Might sleep and @pol may be modified on 1089 * Register @pol with blkcg core. Might sleep and @pol may be modified on
1092 * successful registration. Returns 0 on success and -errno on failure. 1090 * successful registration. Returns 0 on success and -errno on failure.
1093 */ 1091 */
1094 int blkcg_policy_register(struct blkcg_policy *pol) 1092 int blkcg_policy_register(struct blkcg_policy *pol)
1095 { 1093 {
1096 int i, ret; 1094 int i, ret;
1097 1095
1098 if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data))) 1096 if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data)))
1099 return -EINVAL; 1097 return -EINVAL;
1100 1098
1101 mutex_lock(&blkcg_pol_mutex); 1099 mutex_lock(&blkcg_pol_mutex);
1102 1100
1103 /* find an empty slot */ 1101 /* find an empty slot */
1104 ret = -ENOSPC; 1102 ret = -ENOSPC;
1105 for (i = 0; i < BLKCG_MAX_POLS; i++) 1103 for (i = 0; i < BLKCG_MAX_POLS; i++)
1106 if (!blkcg_policy[i]) 1104 if (!blkcg_policy[i])
1107 break; 1105 break;
1108 if (i >= BLKCG_MAX_POLS) 1106 if (i >= BLKCG_MAX_POLS)
1109 goto out_unlock; 1107 goto out_unlock;
1110 1108
1111 /* register and update blkgs */ 1109 /* register and update blkgs */
1112 pol->plid = i; 1110 pol->plid = i;
1113 blkcg_policy[i] = pol; 1111 blkcg_policy[i] = pol;
1114 1112
1115 /* everything is in place, add intf files for the new policy */ 1113 /* everything is in place, add intf files for the new policy */
1116 if (pol->cftypes) 1114 if (pol->cftypes)
1117 WARN_ON(cgroup_add_cftypes(&blkio_subsys, pol->cftypes)); 1115 WARN_ON(cgroup_add_cftypes(&blkio_subsys, pol->cftypes));
1118 ret = 0; 1116 ret = 0;
1119 out_unlock: 1117 out_unlock:
1120 mutex_unlock(&blkcg_pol_mutex); 1118 mutex_unlock(&blkcg_pol_mutex);
1121 return ret; 1119 return ret;
1122 } 1120 }
1123 EXPORT_SYMBOL_GPL(blkcg_policy_register); 1121 EXPORT_SYMBOL_GPL(blkcg_policy_register);
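A minimal registration sketch under the same assumptions (struct foo_group, foo_pd_init() and foo_pd_exit() are illustrative stand-ins, not part of this commit): pd_size tells the blkcg core how much per-blkg private data to allocate, and the callbacks are invoked as blkgs come and go.

struct foo_group {
	struct blkg_policy_data pd;	/* must be the first member */
	u64 ios;
};

static void foo_pd_init(struct blkcg_gq *blkg) { }
static void foo_pd_exit(struct blkcg_gq *blkg) { }

static struct blkcg_policy blkcg_policy_foo = {
	.pd_size	= sizeof(struct foo_group),
	.pd_init_fn	= foo_pd_init,
	.pd_exit_fn	= foo_pd_exit,
	/* .cftypes would point at the policy's cgroup interface files */
};

static int __init foo_init(void)
{
	return blkcg_policy_register(&blkcg_policy_foo);
}

static void __exit foo_exit(void)
{
	blkcg_policy_unregister(&blkcg_policy_foo);
}

module_init(foo_init);
module_exit(foo_exit);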
1124 1122
1125 /** 1123 /**
1126 * blkcg_policy_unregister - unregister a blkcg policy 1124 * blkcg_policy_unregister - unregister a blkcg policy
1127 * @pol: blkcg policy to unregister 1125 * @pol: blkcg policy to unregister
1128 * 1126 *
1129 * Undo blkcg_policy_register(@pol). Might sleep. 1127 * Undo blkcg_policy_register(@pol). Might sleep.
1130 */ 1128 */
1131 void blkcg_policy_unregister(struct blkcg_policy *pol) 1129 void blkcg_policy_unregister(struct blkcg_policy *pol)
1132 { 1130 {
1133 mutex_lock(&blkcg_pol_mutex); 1131 mutex_lock(&blkcg_pol_mutex);
1134 1132
1135 if (WARN_ON(blkcg_policy[pol->plid] != pol)) 1133 if (WARN_ON(blkcg_policy[pol->plid] != pol))
1136 goto out_unlock; 1134 goto out_unlock;
1137 1135
1 #ifndef _BLK_CGROUP_H 1 #ifndef _BLK_CGROUP_H
2 #define _BLK_CGROUP_H 2 #define _BLK_CGROUP_H
3 /* 3 /*
4 * Common Block IO controller cgroup interface 4 * Common Block IO controller cgroup interface
5 * 5 *
6 * Based on ideas and code from CFQ, CFS and BFQ: 6 * Based on ideas and code from CFQ, CFS and BFQ:
7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> 7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
8 * 8 *
9 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> 9 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
10 * Paolo Valente <paolo.valente@unimore.it> 10 * Paolo Valente <paolo.valente@unimore.it>
11 * 11 *
12 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> 12 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
13 * Nauman Rafique <nauman@google.com> 13 * Nauman Rafique <nauman@google.com>
14 */ 14 */
15 15
16 #include <linux/cgroup.h> 16 #include <linux/cgroup.h>
17 #include <linux/u64_stats_sync.h> 17 #include <linux/u64_stats_sync.h>
18 #include <linux/seq_file.h> 18 #include <linux/seq_file.h>
19 #include <linux/radix-tree.h> 19 #include <linux/radix-tree.h>
20 #include <linux/blkdev.h> 20 #include <linux/blkdev.h>
21 21
22 /* Max limits for throttle policy */ 22 /* Max limits for throttle policy */
23 #define THROTL_IOPS_MAX UINT_MAX 23 #define THROTL_IOPS_MAX UINT_MAX
24 24
25 /* CFQ specific, out here for blkcg->cfq_weight */ 25 /* CFQ specific, out here for blkcg->cfq_weight */
26 #define CFQ_WEIGHT_MIN 10 26 #define CFQ_WEIGHT_MIN 10
27 #define CFQ_WEIGHT_MAX 1000 27 #define CFQ_WEIGHT_MAX 1000
28 #define CFQ_WEIGHT_DEFAULT 500 28 #define CFQ_WEIGHT_DEFAULT 500
29 29
30 #ifdef CONFIG_BLK_CGROUP 30 #ifdef CONFIG_BLK_CGROUP
31 31
32 enum blkg_rwstat_type { 32 enum blkg_rwstat_type {
33 BLKG_RWSTAT_READ, 33 BLKG_RWSTAT_READ,
34 BLKG_RWSTAT_WRITE, 34 BLKG_RWSTAT_WRITE,
35 BLKG_RWSTAT_SYNC, 35 BLKG_RWSTAT_SYNC,
36 BLKG_RWSTAT_ASYNC, 36 BLKG_RWSTAT_ASYNC,
37 37
38 BLKG_RWSTAT_NR, 38 BLKG_RWSTAT_NR,
39 BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, 39 BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
40 }; 40 };
41 41
42 struct blkcg_gq; 42 struct blkcg_gq;
43 43
44 struct blkcg { 44 struct blkcg {
45 struct cgroup_subsys_state css; 45 struct cgroup_subsys_state css;
46 spinlock_t lock; 46 spinlock_t lock;
47 47
48 struct radix_tree_root blkg_tree; 48 struct radix_tree_root blkg_tree;
49 struct blkcg_gq *blkg_hint; 49 struct blkcg_gq *blkg_hint;
50 struct hlist_head blkg_list; 50 struct hlist_head blkg_list;
51 51
52 /* for policies to test whether associated blkcg has changed */ 52 /* for policies to test whether associated blkcg has changed */
53 uint64_t id; 53 uint64_t id;
54 54
55 /* TODO: per-policy storage in blkcg */ 55 /* TODO: per-policy storage in blkcg */
56 unsigned int cfq_weight; /* belongs to cfq */ 56 unsigned int cfq_weight; /* belongs to cfq */
57 unsigned int cfq_leaf_weight; 57 unsigned int cfq_leaf_weight;
58 }; 58 };
59 59
60 struct blkg_stat { 60 struct blkg_stat {
61 struct u64_stats_sync syncp; 61 struct u64_stats_sync syncp;
62 uint64_t cnt; 62 uint64_t cnt;
63 }; 63 };
64 64
65 struct blkg_rwstat { 65 struct blkg_rwstat {
66 struct u64_stats_sync syncp; 66 struct u64_stats_sync syncp;
67 uint64_t cnt[BLKG_RWSTAT_NR]; 67 uint64_t cnt[BLKG_RWSTAT_NR];
68 }; 68 };
69 69
70 /* 70 /*
71 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a 71 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
72 * request_queue (q). This is used by blkcg policies which need to track 72 * request_queue (q). This is used by blkcg policies which need to track
73 * information per blkcg - q pair. 73 * information per blkcg - q pair.
74 * 74 *
75 * There can be multiple active blkcg policies and each has its private 75 * There can be multiple active blkcg policies and each has its private
76 * data on each blkg, the size of which is determined by 76 * data on each blkg, the size of which is determined by
77 * blkcg_policy->pd_size. blkcg core allocates and frees such areas 77 * blkcg_policy->pd_size. blkcg core allocates and frees such areas
78 * together with blkg and invokes pd_init/exit_fn() methods. 78 * together with blkg and invokes pd_init/exit_fn() methods.
79 * 79 *
80 * Such private data must embed struct blkg_policy_data (pd) at the 80 * Such private data must embed struct blkg_policy_data (pd) at the
81 * beginning and pd_size can't be smaller than pd. 81 * beginning and pd_size can't be smaller than pd.
82 */ 82 */
83 struct blkg_policy_data { 83 struct blkg_policy_data {
84 /* the blkg and policy id this per-policy data belongs to */ 84 /* the blkg and policy id this per-policy data belongs to */
85 struct blkcg_gq *blkg; 85 struct blkcg_gq *blkg;
86 int plid; 86 int plid;
87 87
88 /* used during policy activation */ 88 /* used during policy activation */
89 struct list_head alloc_node; 89 struct list_head alloc_node;
90 }; 90 };
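To illustrate the embedding rule above, a hypothetical policy's private data and the usual conversion helpers (struct foo_group, blkcg_policy_foo and the foo_* helpers are assumptions, not part of this commit):

struct foo_group {
	struct blkg_policy_data pd;	/* must come first */
	u64 budget;			/* policy-specific state follows */
};

static inline struct foo_group *pd_to_foog(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct foo_group, pd) : NULL;
}

static inline struct foo_group *blkg_to_foog(struct blkcg_gq *blkg)
{
	return pd_to_foog(blkg_to_pd(blkg, &blkcg_policy_foo));
}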
91 91
92 /* association between a blk cgroup and a request queue */ 92 /* association between a blk cgroup and a request queue */
93 struct blkcg_gq { 93 struct blkcg_gq {
94 /* Pointer to the associated request_queue */ 94 /* Pointer to the associated request_queue */
95 struct request_queue *q; 95 struct request_queue *q;
96 struct list_head q_node; 96 struct list_head q_node;
97 struct hlist_node blkcg_node; 97 struct hlist_node blkcg_node;
98 struct blkcg *blkcg; 98 struct blkcg *blkcg;
99 99
100 /* all non-root blkcg_gq's are guaranteed to have access to parent */ 100 /* all non-root blkcg_gq's are guaranteed to have access to parent */
101 struct blkcg_gq *parent; 101 struct blkcg_gq *parent;
102 102
103 /* request allocation list for this blkcg-q pair */ 103 /* request allocation list for this blkcg-q pair */
104 struct request_list rl; 104 struct request_list rl;
105 105
106 /* reference count */ 106 /* reference count */
107 int refcnt; 107 int refcnt;
108 108
109 /* is this blkg online? protected by both blkcg and q locks */ 109 /* is this blkg online? protected by both blkcg and q locks */
110 bool online; 110 bool online;
111 111
112 struct blkg_policy_data *pd[BLKCG_MAX_POLS]; 112 struct blkg_policy_data *pd[BLKCG_MAX_POLS];
113 113
114 struct rcu_head rcu_head; 114 struct rcu_head rcu_head;
115 }; 115 };
116 116
117 typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); 117 typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg);
118 typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); 118 typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg);
119 typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); 119 typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg);
120 typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); 120 typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg);
121 typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); 121 typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg);
122 122
123 struct blkcg_policy { 123 struct blkcg_policy {
124 int plid; 124 int plid;
125 /* policy specific private data size */ 125 /* policy specific private data size */
126 size_t pd_size; 126 size_t pd_size;
127 /* cgroup files for the policy */ 127 /* cgroup files for the policy */
128 struct cftype *cftypes; 128 struct cftype *cftypes;
129 129
130 /* operations */ 130 /* operations */
131 blkcg_pol_init_pd_fn *pd_init_fn; 131 blkcg_pol_init_pd_fn *pd_init_fn;
132 blkcg_pol_online_pd_fn *pd_online_fn; 132 blkcg_pol_online_pd_fn *pd_online_fn;
133 blkcg_pol_offline_pd_fn *pd_offline_fn; 133 blkcg_pol_offline_pd_fn *pd_offline_fn;
134 blkcg_pol_exit_pd_fn *pd_exit_fn; 134 blkcg_pol_exit_pd_fn *pd_exit_fn;
135 blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; 135 blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn;
136 }; 136 };
137 137
138 extern struct blkcg blkcg_root; 138 extern struct blkcg blkcg_root;
139 139
140 struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); 140 struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q);
141 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, 141 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
142 struct request_queue *q); 142 struct request_queue *q);
143 int blkcg_init_queue(struct request_queue *q); 143 int blkcg_init_queue(struct request_queue *q);
144 void blkcg_drain_queue(struct request_queue *q); 144 void blkcg_drain_queue(struct request_queue *q);
145 void blkcg_exit_queue(struct request_queue *q); 145 void blkcg_exit_queue(struct request_queue *q);
146 146
147 /* Blkio controller policy registration */ 147 /* Blkio controller policy registration */
148 int blkcg_policy_register(struct blkcg_policy *pol); 148 int blkcg_policy_register(struct blkcg_policy *pol);
149 void blkcg_policy_unregister(struct blkcg_policy *pol); 149 void blkcg_policy_unregister(struct blkcg_policy *pol);
150 int blkcg_activate_policy(struct request_queue *q, 150 int blkcg_activate_policy(struct request_queue *q,
151 const struct blkcg_policy *pol); 151 const struct blkcg_policy *pol);
152 void blkcg_deactivate_policy(struct request_queue *q, 152 void blkcg_deactivate_policy(struct request_queue *q,
153 const struct blkcg_policy *pol); 153 const struct blkcg_policy *pol);
154 154
155 void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, 155 void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
156 u64 (*prfill)(struct seq_file *, 156 u64 (*prfill)(struct seq_file *,
157 struct blkg_policy_data *, int), 157 struct blkg_policy_data *, int),
158 const struct blkcg_policy *pol, int data, 158 const struct blkcg_policy *pol, int data,
159 bool show_total); 159 bool show_total);
160 u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); 160 u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
161 u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, 161 u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
162 const struct blkg_rwstat *rwstat); 162 const struct blkg_rwstat *rwstat);
163 u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); 163 u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
164 u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, 164 u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
165 int off); 165 int off);
166 166
167 u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off); 167 u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off);
168 struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, 168 struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
169 int off); 169 int off);
170 170
171 struct blkg_conf_ctx { 171 struct blkg_conf_ctx {
172 struct gendisk *disk; 172 struct gendisk *disk;
173 struct blkcg_gq *blkg; 173 struct blkcg_gq *blkg;
174 u64 v; 174 u64 v;
175 }; 175 };
176 176
177 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, 177 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
178 const char *input, struct blkg_conf_ctx *ctx); 178 const char *input, struct blkg_conf_ctx *ctx);
179 void blkg_conf_finish(struct blkg_conf_ctx *ctx); 179 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
180 180
181 181
182 static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) 182 static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
183 { 183 {
184 return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), 184 return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
185 struct blkcg, css); 185 struct blkcg, css);
186 } 186 }
187 187
188 static inline struct blkcg *task_blkcg(struct task_struct *tsk) 188 static inline struct blkcg *task_blkcg(struct task_struct *tsk)
189 { 189 {
190 return container_of(task_subsys_state(tsk, blkio_subsys_id), 190 return container_of(task_subsys_state(tsk, blkio_subsys_id),
191 struct blkcg, css); 191 struct blkcg, css);
192 } 192 }
193 193
194 static inline struct blkcg *bio_blkcg(struct bio *bio) 194 static inline struct blkcg *bio_blkcg(struct bio *bio)
195 { 195 {
196 if (bio && bio->bi_css) 196 if (bio && bio->bi_css)
197 return container_of(bio->bi_css, struct blkcg, css); 197 return container_of(bio->bi_css, struct blkcg, css);
198 return task_blkcg(current); 198 return task_blkcg(current);
199 } 199 }
200 200
201 /** 201 /**
202 * blkcg_parent - get the parent of a blkcg 202 * blkcg_parent - get the parent of a blkcg
203 * @blkcg: blkcg of interest 203 * @blkcg: blkcg of interest
204 * 204 *
205 * Return the parent blkcg of @blkcg. Can be called anytime. 205 * Return the parent blkcg of @blkcg. Can be called anytime.
206 */ 206 */
207 static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) 207 static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
208 { 208 {
209 struct cgroup *pcg = blkcg->css.cgroup->parent; 209 struct cgroup *pcg = blkcg->css.cgroup->parent;
210 210
211 return pcg ? cgroup_to_blkcg(pcg) : NULL; 211 return pcg ? cgroup_to_blkcg(pcg) : NULL;
212 } 212 }
213 213
214 /** 214 /**
215 * blkg_to_pd - get policy private data 215 * blkg_to_pd - get policy private data
216 * @blkg: blkg of interest 216 * @blkg: blkg of interest
217 * @pol: policy of interest 217 * @pol: policy of interest
218 * 218 *
219 * Return pointer to private data associated with the @blkg-@pol pair. 219 * Return pointer to private data associated with the @blkg-@pol pair.
220 */ 220 */
221 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, 221 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
222 struct blkcg_policy *pol) 222 struct blkcg_policy *pol)
223 { 223 {
224 return blkg ? blkg->pd[pol->plid] : NULL; 224 return blkg ? blkg->pd[pol->plid] : NULL;
225 } 225 }
226 226
227 /** 227 /**
228 * pd_to_blkg - get blkg associated with policy private data 228 * pd_to_blkg - get blkg associated with policy private data
229 * @pd: policy private data of interest 229 * @pd: policy private data of interest
230 * 230 *
231 * @pd is policy private data. Determine the blkg it's associated with. 231 * @pd is policy private data. Determine the blkg it's associated with.
232 */ 232 */
233 static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) 233 static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
234 { 234 {
235 return pd ? pd->blkg : NULL; 235 return pd ? pd->blkg : NULL;
236 } 236 }
237 237
238 /** 238 /**
239 * blkg_path - format cgroup path of blkg 239 * blkg_path - format cgroup path of blkg
240 * @blkg: blkg of interest 240 * @blkg: blkg of interest
241 * @buf: target buffer 241 * @buf: target buffer
242 * @buflen: target buffer length 242 * @buflen: target buffer length
243 * 243 *
244 * Format the path of the cgroup of @blkg into @buf. 244 * Format the path of the cgroup of @blkg into @buf.
245 */ 245 */
246 static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) 246 static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
247 { 247 {
248 int ret; 248 int ret;
249 249
250 ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); 250 ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
251 if (ret) 251 if (ret)
252 strncpy(buf, "<unavailable>", buflen); 252 strncpy(buf, "<unavailable>", buflen);
253 return ret; 253 return ret;
254 } 254 }
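A small usage sketch (foo_dump_blkg and the buffer size are arbitrary illustrations):

static void foo_dump_blkg(struct blkcg_gq *blkg)
{
	char path[64];

	blkg_path(blkg, path, sizeof(path));	/* falls back to "<unavailable>" */
	pr_debug("foo: blkg %s\n", path);
}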
255 255
256 /** 256 /**
257 * blkg_get - get a blkg reference 257 * blkg_get - get a blkg reference
258 * @blkg: blkg to get 258 * @blkg: blkg to get
259 * 259 *
260 * The caller should be holding queue_lock and an existing reference. 260 * The caller should be holding queue_lock and an existing reference.
261 */ 261 */
262 static inline void blkg_get(struct blkcg_gq *blkg) 262 static inline void blkg_get(struct blkcg_gq *blkg)
263 { 263 {
264 lockdep_assert_held(blkg->q->queue_lock); 264 lockdep_assert_held(blkg->q->queue_lock);
265 WARN_ON_ONCE(!blkg->refcnt); 265 WARN_ON_ONCE(!blkg->refcnt);
266 blkg->refcnt++; 266 blkg->refcnt++;
267 } 267 }
268 268
269 void __blkg_release(struct blkcg_gq *blkg); 269 void __blkg_release_rcu(struct rcu_head *rcu);
270 270
271 /** 271 /**
272 * blkg_put - put a blkg reference 272 * blkg_put - put a blkg reference
273 * @blkg: blkg to put 273 * @blkg: blkg to put
274 * 274 *
275 * The caller should be holding queue_lock. 275 * The caller should be holding queue_lock.
276 */ 276 */
277 static inline void blkg_put(struct blkcg_gq *blkg) 277 static inline void blkg_put(struct blkcg_gq *blkg)
278 { 278 {
279 lockdep_assert_held(blkg->q->queue_lock); 279 lockdep_assert_held(blkg->q->queue_lock);
280 WARN_ON_ONCE(blkg->refcnt <= 0); 280 WARN_ON_ONCE(blkg->refcnt <= 0);
281 if (!--blkg->refcnt) 281 if (!--blkg->refcnt)
282 __blkg_release(blkg); 282 call_rcu(&blkg->rcu_head, __blkg_release_rcu);
283 } 283 }
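Illustrative reference handling under this commit's scheme; foo_inspect_blkg is a hypothetical caller that already holds a reference to @blkg. The point of the change is visible on the final put: only call_rcu() runs under queue_lock, while the actual teardown happens later in __blkg_release_rcu().

static void foo_inspect_blkg(struct blkcg_gq *blkg)
{
	spin_lock_irq(blkg->q->queue_lock);
	blkg_get(blkg);			/* pin while we poke at it */
	/* ... dereference blkg->pd[] etc. ... */
	blkg_put(blkg);			/* a final put only queues __blkg_release_rcu() */
	spin_unlock_irq(blkg->q->queue_lock);
}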
284 284
285 struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, 285 struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
286 bool update_hint); 286 bool update_hint);
287 287
288 /** 288 /**
289 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants 289 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
290 * @d_blkg: loop cursor pointing to the current descendant 290 * @d_blkg: loop cursor pointing to the current descendant
291 * @pos_cgrp: used for iteration 291 * @pos_cgrp: used for iteration
292 * @p_blkg: target blkg to walk descendants of 292 * @p_blkg: target blkg to walk descendants of
293 * 293 *
294 * Walk @d_blkg through the descendants of @p_blkg. Must be used with RCU 294 * Walk @d_blkg through the descendants of @p_blkg. Must be used with RCU
295 * read locked. If called under either blkcg or queue lock, the iteration 295 * read locked. If called under either blkcg or queue lock, the iteration
296 * is guaranteed to include all and only online blkgs. The caller may 296 * is guaranteed to include all and only online blkgs. The caller may
297 * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip 297 * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
298 * subtree. 298 * subtree.
299 */ 299 */
300 #define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg) \ 300 #define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg) \
301 cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \ 301 cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
302 if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \ 302 if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
303 (p_blkg)->q, false))) 303 (p_blkg)->q, false)))
304 304
305 /** 305 /**
306 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants 306 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
307 * @d_blkg: loop cursor pointing to the current descendant 307 * @d_blkg: loop cursor pointing to the current descendant
308 * @pos_cgrp: used for iteration 308 * @pos_cgrp: used for iteration
309 * @p_blkg: target blkg to walk descendants of 309 * @p_blkg: target blkg to walk descendants of
310 * 310 *
311 * Similar to blkg_for_each_descendant_pre() but performs post-order 311 * Similar to blkg_for_each_descendant_pre() but performs post-order
312 * traversal instead. Synchronization rules are the same. 312 * traversal instead. Synchronization rules are the same.
313 */ 313 */
314 #define blkg_for_each_descendant_post(d_blkg, pos_cgrp, p_blkg) \ 314 #define blkg_for_each_descendant_post(d_blkg, pos_cgrp, p_blkg) \
315 cgroup_for_each_descendant_post((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \ 315 cgroup_for_each_descendant_post((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
316 if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \ 316 if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
317 (p_blkg)->q, false))) 317 (p_blkg)->q, false)))
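A walk over the descendants of a queue's root blkg might look like the sketch below (foo_count_descendants is a hypothetical helper). Holding the queue lock in addition to RCU gives the online-only guarantee described above.

static int foo_count_descendants(struct request_queue *q)
{
	struct blkcg_gq *blkg;
	struct cgroup *pos_cgrp;
	int nr = 0;

	rcu_read_lock();
	blkg_for_each_descendant_pre(blkg, pos_cgrp, q->root_blkg)
		nr++;
	rcu_read_unlock();
	return nr;
}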
318 318
319 /** 319 /**
320 * blk_get_rl - get request_list to use 320 * blk_get_rl - get request_list to use
321 * @q: request_queue of interest 321 * @q: request_queue of interest
322 * @bio: bio which will be attached to the allocated request (may be %NULL) 322 * @bio: bio which will be attached to the allocated request (may be %NULL)
323 * 323 *
324 * The caller wants to allocate a request from @q to use for @bio. Find 324 * The caller wants to allocate a request from @q to use for @bio. Find
325 * the request_list to use and obtain a reference on it. Should be called 325 * the request_list to use and obtain a reference on it. Should be called
326 * under queue_lock. This function is guaranteed to return non-%NULL 326 * under queue_lock. This function is guaranteed to return non-%NULL
327 * request_list. 327 * request_list.
328 */ 328 */
329 static inline struct request_list *blk_get_rl(struct request_queue *q, 329 static inline struct request_list *blk_get_rl(struct request_queue *q,
330 struct bio *bio) 330 struct bio *bio)
331 { 331 {
332 struct blkcg *blkcg; 332 struct blkcg *blkcg;
333 struct blkcg_gq *blkg; 333 struct blkcg_gq *blkg;
334 334
335 rcu_read_lock(); 335 rcu_read_lock();
336 336
337 blkcg = bio_blkcg(bio); 337 blkcg = bio_blkcg(bio);
338 338
339 /* bypass blkg lookup and use @q->root_rl directly for root */ 339 /* bypass blkg lookup and use @q->root_rl directly for root */
340 if (blkcg == &blkcg_root) 340 if (blkcg == &blkcg_root)
341 goto root_rl; 341 goto root_rl;
342 342
343 /* 343 /*
344 * Try to use blkg->rl. blkg lookup may fail under memory pressure 344 * Try to use blkg->rl. blkg lookup may fail under memory pressure
345 * or if either the blkcg or queue is going away. Fall back to 345 * or if either the blkcg or queue is going away. Fall back to
346 * root_rl in such cases. 346 * root_rl in such cases.
347 */ 347 */
348 blkg = blkg_lookup_create(blkcg, q); 348 blkg = blkg_lookup_create(blkcg, q);
349 if (unlikely(IS_ERR(blkg))) 349 if (unlikely(IS_ERR(blkg)))
350 goto root_rl; 350 goto root_rl;
351 351
352 blkg_get(blkg); 352 blkg_get(blkg);
353 rcu_read_unlock(); 353 rcu_read_unlock();
354 return &blkg->rl; 354 return &blkg->rl;
355 root_rl: 355 root_rl:
356 rcu_read_unlock(); 356 rcu_read_unlock();
357 return &q->root_rl; 357 return &q->root_rl;
358 } 358 }
359 359
360 /** 360 /**
361 * blk_put_rl - put request_list 361 * blk_put_rl - put request_list
362 * @rl: request_list to put 362 * @rl: request_list to put
363 * 363 *
364 * Put the reference acquired by blk_get_rl(). Should be called under 364 * Put the reference acquired by blk_get_rl(). Should be called under
365 * queue_lock. 365 * queue_lock.
366 */ 366 */
367 static inline void blk_put_rl(struct request_list *rl) 367 static inline void blk_put_rl(struct request_list *rl)
368 { 368 {
369 /* root_rl may not have blkg set */ 369 /* root_rl may not have blkg set */
370 if (rl->blkg && rl->blkg->blkcg != &blkcg_root) 370 if (rl->blkg && rl->blkg->blkcg != &blkcg_root)
371 blkg_put(rl->blkg); 371 blkg_put(rl->blkg);
372 } 372 }
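Loosely modelled on how the block core pairs these helpers (foo_attach_rl/foo_detach_rl are illustrative names): the reference taken at allocation time travels with the request via blk_rq_set_rl() and is dropped when the request is freed. Both sides run under queue_lock.

static void foo_attach_rl(struct request_queue *q, struct request *rq,
			  struct bio *bio)
{
	struct request_list *rl;

	rl = blk_get_rl(q, bio);	/* never NULL, may fall back to root_rl */
	blk_rq_set_rl(rq, rl);
}

static void foo_detach_rl(struct request *rq)
{
	blk_put_rl(blk_rq_rl(rq));	/* drop the reference taken above */
}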
373 373
374 /** 374 /**
375 * blk_rq_set_rl - associate a request with a request_list 375 * blk_rq_set_rl - associate a request with a request_list
376 * @rq: request of interest 376 * @rq: request of interest
377 * @rl: target request_list 377 * @rl: target request_list
378 * 378 *
379 * Associate @rq with @rl so that accounting and freeing can know the 379 * Associate @rq with @rl so that accounting and freeing can know the
380 * request_list @rq came from. 380 * request_list @rq came from.
381 */ 381 */
382 static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) 382 static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
383 { 383 {
384 rq->rl = rl; 384 rq->rl = rl;
385 } 385 }
386 386
387 /** 387 /**
388 * blk_rq_rl - return the request_list a request came from 388 * blk_rq_rl - return the request_list a request came from
389 * @rq: request of interest 389 * @rq: request of interest
390 * 390 *
391 * Return the request_list @rq is allocated from. 391 * Return the request_list @rq is allocated from.
392 */ 392 */
393 static inline struct request_list *blk_rq_rl(struct request *rq) 393 static inline struct request_list *blk_rq_rl(struct request *rq)
394 { 394 {
395 return rq->rl; 395 return rq->rl;
396 } 396 }
397 397
398 struct request_list *__blk_queue_next_rl(struct request_list *rl, 398 struct request_list *__blk_queue_next_rl(struct request_list *rl,
399 struct request_queue *q); 399 struct request_queue *q);
400 /** 400 /**
401 * blk_queue_for_each_rl - iterate through all request_lists of a request_queue 401 * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
402 * 402 *
403 * Should be used under queue_lock. 403 * Should be used under queue_lock.
404 */ 404 */
405 #define blk_queue_for_each_rl(rl, q) \ 405 #define blk_queue_for_each_rl(rl, q) \
406 for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) 406 for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
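A trivial sketch of the iterator (foo_count_request_lists is hypothetical); the walk itself must run with queue_lock held.

static int foo_count_request_lists(struct request_queue *q)
{
	struct request_list *rl;
	int nr = 0;

	blk_queue_for_each_rl(rl, q)
		nr++;
	return nr;
}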
407 407
408 /** 408 /**
409 * blkg_stat_add - add a value to a blkg_stat 409 * blkg_stat_add - add a value to a blkg_stat
410 * @stat: target blkg_stat 410 * @stat: target blkg_stat
411 * @val: value to add 411 * @val: value to add
412 * 412 *
413 * Add @val to @stat. The caller is responsible for synchronizing calls to 413 * Add @val to @stat. The caller is responsible for synchronizing calls to
414 * this function. 414 * this function.
415 */ 415 */
416 static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) 416 static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
417 { 417 {
418 u64_stats_update_begin(&stat->syncp); 418 u64_stats_update_begin(&stat->syncp);
419 stat->cnt += val; 419 stat->cnt += val;
420 u64_stats_update_end(&stat->syncp); 420 u64_stats_update_end(&stat->syncp);
421 } 421 }
422 422
423 /** 423 /**
424 * blkg_stat_read - read the current value of a blkg_stat 424 * blkg_stat_read - read the current value of a blkg_stat
425 * @stat: blkg_stat to read 425 * @stat: blkg_stat to read
426 * 426 *
427 * Read the current value of @stat. This function can be called without 427 * Read the current value of @stat. This function can be called without
428 * synchronization and takes care of u64 atomicity. 428 * synchronization and takes care of u64 atomicity.
429 */ 429 */
430 static inline uint64_t blkg_stat_read(struct blkg_stat *stat) 430 static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
431 { 431 {
432 unsigned int start; 432 unsigned int start;
433 uint64_t v; 433 uint64_t v;
434 434
435 do { 435 do {
436 start = u64_stats_fetch_begin(&stat->syncp); 436 start = u64_stats_fetch_begin(&stat->syncp);
437 v = stat->cnt; 437 v = stat->cnt;
438 } while (u64_stats_fetch_retry(&stat->syncp, start)); 438 } while (u64_stats_fetch_retry(&stat->syncp, start));
439 439
440 return v; 440 return v;
441 } 441 }
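Usage sketch for the pair above (the foo_* names are illustrative): writers serialize blkg_stat_add() themselves, typically under the queue or blkcg lock, while readers may call blkg_stat_read() concurrently.

static void foo_account_io(struct blkg_stat *stat)
{
	blkg_stat_add(stat, 1);		/* caller provides serialization */
}

static uint64_t foo_read_ios(struct blkg_stat *stat)
{
	return blkg_stat_read(stat);	/* lockless, u64-safe on 32-bit too */
}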
442 442
443 /** 443 /**
444 * blkg_stat_reset - reset a blkg_stat 444 * blkg_stat_reset - reset a blkg_stat
445 * @stat: blkg_stat to reset 445 * @stat: blkg_stat to reset
446 */ 446 */
447 static inline void blkg_stat_reset(struct blkg_stat *stat) 447 static inline void blkg_stat_reset(struct blkg_stat *stat)
448 { 448 {
449 stat->cnt = 0; 449 stat->cnt = 0;
450 } 450 }
451 451
452 /** 452 /**
453 * blkg_stat_merge - merge a blkg_stat into another 453 * blkg_stat_merge - merge a blkg_stat into another
454 * @to: the destination blkg_stat 454 * @to: the destination blkg_stat
455 * @from: the source 455 * @from: the source
456 * 456 *
457 * Add @from's count to @to. 457 * Add @from's count to @to.
458 */ 458 */
459 static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) 459 static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from)
460 { 460 {
461 blkg_stat_add(to, blkg_stat_read(from)); 461 blkg_stat_add(to, blkg_stat_read(from));
462 } 462 }
463 463
464 /** 464 /**
465 * blkg_rwstat_add - add a value to a blkg_rwstat 465 * blkg_rwstat_add - add a value to a blkg_rwstat
466 * @rwstat: target blkg_rwstat 466 * @rwstat: target blkg_rwstat
467 * @rw: mask of REQ_{WRITE|SYNC} 467 * @rw: mask of REQ_{WRITE|SYNC}
468 * @val: value to add 468 * @val: value to add
469 * 469 *
470 * Add @val to @rwstat. The counters are chosen according to @rw. The 470 * Add @val to @rwstat. The counters are chosen according to @rw. The
471 * caller is responsible for synchronizing calls to this function. 471 * caller is responsible for synchronizing calls to this function.
472 */ 472 */
473 static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, 473 static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
474 int rw, uint64_t val) 474 int rw, uint64_t val)
475 { 475 {
476 u64_stats_update_begin(&rwstat->syncp); 476 u64_stats_update_begin(&rwstat->syncp);
477 477
478 if (rw & REQ_WRITE) 478 if (rw & REQ_WRITE)
479 rwstat->cnt[BLKG_RWSTAT_WRITE] += val; 479 rwstat->cnt[BLKG_RWSTAT_WRITE] += val;
480 else 480 else
481 rwstat->cnt[BLKG_RWSTAT_READ] += val; 481 rwstat->cnt[BLKG_RWSTAT_READ] += val;
482 if (rw & REQ_SYNC) 482 if (rw & REQ_SYNC)
483 rwstat->cnt[BLKG_RWSTAT_SYNC] += val; 483 rwstat->cnt[BLKG_RWSTAT_SYNC] += val;
484 else 484 else
485 rwstat->cnt[BLKG_RWSTAT_ASYNC] += val; 485 rwstat->cnt[BLKG_RWSTAT_ASYNC] += val;
486 486
487 u64_stats_update_end(&rwstat->syncp); 487 u64_stats_update_end(&rwstat->syncp);
488 } 488 }
489 489
490 /** 490 /**
491 * blkg_rwstat_read - read the current values of a blkg_rwstat 491 * blkg_rwstat_read - read the current values of a blkg_rwstat
492 * @rwstat: blkg_rwstat to read 492 * @rwstat: blkg_rwstat to read
493 * 493 *
494 * Read the current snapshot of @rwstat and return it as the return value. 494 * Read the current snapshot of @rwstat and return it as the return value.
495 * This function can be called without synchronization and takes care of 495 * This function can be called without synchronization and takes care of
496 * u64 atomicity. 496 * u64 atomicity.
497 */ 497 */
498 static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) 498 static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
499 { 499 {
500 unsigned int start; 500 unsigned int start;
501 struct blkg_rwstat tmp; 501 struct blkg_rwstat tmp;
502 502
503 do { 503 do {
504 start = u64_stats_fetch_begin(&rwstat->syncp); 504 start = u64_stats_fetch_begin(&rwstat->syncp);
505 tmp = *rwstat; 505 tmp = *rwstat;
506 } while (u64_stats_fetch_retry(&rwstat->syncp, start)); 506 } while (u64_stats_fetch_retry(&rwstat->syncp, start));
507 507
508 return tmp; 508 return tmp;
509 } 509 }
510 510
511 /** 511 /**
512 * blkg_rwstat_total - read the total count of a blkg_rwstat 512 * blkg_rwstat_total - read the total count of a blkg_rwstat
513 * @rwstat: blkg_rwstat to read 513 * @rwstat: blkg_rwstat to read
514 * 514 *
515 * Return the total count of @rwstat regardless of the IO direction. This 515 * Return the total count of @rwstat regardless of the IO direction. This
516 * function can be called without synchronization and takes care of u64 516 * function can be called without synchronization and takes care of u64
517 * atomicity. 517 * atomicity.
518 */ 518 */
519 static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) 519 static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
520 { 520 {
521 struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); 521 struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
522 522
523 return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; 523 return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
524 } 524 }
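A sketch of rwstat accounting for a bio (foo_* names are illustrative; in this kernel a bio's direction and sync flags live in bi_rw and its length in bi_size):

static void foo_account_bio(struct blkg_rwstat *rwstat, struct bio *bio)
{
	/* bi_rw carries REQ_WRITE/REQ_SYNC, which pick the buckets */
	blkg_rwstat_add(rwstat, bio->bi_rw, bio->bi_size);
}

static uint64_t foo_read_bytes(struct blkg_rwstat *rwstat)
{
	return blkg_rwstat_total(rwstat);	/* READ + WRITE buckets only */
}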
525 525
526 /** 526 /**
527 * blkg_rwstat_reset - reset a blkg_rwstat 527 * blkg_rwstat_reset - reset a blkg_rwstat
528 * @rwstat: blkg_rwstat to reset 528 * @rwstat: blkg_rwstat to reset
529 */ 529 */
530 static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) 530 static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
531 { 531 {
532 memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); 532 memset(rwstat->cnt, 0, sizeof(rwstat->cnt));
533 } 533 }
534 534
535 /** 535 /**
536 * blkg_rwstat_merge - merge a blkg_rwstat into another 536 * blkg_rwstat_merge - merge a blkg_rwstat into another
537 * @to: the destination blkg_rwstat 537 * @to: the destination blkg_rwstat
538 * @from: the source 538 * @from: the source
539 * 539 *
540 * Add @from's counts to @to. 540 * Add @from's counts to @to.
541 */ 541 */
542 static inline void blkg_rwstat_merge(struct blkg_rwstat *to, 542 static inline void blkg_rwstat_merge(struct blkg_rwstat *to,
543 struct blkg_rwstat *from) 543 struct blkg_rwstat *from)
544 { 544 {
545 struct blkg_rwstat v = blkg_rwstat_read(from); 545 struct blkg_rwstat v = blkg_rwstat_read(from);
546 int i; 546 int i;
547 547
548 u64_stats_update_begin(&to->syncp); 548 u64_stats_update_begin(&to->syncp);
549 for (i = 0; i < BLKG_RWSTAT_NR; i++) 549 for (i = 0; i < BLKG_RWSTAT_NR; i++)
550 to->cnt[i] += v.cnt[i]; 550 to->cnt[i] += v.cnt[i];
551 u64_stats_update_end(&to->syncp); 551 u64_stats_update_end(&to->syncp);
552 } 552 }
553 553
554 #else /* CONFIG_BLK_CGROUP */ 554 #else /* CONFIG_BLK_CGROUP */
555 555
556 struct cgroup; 556 struct cgroup;
557 struct blkcg; 557 struct blkcg;
558 558
559 struct blkg_policy_data { 559 struct blkg_policy_data {
560 }; 560 };
561 561
562 struct blkcg_gq { 562 struct blkcg_gq {
563 }; 563 };
564 564
565 struct blkcg_policy { 565 struct blkcg_policy {
566 }; 566 };
567 567
568 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } 568 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
569 static inline int blkcg_init_queue(struct request_queue *q) { return 0; } 569 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
570 static inline void blkcg_drain_queue(struct request_queue *q) { } 570 static inline void blkcg_drain_queue(struct request_queue *q) { }
571 static inline void blkcg_exit_queue(struct request_queue *q) { } 571 static inline void blkcg_exit_queue(struct request_queue *q) { }
572 static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } 572 static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
573 static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } 573 static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
574 static inline int blkcg_activate_policy(struct request_queue *q, 574 static inline int blkcg_activate_policy(struct request_queue *q,
575 const struct blkcg_policy *pol) { return 0; } 575 const struct blkcg_policy *pol) { return 0; }
576 static inline void blkcg_deactivate_policy(struct request_queue *q, 576 static inline void blkcg_deactivate_policy(struct request_queue *q,
577 const struct blkcg_policy *pol) { } 577 const struct blkcg_policy *pol) { }
578 578
579 static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; } 579 static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
580 static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } 580 static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
581 581
582 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, 582 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
583 struct blkcg_policy *pol) { return NULL; } 583 struct blkcg_policy *pol) { return NULL; }
584 static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } 584 static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
585 static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } 585 static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
586 static inline void blkg_get(struct blkcg_gq *blkg) { } 586 static inline void blkg_get(struct blkcg_gq *blkg) { }
587 static inline void blkg_put(struct blkcg_gq *blkg) { } 587 static inline void blkg_put(struct blkcg_gq *blkg) { }
588 588
589 static inline struct request_list *blk_get_rl(struct request_queue *q, 589 static inline struct request_list *blk_get_rl(struct request_queue *q,
590 struct bio *bio) { return &q->root_rl; } 590 struct bio *bio) { return &q->root_rl; }
591 static inline void blk_put_rl(struct request_list *rl) { } 591 static inline void blk_put_rl(struct request_list *rl) { }
592 static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } 592 static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
593 static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } 593 static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
594 594
595 #define blk_queue_for_each_rl(rl, q) \ 595 #define blk_queue_for_each_rl(rl, q) \
596 for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) 596 for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
597 597
598 #endif /* CONFIG_BLK_CGROUP */ 598 #endif /* CONFIG_BLK_CGROUP */
599 #endif /* _BLK_CGROUP_H */ 599 #endif /* _BLK_CGROUP_H */
600 600