/* block/blk-cgroup.c */
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include "blk-cgroup.h"
#include "blk.h"

#define MAX_KEY_LEN 100

static DEFINE_MUTEX(blkcg_pol_mutex);

struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT,
			    .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, };
EXPORT_SYMBOL_GPL(blkcg_root);

static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];

static bool blkcg_policy_enabled(struct request_queue *q,
				 const struct blkcg_policy *pol)
{
	return pol && test_bit(pol->plid, q->blkcg_pols);
}

/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.
 */
static void blkg_free(struct blkcg_gq *blkg)
{
	int i;

	if (!blkg)
		return;

	for (i = 0; i < BLKCG_MAX_POLS; i++)
		kfree(blkg->pd[i]);

	blk_exit_rl(&blkg->rl);
	kfree(blkg);
}

/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 * @gfp_mask: allocation mask to use
 *
 * Allocate a new blkg associating @blkcg and @q.
 */
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
				   gfp_t gfp_mask)
{
	struct blkcg_gq *blkg;
	int i;

	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
	if (!blkg)
		return NULL;

	blkg->q = q;
	INIT_LIST_HEAD(&blkg->q_node);
	blkg->blkcg = blkcg;
	atomic_set(&blkg->refcnt, 1);

	/* root blkg uses @q->root_rl, init rl only for !root blkgs */
	if (blkcg != &blkcg_root) {
		if (blk_init_rl(&blkg->rl, q, gfp_mask))
			goto err_free;
		blkg->rl.blkg = blkg;
	}

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkg_policy_data *pd;

		if (!blkcg_policy_enabled(q, pol))
			continue;

		/* alloc per-policy data and attach it to blkg */
		pd = kzalloc_node(pol->pd_size, gfp_mask, q->node);
		if (!pd)
			goto err_free;

		blkg->pd[i] = pd;
		pd->blkg = blkg;
		pd->plid = i;
	}

	return blkg;

err_free:
	blkg_free(blkg);
	return NULL;
}

/**
 * __blkg_lookup - internal version of blkg_lookup()
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update lookup hint with the result or not
 *
 * This is internal version and shouldn't be used by policy
 * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
 * @q's bypass state.  If @update_hint is %true, the caller should be
 * holding @q->queue_lock and lookup hint is updated on success.
 */
struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
			       bool update_hint)
{
	struct blkcg_gq *blkg;

	blkg = rcu_dereference(blkcg->blkg_hint);
	if (blkg && blkg->q == q)
		return blkg;

	/*
	 * Hint didn't match.  Look up from the radix tree.  Note that the
	 * hint can only be updated under queue_lock as otherwise @blkg
	 * could have already been removed from blkg_tree.  The caller is
	 * responsible for grabbing queue_lock if @update_hint.
	 */
	blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
	if (blkg && blkg->q == q) {
		if (update_hint) {
			lockdep_assert_held(q->queue_lock);
			rcu_assign_pointer(blkcg->blkg_hint, blkg);
		}
		return blkg;
	}

	return NULL;
}

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  This function should be called
 * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
 * - see blk_queue_bypass_start() for details.
 */
struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());

	if (unlikely(blk_queue_bypass(q)))
		return NULL;
	return __blkg_lookup(blkcg, q, false);
}
EXPORT_SYMBOL_GPL(blkg_lookup);
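
/*
 * Illustrative sketch (not part of the original file): a minimal caller of
 * blkg_lookup().  The helper name is hypothetical; what matters is the
 * documented contract - RCU read lock held, %NULL tolerated because @q may
 * be bypassing.  The queue lock additionally pins the blkg while we look
 * at it, since blkg_destroy() runs under @q->queue_lock.
 */
static bool foo_blkg_exists(struct blkcg *blkcg, struct request_queue *q)
{
	struct blkcg_gq *blkg;
	bool ret;

	rcu_read_lock();
	spin_lock_irq(q->queue_lock);
	blkg = blkg_lookup(blkcg, q);	/* %NULL if missing or bypassing */
	ret = blkg != NULL;
	spin_unlock_irq(q->queue_lock);
	rcu_read_unlock();

	return ret;
}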

/*
 * If @new_blkg is %NULL, this function tries to allocate a new one as
 * necessary using %GFP_ATOMIC.  @new_blkg is always consumed on return.
 */
static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
				    struct request_queue *q,
				    struct blkcg_gq *new_blkg)
{
	struct blkcg_gq *blkg;
	int i, ret;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/* blkg holds a reference to blkcg */
	if (!css_tryget_online(&blkcg->css)) {
		ret = -EINVAL;
		goto err_free_blkg;
	}

	/* allocate */
	if (!new_blkg) {
		new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
		if (unlikely(!new_blkg)) {
			ret = -ENOMEM;
			goto err_put_css;
		}
	}
	blkg = new_blkg;

	/* link parent */
	if (blkcg_parent(blkcg)) {
		blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
		if (WARN_ON_ONCE(!blkg->parent)) {
			ret = -EINVAL;
			goto err_put_css;
		}
		blkg_get(blkg->parent);
	}

	/* invoke per-policy init */
	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];

		if (blkg->pd[i] && pol->pd_init_fn)
			pol->pd_init_fn(blkg);
	}

	/* insert */
	spin_lock(&blkcg->lock);
	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
	if (likely(!ret)) {
		hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
		list_add(&blkg->q_node, &q->blkg_list);

		for (i = 0; i < BLKCG_MAX_POLS; i++) {
			struct blkcg_policy *pol = blkcg_policy[i];

			if (blkg->pd[i] && pol->pd_online_fn)
				pol->pd_online_fn(blkg);
		}
	}
	blkg->online = true;
	spin_unlock(&blkcg->lock);

	if (!ret) {
		if (blkcg == &blkcg_root) {
			q->root_blkg = blkg;
			q->root_rl.blkg = blkg;
		}
		return blkg;
	}

	/* @blkg failed to be fully initialized, use the usual release path */
	blkg_put(blkg);
	return ERR_PTR(ret);

err_put_css:
	css_put(&blkcg->css);
err_free_blkg:
	blkg_free(new_blkg);
	return ERR_PTR(ret);
}

/**
 * blkg_lookup_create - lookup blkg, try to create one if not there
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  If it doesn't exist, try to
 * create one.  blkg creation is performed recursively from blkcg_root such
 * that all non-root blkg's have access to the parent blkg.  This function
 * should be called under RCU read lock and @q->queue_lock.
 *
 * Returns pointer to the looked up or created blkg on success, ERR_PTR()
 * value on error.  If @q is dead, returns ERR_PTR(-EINVAL).  If @q is not
 * dead and bypassing, returns ERR_PTR(-EBUSY).
 */
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
				    struct request_queue *q)
{
	struct blkcg_gq *blkg;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/*
	 * This could be the first entry point of blkcg implementation and
	 * we shouldn't allow anything to go through for a bypassing queue.
	 */
	if (unlikely(blk_queue_bypass(q)))
		return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY);

	blkg = __blkg_lookup(blkcg, q, true);
	if (blkg)
		return blkg;

	/*
	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
	 * non-root blkgs have access to their parents.
	 */
	while (true) {
		struct blkcg *pos = blkcg;
		struct blkcg *parent = blkcg_parent(blkcg);

		while (parent && !__blkg_lookup(parent, q, false)) {
			pos = parent;
			parent = blkcg_parent(parent);
		}

		blkg = blkg_create(pos, q, NULL);
		if (pos == blkcg || IS_ERR(blkg))
			return blkg;
	}
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);
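
/*
 * Illustrative sketch (not part of the original file): creating a blkg from
 * process context, following the rules above - RCU read lock plus
 * @q->queue_lock, with the ERR_PTR() convention handled.  The helper is
 * hypothetical; blkg_conf_prep() below is the in-tree caller to study.
 */
static int foo_ensure_blkg(struct blkcg *blkcg, struct request_queue *q)
{
	struct blkcg_gq *blkg;
	int ret = 0;

	rcu_read_lock();
	spin_lock_irq(q->queue_lock);

	blkg = blkg_lookup_create(blkcg, q);	/* never %NULL, may be ERR_PTR */
	if (IS_ERR(blkg))
		ret = PTR_ERR(blkg);	/* -EBUSY means bypassing - retry later */

	spin_unlock_irq(q->queue_lock);
	rcu_read_unlock();

	return ret;
}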

static void blkg_destroy(struct blkcg_gq *blkg)
{
	struct blkcg *blkcg = blkg->blkcg;
	int i;

	lockdep_assert_held(blkg->q->queue_lock);
	lockdep_assert_held(&blkcg->lock);

	/* Something wrong if we are trying to remove same group twice */
	WARN_ON_ONCE(list_empty(&blkg->q_node));
	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];

		if (blkg->pd[i] && pol->pd_offline_fn)
			pol->pd_offline_fn(blkg);
	}
	blkg->online = false;

	radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
	list_del_init(&blkg->q_node);
	hlist_del_init_rcu(&blkg->blkcg_node);

	/*
	 * Both setting lookup hint to and clearing it from @blkg are done
	 * under queue_lock.  If it's not pointing to @blkg now, it never
	 * will.  Hint assignment itself can race safely.
	 */
	if (rcu_access_pointer(blkcg->blkg_hint) == blkg)
		rcu_assign_pointer(blkcg->blkg_hint, NULL);

	/*
	 * If the root blkg is destroyed, just clear the pointer since
	 * root_rl does not take a reference on the root blkg.
	 */
	if (blkcg == &blkcg_root) {
		blkg->q->root_blkg = NULL;
		blkg->q->root_rl.blkg = NULL;
	}

	/*
	 * Put the reference taken at the time of creation so that when all
	 * queues are gone, group can be destroyed.
	 */
	blkg_put(blkg);
}

/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 *
 * Destroy all blkgs associated with @q.
 */
static void blkg_destroy_all(struct request_queue *q)
{
	struct blkcg_gq *blkg, *n;

	lockdep_assert_held(q->queue_lock);

	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
		struct blkcg *blkcg = blkg->blkcg;

		spin_lock(&blkcg->lock);
		blkg_destroy(blkg);
		spin_unlock(&blkcg->lock);
	}
}

/*
 * A group is RCU protected, but having an rcu lock does not mean that one
 * can access all the fields of blkg and assume these are valid.  For
 * example, don't try to follow throtl_data and request queue links.
 *
 * Having a reference to blkg under an rcu allows accesses to only values
 * local to groups like group stats and group rate limits.
 */
void __blkg_release_rcu(struct rcu_head *rcu_head)
{
	struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
	int i;

	/* tell policies that this one is being freed */
	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];

		if (blkg->pd[i] && pol->pd_exit_fn)
			pol->pd_exit_fn(blkg);
	}

	/* release the blkcg and parent blkg refs this blkg has been holding */
	css_put(&blkg->blkcg->css);
	if (blkg->parent)
		blkg_put(blkg->parent);

	blkg_free(blkg);
}
EXPORT_SYMBOL_GPL(__blkg_release_rcu);

/*
 * The next function used by blk_queue_for_each_rl().  It's a bit tricky
 * because the root blkg uses @q->root_rl instead of its own rl.
 */
struct request_list *__blk_queue_next_rl(struct request_list *rl,
					 struct request_queue *q)
{
	struct list_head *ent;
	struct blkcg_gq *blkg;

	/*
	 * Determine the current blkg list_head.  The first entry is
	 * root_rl which is off @q->blkg_list and mapped to the head.
	 */
	if (rl == &q->root_rl) {
		ent = &q->blkg_list;
		/* There are no more block groups, hence no request lists */
		if (list_empty(ent))
			return NULL;
	} else {
		blkg = container_of(rl, struct blkcg_gq, rl);
		ent = &blkg->q_node;
	}

	/* walk to the next list_head, skip root blkcg */
	ent = ent->next;
	if (ent == &q->root_blkg->q_node)
		ent = ent->next;
	if (ent == &q->blkg_list)
		return NULL;

	blkg = container_of(ent, struct blkcg_gq, q_node);
	return &blkg->rl;
}
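
/*
 * Illustrative sketch (not part of the original file): __blk_queue_next_rl()
 * is normally consumed through the blk_queue_for_each_rl() iterator from
 * this file's header.  The counting helper is hypothetical; the caller must
 * hold @q->queue_lock so the blkg list stays stable.
 */
static unsigned int foo_count_request_lists(struct request_queue *q)
{
	struct request_list *rl;
	unsigned int nr = 0;

	lockdep_assert_held(q->queue_lock);

	blk_queue_for_each_rl(rl, q)	/* starts at @q->root_rl */
		nr++;

	return nr;
}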

static int blkcg_reset_stats(struct cgroup_subsys_state *css,
			     struct cftype *cftype, u64 val)
{
	struct blkcg *blkcg = css_to_blkcg(css);
	struct blkcg_gq *blkg;
	int i;

	/*
	 * XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex
	 * which ends up putting cgroup's internal cgroup_tree_mutex under
	 * it; however, cgroup_tree_mutex is nested above cgroup file
	 * active protection and grabbing blkcg_pol_mutex from a cgroup
	 * file operation creates a possible circular dependency.  cgroup
	 * internal locking is planned to go through further simplification
	 * and this issue should go away soon.  For now, let's trylock
	 * blkcg_pol_mutex and restart the write on failure.
	 *
	 * http://lkml.kernel.org/g/5363C04B.4010400@oracle.com
	 */
	if (!mutex_trylock(&blkcg_pol_mutex))
		return restart_syscall();

	spin_lock_irq(&blkcg->lock);

	/*
	 * Note that stat reset is racy - it doesn't synchronize against
	 * stat updates.  This is a debug feature which shouldn't exist
	 * anyway.  If you get hit by a race, retry.
	 */
	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
		for (i = 0; i < BLKCG_MAX_POLS; i++) {
			struct blkcg_policy *pol = blkcg_policy[i];

			if (blkcg_policy_enabled(blkg->q, pol) &&
			    pol->pd_reset_stats_fn)
				pol->pd_reset_stats_fn(blkg);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	mutex_unlock(&blkcg_pol_mutex);
	return 0;
}

static const char *blkg_dev_name(struct blkcg_gq *blkg)
{
	/* some drivers (floppy) instantiate a queue w/o disk registered */
	if (blkg->q->backing_dev_info.dev)
		return dev_name(blkg->q->backing_dev_info.dev);
	return NULL;
}

/**
 * blkcg_print_blkgs - helper for printing per-blkg data
 * @sf: seq_file to print to
 * @blkcg: blkcg of interest
 * @prfill: fill function to print out a blkg
 * @pol: policy in question
 * @data: data to be passed to @prfill
 * @show_total: to print out sum of prfill return values or not
 *
 * This function invokes @prfill on each blkg of @blkcg if pd for the
 * policy specified by @pol exists.  @prfill is invoked with @sf, the
 * policy data and @data and the matching queue lock held.  If @show_total
 * is %true, the sum of the return values from @prfill is printed with
 * "Total" label at the end.
 *
 * This is to be used to construct print functions for
 * cftype->read_seq_string method.
 */
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total)
{
	struct blkcg_gq *blkg;
	u64 total = 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
		spin_lock_irq(blkg->q->queue_lock);
		if (blkcg_policy_enabled(blkg->q, pol))
			total += prfill(sf, blkg->pd[pol->plid], data);
		spin_unlock_irq(blkg->q->queue_lock);
	}
	rcu_read_unlock();

	if (show_total)
		seq_printf(sf, "Total %llu\n", (unsigned long long)total);
}
EXPORT_SYMBOL_GPL(blkcg_print_blkgs);

/**
 * __blkg_prfill_u64 - prfill helper for a single u64 value
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @v: value to print
 *
 * Print @v to @sf for the device associated with @pd.
 */
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
{
	const char *dname = blkg_dev_name(pd->blkg);

	if (!dname)
		return 0;

	seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
	return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);

/**
 * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @rwstat: rwstat to print
 *
 * Print @rwstat to @sf for the device associated with @pd.
 */
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			 const struct blkg_rwstat *rwstat)
{
	static const char *rwstr[] = {
		[BLKG_RWSTAT_READ]	= "Read",
		[BLKG_RWSTAT_WRITE]	= "Write",
		[BLKG_RWSTAT_SYNC]	= "Sync",
		[BLKG_RWSTAT_ASYNC]	= "Async",
	};
	const char *dname = blkg_dev_name(pd->blkg);
	u64 v;
	int i;

	if (!dname)
		return 0;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
			   (unsigned long long)rwstat->cnt[i]);

	v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
	seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
	return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);

/**
 * blkg_prfill_stat - prfill callback for blkg_stat
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @off: offset to the blkg_stat in @pd
 *
 * prfill callback for printing a blkg_stat.
 */
u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off)
{
	return __blkg_prfill_u64(sf, pd, blkg_stat_read((void *)pd + off));
}
EXPORT_SYMBOL_GPL(blkg_prfill_stat);

/**
 * blkg_prfill_rwstat - prfill callback for blkg_rwstat
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @off: offset to the blkg_rwstat in @pd
 *
 * prfill callback for printing a blkg_rwstat.
 */
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
		       int off)
{
	struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd + off);

	return __blkg_prfill_rwstat(sf, pd, &rwstat);
}
EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
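
/*
 * Illustrative sketch (not part of the original file): a policy typically
 * wires the prfill helpers above into its cftype seq_file methods, stashing
 * the stat offset in cftype->private.  "blkcg_policy_foo" is hypothetical;
 * cfq-iosched.c contains the real-world equivalents of this pattern.
 */
static struct blkcg_policy blkcg_policy_foo;	/* hypothetical, defined below */

static int foo_print_rwstat(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
			  &blkcg_policy_foo, seq_cft(sf)->private, true);
	return 0;
}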

/**
 * blkg_stat_recursive_sum - collect hierarchical blkg_stat
 * @pd: policy private data of interest
 * @off: offset to the blkg_stat in @pd
 *
 * Collect the blkg_stat specified by @off from @pd and all its online
 * descendants and return the sum.  The caller must be holding the queue
 * lock for online tests.
 */
u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
{
	struct blkcg_policy *pol = blkcg_policy[pd->plid];
	struct blkcg_gq *pos_blkg;
	struct cgroup_subsys_state *pos_css;
	u64 sum = 0;

	lockdep_assert_held(pd->blkg->q->queue_lock);

	rcu_read_lock();
	blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
		struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
		struct blkg_stat *stat = (void *)pos_pd + off;

		if (pos_blkg->online)
			sum += blkg_stat_read(stat);
	}
	rcu_read_unlock();

	return sum;
}
EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum);

/**
 * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
 * @pd: policy private data of interest
 * @off: offset to the blkg_rwstat in @pd
 *
 * Collect the blkg_rwstat specified by @off from @pd and all its online
 * descendants and return the sum.  The caller must be holding the queue
 * lock for online tests.
 */
struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
					     int off)
{
	struct blkcg_policy *pol = blkcg_policy[pd->plid];
	struct blkcg_gq *pos_blkg;
	struct cgroup_subsys_state *pos_css;
	struct blkg_rwstat sum = { };
	int i;

	lockdep_assert_held(pd->blkg->q->queue_lock);

	rcu_read_lock();
	blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
		struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
		struct blkg_rwstat *rwstat = (void *)pos_pd + off;
		struct blkg_rwstat tmp;

		if (!pos_blkg->online)
			continue;

		tmp = blkg_rwstat_read(rwstat);

		for (i = 0; i < BLKG_RWSTAT_NR; i++)
			sum.cnt[i] += tmp.cnt[i];
	}
	rcu_read_unlock();

	return sum;
}
EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
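
/*
 * Illustrative sketch (not part of the original file): the recursive sums
 * slot into the same prfill interface, so a hierarchical stat file differs
 * from a flat one only in its callback.  blkcg_print_blkgs() already holds
 * the queue lock around each call, satisfying the requirement above.
 */
static u64 foo_prfill_rwstat_recursive(struct seq_file *sf,
				       struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat sum = blkg_rwstat_recursive_sum(pd, off);

	return __blkg_prfill_rwstat(sf, pd, &sum);
}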

/**
 * blkg_conf_prep - parse and prepare for per-blkg config update
 * @blkcg: target block cgroup
 * @pol: target policy
 * @input: input string
 * @ctx: blkg_conf_ctx to be filled
 *
 * Parse per-blkg config update from @input and initialize @ctx with the
 * result.  @ctx->blkg points to the blkg to be updated and @ctx->v the new
 * value.  This function returns with RCU read lock and queue lock held and
 * must be paired with blkg_conf_finish().
 */
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   const char *input, struct blkg_conf_ctx *ctx)
	__acquires(rcu) __acquires(disk->queue->queue_lock)
{
	struct gendisk *disk;
	struct blkcg_gq *blkg;
	unsigned int major, minor;
	unsigned long long v;
	int part, ret;

	if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3)
		return -EINVAL;

	disk = get_gendisk(MKDEV(major, minor), &part);
	if (!disk)
		return -EINVAL;
	if (part) {
		put_disk(disk);
		return -EINVAL;
	}

	rcu_read_lock();
	spin_lock_irq(disk->queue->queue_lock);

	if (blkcg_policy_enabled(disk->queue, pol))
		blkg = blkg_lookup_create(blkcg, disk->queue);
	else
		blkg = ERR_PTR(-EINVAL);

	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		rcu_read_unlock();
		spin_unlock_irq(disk->queue->queue_lock);
		put_disk(disk);
		/*
		 * If queue was bypassing, we should retry.  Do so after a
		 * short msleep().  It isn't strictly necessary but queue
		 * can be bypassing for some time and it's always nice to
		 * avoid busy looping.
		 */
		if (ret == -EBUSY) {
			msleep(10);
			ret = restart_syscall();
		}
		return ret;
	}

	ctx->disk = disk;
	ctx->blkg = blkg;
	ctx->v = v;
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_conf_prep);

/**
 * blkg_conf_finish - finish up per-blkg config update
 * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
 *
 * Finish up after per-blkg config update.  This function must be paired
 * with blkg_conf_prep().
 */
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
	__releases(ctx->disk->queue->queue_lock) __releases(rcu)
{
	spin_unlock_irq(ctx->disk->queue->queue_lock);
	rcu_read_unlock();
	put_disk(ctx->disk);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
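
/*
 * Illustrative sketch (not part of the original file): a per-device config
 * write pairs blkg_conf_prep() with blkg_conf_finish().  The policy, the
 * weight limits and the helper name are all hypothetical.
 */
#define FOO_WEIGHT_MIN	10	/* hypothetical limits */
#define FOO_WEIGHT_MAX	1000

static int foo_set_weight_device(struct blkcg *blkcg, const char *buf)
{
	struct blkg_conf_ctx ctx;
	int ret;

	/* parses "MAJ:MIN VAL"; on success RCU and queue lock are held */
	ret = blkg_conf_prep(blkcg, &blkcg_policy_foo, buf, &ctx);
	if (ret)
		return ret;

	ret = -EINVAL;
	if (ctx.v >= FOO_WEIGHT_MIN && ctx.v <= FOO_WEIGHT_MAX) {
		struct blkg_policy_data *pd = blkg_to_pd(ctx.blkg,
							 &blkcg_policy_foo);

		/* a real policy would update its pd-embedded fields here */
		(void)pd;
		ret = 0;
	}

	blkg_conf_finish(&ctx);		/* drops the locks taken by _prep() */
	return ret;
}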

struct cftype blkcg_files[] = {
	{
		.name = "reset_stats",
		.write_u64 = blkcg_reset_stats,
	},
	{ }	/* terminate */
};

/**
 * blkcg_css_offline - cgroup css_offline callback
 * @css: css of interest
 *
 * This function is called when @css is about to go away and is responsible
 * for shooting down all blkgs associated with @css.  blkgs should be
 * removed while holding both q and blkcg locks.  As blkcg lock is nested
 * inside q lock, this function performs reverse double lock dancing.
 *
 * This is the blkcg counterpart of ioc_release_fn().
 */
static void blkcg_css_offline(struct cgroup_subsys_state *css)
{
	struct blkcg *blkcg = css_to_blkcg(css);

	spin_lock_irq(&blkcg->lock);

	while (!hlist_empty(&blkcg->blkg_list)) {
		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
						    struct blkcg_gq, blkcg_node);
		struct request_queue *q = blkg->q;

		if (spin_trylock(q->queue_lock)) {
			blkg_destroy(blkg);
			spin_unlock(q->queue_lock);
		} else {
			spin_unlock_irq(&blkcg->lock);
			cpu_relax();
			spin_lock_irq(&blkcg->lock);
		}
	}

	spin_unlock_irq(&blkcg->lock);
}

static void blkcg_css_free(struct cgroup_subsys_state *css)
{
	struct blkcg *blkcg = css_to_blkcg(css);

	if (blkcg != &blkcg_root)
		kfree(blkcg);
}

static struct cgroup_subsys_state *
blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct blkcg *blkcg;

	if (!parent_css) {
		blkcg = &blkcg_root;
		goto done;
	}

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);

	blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
	blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT;
done:
	spin_lock_init(&blkcg->lock);
	INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
	INIT_HLIST_HEAD(&blkcg->blkg_list);

	return &blkcg->css;
}

/**
 * blkcg_init_queue - initialize blkcg part of request queue
 * @q: request_queue to initialize
 *
 * Called from blk_alloc_queue_node().  Responsible for initializing blkcg
 * part of new request_queue @q.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int blkcg_init_queue(struct request_queue *q)
{
	might_sleep();

	return blk_throtl_init(q);
}

/**
 * blkcg_drain_queue - drain blkcg part of request_queue
 * @q: request_queue to drain
 *
 * Called from blk_drain_queue().  Responsible for draining blkcg part.
 */
void blkcg_drain_queue(struct request_queue *q)
{
	lockdep_assert_held(q->queue_lock);

	/*
	 * @q could be exiting and already have destroyed all blkgs as
	 * indicated by NULL root_blkg.  If so, don't confuse policies.
	 */
	if (!q->root_blkg)
		return;

	blk_throtl_drain(q);
}

/**
 * blkcg_exit_queue - exit and release blkcg part of request_queue
 * @q: request_queue being released
 *
 * Called from blk_release_queue().  Responsible for exiting blkcg part.
 */
void blkcg_exit_queue(struct request_queue *q)
{
	spin_lock_irq(q->queue_lock);
	blkg_destroy_all(q);
	spin_unlock_irq(q->queue_lock);

	blk_throtl_exit(q);
}

/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures.  For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 */
static int blkcg_can_attach(struct cgroup_subsys_state *css,
			    struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct io_context *ioc;
	int ret = 0;

	/* task_lock() is needed to avoid races with exit_io_context() */
	cgroup_taskset_for_each(task, tset) {
		task_lock(task);
		ioc = task->io_context;
		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
			ret = -EINVAL;
		task_unlock(task);
		if (ret)
			break;
	}
	return ret;
}

struct cgroup_subsys blkio_cgrp_subsys = {
	.css_alloc = blkcg_css_alloc,
	.css_offline = blkcg_css_offline,
	.css_free = blkcg_css_free,
	.can_attach = blkcg_can_attach,
	.legacy_cftypes = blkcg_files,
#ifdef CONFIG_MEMCG
	/*
	 * This ensures that, if available, memcg is automatically enabled
	 * together on the default hierarchy so that the owner cgroup can
	 * be retrieved from writeback pages.
	 */
	.depends_on = 1 << memory_cgrp_id,
#endif
};
EXPORT_SYMBOL_GPL(blkio_cgrp_subsys);

/**
 * blkcg_activate_policy - activate a blkcg policy on a request_queue
 * @q: request_queue of interest
 * @pol: blkcg policy to activate
 *
 * Activate @pol on @q.  Requires %GFP_KERNEL context.  @q goes through
 * bypass mode to populate its blkgs with policy_data for @pol.
 *
 * Activation happens with @q bypassed, so nobody would be accessing blkgs
 * from IO path.  Update of each blkg is protected by both queue and blkcg
 * locks so that holding either lock and testing blkcg_policy_enabled() is
 * always enough for dereferencing policy data.
 *
 * The caller is responsible for synchronizing [de]activations and policy
 * [un]registrations.  Returns 0 on success, -errno on failure.
 */
int blkcg_activate_policy(struct request_queue *q,
			  const struct blkcg_policy *pol)
{
	LIST_HEAD(pds);
	struct blkcg_gq *blkg, *new_blkg;
	struct blkg_policy_data *pd, *n;
	int cnt = 0, ret;
	bool preloaded;

	if (blkcg_policy_enabled(q, pol))
		return 0;

	/* preallocations for root blkg */
	new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
	if (!new_blkg)
		return -ENOMEM;

	blk_queue_bypass_start(q);

	preloaded = !radix_tree_preload(GFP_KERNEL);

	/*
	 * Make sure the root blkg exists and count the existing blkgs.  As
	 * @q is bypassing at this point, blkg_lookup_create() can't be
	 * used.  Open code it.
	 */
	spin_lock_irq(q->queue_lock);

	rcu_read_lock();
	blkg = __blkg_lookup(&blkcg_root, q, false);
	if (blkg)
		blkg_free(new_blkg);
	else
		blkg = blkg_create(&blkcg_root, q, new_blkg);
	rcu_read_unlock();

	if (preloaded)
		radix_tree_preload_end();

	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		goto out_unlock;
	}

	list_for_each_entry(blkg, &q->blkg_list, q_node)
		cnt++;

	spin_unlock_irq(q->queue_lock);

	/* allocate policy_data for all existing blkgs */
	while (cnt--) {
		pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
		if (!pd) {
			ret = -ENOMEM;
			goto out_free;
		}
		list_add_tail(&pd->alloc_node, &pds);
	}

	/*
	 * Install the allocated pds.  With @q bypassing, no new blkg
	 * should have been created while the queue lock was dropped.
	 */
	spin_lock_irq(q->queue_lock);

	list_for_each_entry(blkg, &q->blkg_list, q_node) {
		if (WARN_ON(list_empty(&pds))) {
			/* umm... this shouldn't happen, just abort */
			ret = -ENOMEM;
			goto out_unlock;
		}
		pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
		list_del_init(&pd->alloc_node);

		/* grab blkcg lock too while installing @pd on @blkg */
		spin_lock(&blkg->blkcg->lock);

		blkg->pd[pol->plid] = pd;
		pd->blkg = blkg;
		pd->plid = pol->plid;
		pol->pd_init_fn(blkg);

		spin_unlock(&blkg->blkcg->lock);
	}

	__set_bit(pol->plid, q->blkcg_pols);
	ret = 0;
out_unlock:
	spin_unlock_irq(q->queue_lock);
out_free:
	blk_queue_bypass_end(q);
	list_for_each_entry_safe(pd, n, &pds, alloc_node)
		kfree(pd);
	return ret;
}
EXPORT_SYMBOL_GPL(blkcg_activate_policy);

/**
 * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue
 * @q: request_queue of interest
 * @pol: blkcg policy to deactivate
 *
 * Deactivate @pol on @q.  Follows the same synchronization rules as
 * blkcg_activate_policy().
 */
void blkcg_deactivate_policy(struct request_queue *q,
			     const struct blkcg_policy *pol)
{
	struct blkcg_gq *blkg;

	if (!blkcg_policy_enabled(q, pol))
		return;

	blk_queue_bypass_start(q);
	spin_lock_irq(q->queue_lock);

	__clear_bit(pol->plid, q->blkcg_pols);

	/* if no policy is left, no need for blkgs - shoot them down */
	if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS))
		blkg_destroy_all(q);

	list_for_each_entry(blkg, &q->blkg_list, q_node) {
		/* grab blkcg lock too while removing @pd from @blkg */
		spin_lock(&blkg->blkcg->lock);

		if (pol->pd_offline_fn)
			pol->pd_offline_fn(blkg);
		if (pol->pd_exit_fn)
			pol->pd_exit_fn(blkg);

		kfree(blkg->pd[pol->plid]);
		blkg->pd[pol->plid] = NULL;

		spin_unlock(&blkg->blkcg->lock);
	}

	spin_unlock_irq(q->queue_lock);
	blk_queue_bypass_end(q);
}
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);

/**
 * blkcg_policy_register - register a blkcg policy
 * @pol: blkcg policy to register
 *
 * Register @pol with blkcg core.  Might sleep and @pol may be modified on
 * successful registration.  Returns 0 on success and -errno on failure.
 */
int blkcg_policy_register(struct blkcg_policy *pol)
{
	int i, ret;

	if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data)))
		return -EINVAL;

	mutex_lock(&blkcg_pol_mutex);

	/* find an empty slot */
	ret = -ENOSPC;
	for (i = 0; i < BLKCG_MAX_POLS; i++)
		if (!blkcg_policy[i])
			break;
	if (i >= BLKCG_MAX_POLS)
		goto out_unlock;

	/* register and update blkgs */
	pol->plid = i;
	blkcg_policy[i] = pol;

	/* everything is in place, add intf files for the new policy */
	if (pol->cftypes)
		WARN_ON(cgroup_add_legacy_cftypes(&blkio_cgrp_subsys,
						  pol->cftypes));
	ret = 0;
out_unlock:
	mutex_unlock(&blkcg_pol_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(blkcg_policy_register);
3e2520668
|
1100 |
|
8bd435b30
|
1101 |
/** |
3c798398e
|
1102 1103 |
* blkcg_policy_unregister - unregister a blkcg policy * @pol: blkcg policy to unregister |
8bd435b30
|
1104 |
* |
3c798398e
|
1105 |
* Undo blkcg_policy_register(@pol). Might sleep. |
8bd435b30
|
1106 |
*/ |
3c798398e
|
1107 |
void blkcg_policy_unregister(struct blkcg_policy *pol) |
3e2520668
|
1108 |
{ |
bc0d6501a
|
1109 |
mutex_lock(&blkcg_pol_mutex); |
3c798398e
|
1110 |
if (WARN_ON(blkcg_policy[pol->plid] != pol)) |
8bd435b30
|
1111 1112 1113 |
goto out_unlock; /* kill the intf files first */ |
3c798398e
|
1114 |
if (pol->cftypes) |
2bb566cb6
|
1115 |
cgroup_rm_cftypes(pol->cftypes); |
44ea53de4
|
1116 |
|
8bd435b30
|
1117 |
/* unregister and update blkgs */ |
3c798398e
|
1118 |
blkcg_policy[pol->plid] = NULL; |
8bd435b30
|
1119 |
out_unlock: |
bc0d6501a
|
1120 |
mutex_unlock(&blkcg_pol_mutex); |
3e2520668
|
1121 |
} |
3c798398e
|
1122 |
EXPORT_SYMBOL_GPL(blkcg_policy_unregister); |
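
/*
 * Illustrative sketch (not part of the original file): the minimal life
 * cycle of a policy built on this interface.  Everything named "foo" is
 * hypothetical; a real policy also provides cftypes and the offline/exit
 * callbacks it needs.  A queue opts in separately via
 * blkcg_activate_policy(q, &blkcg_policy_foo), typically from the elevator
 * or driver implementing the policy.
 */
struct foo_group {
	struct blkg_policy_data pd;	/* must be the first member */
	unsigned int weight;
};

static void foo_pd_init(struct blkcg_gq *blkg)
{
	struct foo_group *fg = container_of(blkg_to_pd(blkg, &blkcg_policy_foo),
					    struct foo_group, pd);

	fg->weight = CFQ_WEIGHT_DEFAULT;
}

static struct blkcg_policy blkcg_policy_foo = {
	.pd_size	= sizeof(struct foo_group),
	.pd_init_fn	= foo_pd_init,
};

static int __init foo_init(void)
{
	return blkcg_policy_register(&blkcg_policy_foo);
}
module_init(foo_init);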