Commit cd1604fab4f95f7cfc227d3955fd7ae14da61f38
Committed by: Jens Axboe
Parent: f51b802c17
blkcg: factor out blkio_group creation
Currently both blk-throttle and cfq-iosched implement their own blkio_group creation code in throtl_get_tg() and cfq_get_cfqg(). This patch factors out the common code into blkg_lookup_create(), which returns an ERR_PTR value so that transient failures due to queue bypass can be distinguished from other failures.

* New blkio_policy_ops methods blkio_alloc_group_fn() and blkio_link_group_fn() are added. Both are transitional and will be removed once the blkg management code is fully moved into blk-cgroup.c.

* blkio_alloc_group_fn() allocates the policy-specific blkg, which is usually a larger data structure with the blkg as its first entry, and initializes it. Note that initialization of the blkg proper, including percpu stats, is the responsibility of blk-cgroup proper. Also note that default config (weight, bps, ...) initialization is done from this method; otherwise, we end up violating the locking order between the blkcg and q locks via the blkcg_get_CONF() functions.

* blkio_link_group_fn() is called under queue_lock and is responsible for linking the blkg to the queue. The blkcg side is handled by blk-cgroup proper.

* The common blkg creation function is named blkg_lookup_create(), and blkiocg_lookup_group() is renamed to blkg_lookup() for consistency. The related throtl / cfq functions are similarly [re]named for consistency.

This simplifies blkcg policy implementations and enables further cleanup.

-v2: Vivek noticed that blkg_lookup_create() incorrectly tested blk_queue_dead() instead of blk_queue_bypass(), leading to a user of the function creating a new blkg on a bypassing queue. This is a bug introduced while relocating the bypass patches before this one. Fixed.

-v3: ERR_PTR patch folded into this one. @for_root added to blkg_lookup_create() to allow creating the root group on a bypassed queue during an elevator switch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
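For orientation, the sketch below shows how a policy is expected to plug into the two new transitional hooks. It is not part of the patch: the "foo" policy name and struct foo_grp are hypothetical, and the real implementations are the blk-throttle and cfq-iosched changes in the diff that follows.

/*
 * Minimal sketch, not from the patch: how a blkcg policy plugs into the
 * new transitional hooks.  "foo" and struct foo_grp are hypothetical.
 */
struct foo_grp {
	struct blkio_group blkg;	/* blkg embedded as the first member */
	/* policy-private state and per-group config ... */
};

/* may be reached from the IO path, so the allocation must not sleep */
static struct blkio_group *foo_alloc_blkio_group(struct request_queue *q,
						 struct blkio_cgroup *blkcg)
{
	struct foo_grp *fg = kzalloc_node(sizeof(*fg), GFP_ATOMIC, q->node);

	if (!fg)
		return NULL;
	/*
	 * Policy-specific init and default config (weight, bps, ...) go
	 * here; the blkg proper, including percpu stats, is initialized
	 * by blk-cgroup itself in blkg_lookup_create().
	 */
	return &fg->blkg;
}

/* called under q->queue_lock; link the group into the queue-side lists */
static void foo_link_blkio_group(struct request_queue *q,
				 struct blkio_group *blkg)
{
	/* e.g. add the group to the policy's per-queue list */
}

static struct blkio_policy_type blkio_policy_foo = {
	.ops = {
		.blkio_alloc_group_fn	= foo_alloc_blkio_group,
		.blkio_link_group_fn	= foo_link_blkio_group,
		/* existing unlink/clear/update callbacks unchanged */
	},
};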
Showing 5 changed files with 193 additions and 248 deletions
block/blk-cgroup.c
... | ... | @@ -465,38 +465,93 @@ |
465 | 465 | } |
466 | 466 | EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); |
467 | 467 | |
468 | -/* | |
469 | - * This function allocates the per cpu stats for blkio_group. Should be called | |
470 | - * from sleepable context as alloc_per_cpu() requires that. | |
471 | - */ | |
472 | -int blkio_alloc_blkg_stats(struct blkio_group *blkg) | |
468 | +struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, | |
469 | + struct request_queue *q, | |
470 | + enum blkio_policy_id plid, | |
471 | + bool for_root) | |
472 | + __releases(q->queue_lock) __acquires(q->queue_lock) | |
473 | 473 | { |
474 | - /* Allocate memory for per cpu stats */ | |
475 | - blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); | |
476 | - if (!blkg->stats_cpu) | |
477 | - return -ENOMEM; | |
478 | - return 0; | |
479 | -} | |
480 | -EXPORT_SYMBOL_GPL(blkio_alloc_blkg_stats); | |
474 | + struct blkio_policy_type *pol = blkio_policy[plid]; | |
475 | + struct blkio_group *blkg, *new_blkg; | |
481 | 476 | |
482 | -void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | |
483 | - struct blkio_group *blkg, struct request_queue *q, dev_t dev, | |
484 | - enum blkio_policy_id plid) | |
485 | -{ | |
486 | - unsigned long flags; | |
477 | + WARN_ON_ONCE(!rcu_read_lock_held()); | |
478 | + lockdep_assert_held(q->queue_lock); | |
487 | 479 | |
488 | - spin_lock_irqsave(&blkcg->lock, flags); | |
489 | - spin_lock_init(&blkg->stats_lock); | |
490 | - rcu_assign_pointer(blkg->q, q); | |
491 | - blkg->blkcg_id = css_id(&blkcg->css); | |
480 | + /* | |
481 | + * This could be the first entry point of blkcg implementation and | |
482 | + * we shouldn't allow anything to go through for a bypassing queue. | |
483 | + * The following can be removed if blkg lookup is guaranteed to | |
484 | + * fail on a bypassing queue. | |
485 | + */ | |
486 | + if (unlikely(blk_queue_bypass(q)) && !for_root) | |
487 | + return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); | |
488 | + | |
489 | + blkg = blkg_lookup(blkcg, q, plid); | |
490 | + if (blkg) | |
491 | + return blkg; | |
492 | + | |
493 | + if (!css_tryget(&blkcg->css)) | |
494 | + return ERR_PTR(-EINVAL); | |
495 | + | |
496 | + /* | |
497 | + * Allocate and initialize. | |
498 | + * | |
499 | + * FIXME: The following is broken. Percpu memory allocation | |
500 | + * requires %GFP_KERNEL context and can't be performed from IO | |
501 | + * path. Allocation here should inherently be atomic and the | |
502 | + * following lock dancing can be removed once the broken percpu | |
503 | + * allocation is fixed. | |
504 | + */ | |
505 | + spin_unlock_irq(q->queue_lock); | |
506 | + rcu_read_unlock(); | |
507 | + | |
508 | + new_blkg = pol->ops.blkio_alloc_group_fn(q, blkcg); | |
509 | + if (new_blkg) { | |
510 | + new_blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); | |
511 | + | |
512 | + spin_lock_init(&new_blkg->stats_lock); | |
513 | + rcu_assign_pointer(new_blkg->q, q); | |
514 | + new_blkg->blkcg_id = css_id(&blkcg->css); | |
515 | + new_blkg->plid = plid; | |
516 | + cgroup_path(blkcg->css.cgroup, new_blkg->path, | |
517 | + sizeof(new_blkg->path)); | |
518 | + } | |
519 | + | |
520 | + rcu_read_lock(); | |
521 | + spin_lock_irq(q->queue_lock); | |
522 | + css_put(&blkcg->css); | |
523 | + | |
524 | + /* did bypass get turned on inbetween? */ | |
525 | + if (unlikely(blk_queue_bypass(q)) && !for_root) { | |
526 | + blkg = ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); | |
527 | + goto out; | |
528 | + } | |
529 | + | |
530 | + /* did someone beat us to it? */ | |
531 | + blkg = blkg_lookup(blkcg, q, plid); | |
532 | + if (unlikely(blkg)) | |
533 | + goto out; | |
534 | + | |
535 | + /* did alloc fail? */ | |
536 | + if (unlikely(!new_blkg || !new_blkg->stats_cpu)) { | |
537 | + blkg = ERR_PTR(-ENOMEM); | |
538 | + goto out; | |
539 | + } | |
540 | + | |
541 | + /* insert */ | |
542 | + spin_lock(&blkcg->lock); | |
543 | + swap(blkg, new_blkg); | |
492 | 544 | hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); |
493 | - blkg->plid = plid; | |
494 | - spin_unlock_irqrestore(&blkcg->lock, flags); | |
495 | - /* Need to take css reference ? */ | |
496 | - cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); | |
497 | - blkg->dev = dev; | |
545 | + pol->ops.blkio_link_group_fn(q, blkg); | |
546 | + spin_unlock(&blkcg->lock); | |
547 | +out: | |
548 | + if (new_blkg) { | |
549 | + free_percpu(new_blkg->stats_cpu); | |
550 | + kfree(new_blkg); | |
551 | + } | |
552 | + return blkg; | |
498 | 553 | } |
499 | -EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group); | |
554 | +EXPORT_SYMBOL_GPL(blkg_lookup_create); | |
500 | 555 | |
501 | 556 | static void __blkiocg_del_blkio_group(struct blkio_group *blkg) |
502 | 557 | { |
... | ... | @@ -533,9 +588,9 @@ |
533 | 588 | EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group); |
534 | 589 | |
535 | 590 | /* called under rcu_read_lock(). */ |
536 | -struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, | |
537 | - struct request_queue *q, | |
538 | - enum blkio_policy_id plid) | |
591 | +struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, | |
592 | + struct request_queue *q, | |
593 | + enum blkio_policy_id plid) | |
539 | 594 | { |
540 | 595 | struct blkio_group *blkg; |
541 | 596 | struct hlist_node *n; |
... | ... | @@ -545,7 +600,7 @@ |
545 | 600 | return blkg; |
546 | 601 | return NULL; |
547 | 602 | } |
548 | -EXPORT_SYMBOL_GPL(blkiocg_lookup_group); | |
603 | +EXPORT_SYMBOL_GPL(blkg_lookup); | |
549 | 604 | |
550 | 605 | void blkg_destroy_all(struct request_queue *q) |
551 | 606 | { |
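For reference, the caller-side pattern that blkg_lookup_create() expects looks roughly as follows. This is a sketch modeled on throtl_lookup_create_tg() and cfq_lookup_create_cfqg() further down in this patch; the "foo" names are hypothetical and the plid shown is only an example.

/*
 * Caller-side sketch (hypothetical "foo" policy): the caller already
 * holds rcu_read_lock() and q->queue_lock.  An ERR_PTR return signals
 * a bypassing or dead queue and is treated as a transient failure --
 * the policy falls back (e.g. to its root group) or bails out.
 */
static struct foo_grp *foo_lookup_create(struct request_queue *q,
					 struct blkio_cgroup *blkcg)
{
	struct blkio_group *blkg;

	blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false);
	if (IS_ERR(blkg))
		return NULL;	/* transient: queue is bypassing or dead */

	return container_of(blkg, struct foo_grp, blkg);
}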
block/blk-cgroup.h
... | ... | @@ -204,6 +204,10 @@ |
204 | 204 | extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, |
205 | 205 | dev_t dev); |
206 | 206 | |
207 | +typedef struct blkio_group *(blkio_alloc_group_fn)(struct request_queue *q, | |
208 | + struct blkio_cgroup *blkcg); | |
209 | +typedef void (blkio_link_group_fn)(struct request_queue *q, | |
210 | + struct blkio_group *blkg); | |
207 | 211 | typedef void (blkio_unlink_group_fn)(struct request_queue *q, |
208 | 212 | struct blkio_group *blkg); |
209 | 213 | typedef bool (blkio_clear_queue_fn)(struct request_queue *q); |
... | ... | @@ -219,6 +223,8 @@ |
219 | 223 | struct blkio_group *blkg, unsigned int write_iops); |
220 | 224 | |
221 | 225 | struct blkio_policy_ops { |
226 | + blkio_alloc_group_fn *blkio_alloc_group_fn; | |
227 | + blkio_link_group_fn *blkio_link_group_fn; | |
222 | 228 | blkio_unlink_group_fn *blkio_unlink_group_fn; |
223 | 229 | blkio_clear_queue_fn *blkio_clear_queue_fn; |
224 | 230 | blkio_update_group_weight_fn *blkio_update_group_weight_fn; |
225 | 231 | |
... | ... | @@ -307,14 +313,14 @@ |
307 | 313 | extern struct blkio_cgroup blkio_root_cgroup; |
308 | 314 | extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); |
309 | 315 | extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk); |
310 | -extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | |
311 | - struct blkio_group *blkg, struct request_queue *q, dev_t dev, | |
312 | - enum blkio_policy_id plid); | |
313 | -extern int blkio_alloc_blkg_stats(struct blkio_group *blkg); | |
314 | 316 | extern int blkiocg_del_blkio_group(struct blkio_group *blkg); |
315 | -extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, | |
316 | - struct request_queue *q, | |
317 | - enum blkio_policy_id plid); | |
317 | +extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, | |
318 | + struct request_queue *q, | |
319 | + enum blkio_policy_id plid); | |
320 | +struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, | |
321 | + struct request_queue *q, | |
322 | + enum blkio_policy_id plid, | |
323 | + bool for_root); | |
318 | 324 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, |
319 | 325 | unsigned long time, |
320 | 326 | unsigned long unaccounted_time); |
321 | 327 | |
... | ... | @@ -335,17 +341,11 @@ |
335 | 341 | static inline struct blkio_cgroup * |
336 | 342 | task_blkio_cgroup(struct task_struct *tsk) { return NULL; } |
337 | 343 | |
338 | -static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | |
339 | - struct blkio_group *blkg, void *key, dev_t dev, | |
340 | - enum blkio_policy_id plid) {} | |
341 | - | |
342 | -static inline int blkio_alloc_blkg_stats(struct blkio_group *blkg) { return 0; } | |
343 | - | |
344 | 344 | static inline int |
345 | 345 | blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } |
346 | 346 | |
347 | -static inline struct blkio_group * | |
348 | -blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } | |
347 | +static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, | |
348 | + void *key) { return NULL; } | |
349 | 349 | static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, |
350 | 350 | unsigned long time, |
351 | 351 | unsigned long unaccounted_time) |
block/blk-throttle.c
... | ... | @@ -181,17 +181,25 @@ |
181 | 181 | call_rcu(&tg->rcu_head, throtl_free_tg); |
182 | 182 | } |
183 | 183 | |
184 | -static void throtl_init_group(struct throtl_grp *tg) | |
184 | +static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q, | |
185 | + struct blkio_cgroup *blkcg) | |
185 | 186 | { |
187 | + struct throtl_grp *tg; | |
188 | + | |
189 | + tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, q->node); | |
190 | + if (!tg) | |
191 | + return NULL; | |
192 | + | |
186 | 193 | INIT_HLIST_NODE(&tg->tg_node); |
187 | 194 | RB_CLEAR_NODE(&tg->rb_node); |
188 | 195 | bio_list_init(&tg->bio_lists[0]); |
189 | 196 | bio_list_init(&tg->bio_lists[1]); |
190 | 197 | tg->limits_changed = false; |
191 | 198 | |
192 | - /* Practically unlimited BW */ | |
193 | - tg->bps[0] = tg->bps[1] = -1; | |
194 | - tg->iops[0] = tg->iops[1] = -1; | |
199 | + tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev); | |
200 | + tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev); | |
201 | + tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev); | |
202 | + tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev); | |
195 | 203 | |
196 | 204 | /* |
197 | 205 | * Take the initial reference that will be released on destroy |
198 | 206 | |
... | ... | @@ -200,14 +208,8 @@ |
200 | 208 | * exit or cgroup deletion path depending on who is exiting first. |
201 | 209 | */ |
202 | 210 | atomic_set(&tg->ref, 1); |
203 | -} | |
204 | 211 | |
205 | -/* Should be called with rcu read lock held (needed for blkcg) */ | |
206 | -static void | |
207 | -throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg) | |
208 | -{ | |
209 | - hlist_add_head(&tg->tg_node, &td->tg_list); | |
210 | - td->nr_undestroyed_grps++; | |
212 | + return &tg->blkg; | |
211 | 213 | } |
212 | 214 | |
213 | 215 | static void |
... | ... | @@ -246,119 +248,62 @@ |
246 | 248 | spin_unlock_irq(td->queue->queue_lock); |
247 | 249 | } |
248 | 250 | |
249 | -static void throtl_init_add_tg_lists(struct throtl_data *td, | |
250 | - struct throtl_grp *tg, struct blkio_cgroup *blkcg) | |
251 | +static void throtl_link_blkio_group(struct request_queue *q, | |
252 | + struct blkio_group *blkg) | |
251 | 253 | { |
254 | + struct throtl_data *td = q->td; | |
255 | + struct throtl_grp *tg = tg_of_blkg(blkg); | |
256 | + | |
252 | 257 | __throtl_tg_fill_dev_details(td, tg); |
253 | 258 | |
254 | - /* Add group onto cgroup list */ | |
255 | - blkiocg_add_blkio_group(blkcg, &tg->blkg, td->queue, | |
256 | - tg->blkg.dev, BLKIO_POLICY_THROTL); | |
257 | - | |
258 | - tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev); | |
259 | - tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev); | |
260 | - tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev); | |
261 | - tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev); | |
262 | - | |
263 | - throtl_add_group_to_td_list(td, tg); | |
259 | + hlist_add_head(&tg->tg_node, &td->tg_list); | |
260 | + td->nr_undestroyed_grps++; | |
264 | 261 | } |
265 | 262 | |
266 | -/* Should be called without queue lock and outside of rcu period */ | |
267 | -static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td) | |
268 | -{ | |
269 | - struct throtl_grp *tg = NULL; | |
270 | - int ret; | |
271 | - | |
272 | - tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node); | |
273 | - if (!tg) | |
274 | - return NULL; | |
275 | - | |
276 | - ret = blkio_alloc_blkg_stats(&tg->blkg); | |
277 | - | |
278 | - if (ret) { | |
279 | - kfree(tg); | |
280 | - return NULL; | |
281 | - } | |
282 | - | |
283 | - throtl_init_group(tg); | |
284 | - return tg; | |
285 | -} | |
286 | - | |
287 | 263 | static struct |
288 | -throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) | |
264 | +throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) | |
289 | 265 | { |
290 | 266 | struct throtl_grp *tg = NULL; |
291 | 267 | |
292 | 268 | /* |
293 | 269 | * This is the common case when there are no blkio cgroups. |
294 | - * Avoid lookup in this case | |
295 | - */ | |
270 | + * Avoid lookup in this case | |
271 | + */ | |
296 | 272 | if (blkcg == &blkio_root_cgroup) |
297 | 273 | tg = td->root_tg; |
298 | 274 | else |
299 | - tg = tg_of_blkg(blkiocg_lookup_group(blkcg, td->queue, | |
300 | - BLKIO_POLICY_THROTL)); | |
275 | + tg = tg_of_blkg(blkg_lookup(blkcg, td->queue, | |
276 | + BLKIO_POLICY_THROTL)); | |
301 | 277 | |
302 | 278 | __throtl_tg_fill_dev_details(td, tg); |
303 | 279 | return tg; |
304 | 280 | } |
305 | 281 | |
306 | -static struct throtl_grp *throtl_get_tg(struct throtl_data *td, | |
307 | - struct blkio_cgroup *blkcg) | |
282 | +static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, | |
283 | + struct blkio_cgroup *blkcg) | |
308 | 284 | { |
309 | - struct throtl_grp *tg = NULL, *__tg = NULL; | |
310 | 285 | struct request_queue *q = td->queue; |
286 | + struct throtl_grp *tg = NULL; | |
311 | 287 | |
312 | - /* no throttling for dead queue */ | |
313 | - if (unlikely(blk_queue_bypass(q))) | |
314 | - return NULL; | |
315 | - | |
316 | - tg = throtl_find_tg(td, blkcg); | |
317 | - if (tg) | |
318 | - return tg; | |
319 | - | |
320 | - if (!css_tryget(&blkcg->css)) | |
321 | - return NULL; | |
322 | - | |
323 | 288 | /* |
324 | - * Need to allocate a group. Allocation of group also needs allocation | |
325 | - * of per cpu stats which in-turn takes a mutex() and can block. Hence | |
326 | - * we need to drop rcu lock and queue_lock before we call alloc. | |
289 | + * This is the common case when there are no blkio cgroups. | |
290 | + * Avoid lookup in this case | |
327 | 291 | */ |
328 | - spin_unlock_irq(q->queue_lock); | |
329 | - rcu_read_unlock(); | |
292 | + if (blkcg == &blkio_root_cgroup) { | |
293 | + tg = td->root_tg; | |
294 | + } else { | |
295 | + struct blkio_group *blkg; | |
330 | 296 | |
331 | - tg = throtl_alloc_tg(td); | |
297 | + blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false); | |
332 | 298 | |
333 | - /* Group allocated and queue is still alive. take the lock */ | |
334 | - rcu_read_lock(); | |
335 | - spin_lock_irq(q->queue_lock); | |
336 | - css_put(&blkcg->css); | |
337 | - | |
338 | - /* Make sure @q is still alive */ | |
339 | - if (unlikely(blk_queue_bypass(q))) { | |
340 | - kfree(tg); | |
341 | - return NULL; | |
299 | + /* if %NULL and @q is alive, fall back to root_tg */ | |
300 | + if (!IS_ERR(blkg)) | |
301 | + tg = tg_of_blkg(blkg); | |
302 | + else if (!blk_queue_dead(q)) | |
303 | + tg = td->root_tg; | |
342 | 304 | } |
343 | 305 | |
344 | - /* | |
345 | - * If some other thread already allocated the group while we were | |
346 | - * not holding queue lock, free up the group | |
347 | - */ | |
348 | - __tg = throtl_find_tg(td, blkcg); | |
349 | - | |
350 | - if (__tg) { | |
351 | - kfree(tg); | |
352 | - return __tg; | |
353 | - } | |
354 | - | |
355 | - /* Group allocation failed. Account the IO to root group */ | |
356 | - if (!tg) { | |
357 | - tg = td->root_tg; | |
358 | - return tg; | |
359 | - } | |
360 | - | |
361 | - throtl_init_add_tg_lists(td, tg, blkcg); | |
306 | + __throtl_tg_fill_dev_details(td, tg); | |
362 | 307 | return tg; |
363 | 308 | } |
364 | 309 | |
... | ... | @@ -1107,6 +1052,8 @@ |
1107 | 1052 | |
1108 | 1053 | static struct blkio_policy_type blkio_policy_throtl = { |
1109 | 1054 | .ops = { |
1055 | + .blkio_alloc_group_fn = throtl_alloc_blkio_group, | |
1056 | + .blkio_link_group_fn = throtl_link_blkio_group, | |
1110 | 1057 | .blkio_unlink_group_fn = throtl_unlink_blkio_group, |
1111 | 1058 | .blkio_clear_queue_fn = throtl_clear_queue, |
1112 | 1059 | .blkio_update_group_read_bps_fn = |
... | ... | @@ -1141,7 +1088,7 @@ |
1141 | 1088 | */ |
1142 | 1089 | rcu_read_lock(); |
1143 | 1090 | blkcg = task_blkio_cgroup(current); |
1144 | - tg = throtl_find_tg(td, blkcg); | |
1091 | + tg = throtl_lookup_tg(td, blkcg); | |
1145 | 1092 | if (tg) { |
1146 | 1093 | throtl_tg_fill_dev_details(td, tg); |
1147 | 1094 | |
... | ... | @@ -1157,7 +1104,7 @@ |
1157 | 1104 | * IO group |
1158 | 1105 | */ |
1159 | 1106 | spin_lock_irq(q->queue_lock); |
1160 | - tg = throtl_get_tg(td, blkcg); | |
1107 | + tg = throtl_lookup_create_tg(td, blkcg); | |
1161 | 1108 | if (unlikely(!tg)) |
1162 | 1109 | goto out_unlock; |
1163 | 1110 | |
... | ... | @@ -1252,6 +1199,7 @@ |
1252 | 1199 | int blk_throtl_init(struct request_queue *q) |
1253 | 1200 | { |
1254 | 1201 | struct throtl_data *td; |
1202 | + struct blkio_group *blkg; | |
1255 | 1203 | |
1256 | 1204 | td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); |
1257 | 1205 | if (!td) |
... | ... | @@ -1262,13 +1210,17 @@ |
1262 | 1210 | td->limits_changed = false; |
1263 | 1211 | INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work); |
1264 | 1212 | |
1265 | - /* alloc and Init root group. */ | |
1213 | + q->td = td; | |
1266 | 1214 | td->queue = q; |
1267 | 1215 | |
1216 | + /* alloc and init root group. */ | |
1268 | 1217 | rcu_read_lock(); |
1269 | 1218 | spin_lock_irq(q->queue_lock); |
1270 | 1219 | |
1271 | - td->root_tg = throtl_get_tg(td, &blkio_root_cgroup); | |
1220 | + blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL, | |
1221 | + true); | |
1222 | + if (!IS_ERR(blkg)) | |
1223 | + td->root_tg = tg_of_blkg(blkg); | |
1272 | 1224 | |
1273 | 1225 | spin_unlock_irq(q->queue_lock); |
1274 | 1226 | rcu_read_unlock(); |
... | ... | @@ -1277,9 +1229,6 @@ |
1277 | 1229 | kfree(td); |
1278 | 1230 | return -ENOMEM; |
1279 | 1231 | } |
1280 | - | |
1281 | - /* Attach throtl data to request queue */ | |
1282 | - q->td = td; | |
1283 | 1232 | return 0; |
1284 | 1233 | } |
1285 | 1234 |
block/cfq-iosched.c
... | ... | @@ -1048,10 +1048,12 @@ |
1048 | 1048 | cfqg->needs_update = true; |
1049 | 1049 | } |
1050 | 1050 | |
1051 | -static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd, | |
1052 | - struct cfq_group *cfqg, struct blkio_cgroup *blkcg) | |
1051 | +static void cfq_link_blkio_group(struct request_queue *q, | |
1052 | + struct blkio_group *blkg) | |
1053 | 1053 | { |
1054 | - struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info; | |
1054 | + struct cfq_data *cfqd = q->elevator->elevator_data; | |
1055 | + struct backing_dev_info *bdi = &q->backing_dev_info; | |
1056 | + struct cfq_group *cfqg = cfqg_of_blkg(blkg); | |
1055 | 1057 | unsigned int major, minor; |
1056 | 1058 | |
1057 | 1059 | /* |
... | ... | @@ -1062,34 +1064,26 @@ |
1062 | 1064 | */ |
1063 | 1065 | if (bdi->dev) { |
1064 | 1066 | sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); |
1065 | - cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, | |
1066 | - cfqd->queue, MKDEV(major, minor)); | |
1067 | - } else | |
1068 | - cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, | |
1069 | - cfqd->queue, 0); | |
1067 | + blkg->dev = MKDEV(major, minor); | |
1068 | + } | |
1070 | 1069 | |
1071 | 1070 | cfqd->nr_blkcg_linked_grps++; |
1072 | - cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); | |
1073 | 1071 | |
1074 | 1072 | /* Add group on cfqd list */ |
1075 | 1073 | hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); |
1076 | 1074 | } |
1077 | 1075 | |
1078 | -/* | |
1079 | - * Should be called from sleepable context. No request queue lock as per | |
1080 | - * cpu stats are allocated dynamically and alloc_percpu needs to be called | |
1081 | - * from sleepable context. | |
1082 | - */ | |
1083 | -static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd) | |
1076 | +static struct blkio_group *cfq_alloc_blkio_group(struct request_queue *q, | |
1077 | + struct blkio_cgroup *blkcg) | |
1084 | 1078 | { |
1085 | 1079 | struct cfq_group *cfqg; |
1086 | - int ret; | |
1087 | 1080 | |
1088 | - cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node); | |
1081 | + cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, q->node); | |
1089 | 1082 | if (!cfqg) |
1090 | 1083 | return NULL; |
1091 | 1084 | |
1092 | 1085 | cfq_init_cfqg_base(cfqg); |
1086 | + cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); | |
1093 | 1087 | |
1094 | 1088 | /* |
1095 | 1089 | * Take the initial reference that will be released on destroy |
... | ... | @@ -1099,90 +1093,38 @@ |
1099 | 1093 | */ |
1100 | 1094 | cfqg->ref = 1; |
1101 | 1095 | |
1102 | - ret = blkio_alloc_blkg_stats(&cfqg->blkg); | |
1103 | - if (ret) { | |
1104 | - kfree(cfqg); | |
1105 | - return NULL; | |
1106 | - } | |
1107 | - | |
1108 | - return cfqg; | |
1096 | + return &cfqg->blkg; | |
1109 | 1097 | } |
1110 | 1098 | |
1111 | -static struct cfq_group * | |
1112 | -cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg) | |
1113 | -{ | |
1114 | - struct cfq_group *cfqg = NULL; | |
1115 | - struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info; | |
1116 | - unsigned int major, minor; | |
1117 | - | |
1118 | - /* | |
1119 | - * This is the common case when there are no blkio cgroups. | |
1120 | - * Avoid lookup in this case | |
1121 | - */ | |
1122 | - if (blkcg == &blkio_root_cgroup) | |
1123 | - cfqg = cfqd->root_group; | |
1124 | - else | |
1125 | - cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, cfqd->queue, | |
1126 | - BLKIO_POLICY_PROP)); | |
1127 | - | |
1128 | - if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) { | |
1129 | - sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); | |
1130 | - cfqg->blkg.dev = MKDEV(major, minor); | |
1131 | - } | |
1132 | - | |
1133 | - return cfqg; | |
1134 | -} | |
1135 | - | |
1136 | 1099 | /* |
1137 | 1100 | * Search for the cfq group current task belongs to. request_queue lock must |
1138 | 1101 | * be held. |
1139 | 1102 | */ |
1140 | -static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, | |
1141 | - struct blkio_cgroup *blkcg) | |
1103 | +static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, | |
1104 | + struct blkio_cgroup *blkcg) | |
1142 | 1105 | { |
1143 | - struct cfq_group *cfqg = NULL, *__cfqg = NULL; | |
1144 | 1106 | struct request_queue *q = cfqd->queue; |
1107 | + struct backing_dev_info *bdi = &q->backing_dev_info; | |
1108 | + struct cfq_group *cfqg = NULL; | |
1145 | 1109 | |
1146 | - cfqg = cfq_find_cfqg(cfqd, blkcg); | |
1147 | - if (cfqg) | |
1148 | - return cfqg; | |
1110 | + /* avoid lookup for the common case where there's no blkio cgroup */ | |
1111 | + if (blkcg == &blkio_root_cgroup) { | |
1112 | + cfqg = cfqd->root_group; | |
1113 | + } else { | |
1114 | + struct blkio_group *blkg; | |
1149 | 1115 | |
1150 | - if (!css_tryget(&blkcg->css)) | |
1151 | - return NULL; | |
1116 | + blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false); | |
1117 | + if (!IS_ERR(blkg)) | |
1118 | + cfqg = cfqg_of_blkg(blkg); | |
1119 | + } | |
1152 | 1120 | |
1153 | - /* | |
1154 | - * Need to allocate a group. Allocation of group also needs allocation | |
1155 | - * of per cpu stats which in-turn takes a mutex() and can block. Hence | |
1156 | - * we need to drop rcu lock and queue_lock before we call alloc. | |
1157 | - * | |
1158 | - * Not taking any queue reference here and assuming that queue is | |
1159 | - * around by the time we return. CFQ queue allocation code does | |
1160 | - * the same. It might be racy though. | |
1161 | - */ | |
1162 | - rcu_read_unlock(); | |
1163 | - spin_unlock_irq(q->queue_lock); | |
1121 | + if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) { | |
1122 | + unsigned int major, minor; | |
1164 | 1123 | |
1165 | - cfqg = cfq_alloc_cfqg(cfqd); | |
1166 | - | |
1167 | - spin_lock_irq(q->queue_lock); | |
1168 | - rcu_read_lock(); | |
1169 | - css_put(&blkcg->css); | |
1170 | - | |
1171 | - /* | |
1172 | - * If some other thread already allocated the group while we were | |
1173 | - * not holding queue lock, free up the group | |
1174 | - */ | |
1175 | - __cfqg = cfq_find_cfqg(cfqd, blkcg); | |
1176 | - | |
1177 | - if (__cfqg) { | |
1178 | - kfree(cfqg); | |
1179 | - return __cfqg; | |
1124 | + sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); | |
1125 | + cfqg->blkg.dev = MKDEV(major, minor); | |
1180 | 1126 | } |
1181 | 1127 | |
1182 | - if (!cfqg) | |
1183 | - cfqg = cfqd->root_group; | |
1184 | - | |
1185 | - cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg); | |
1186 | 1128 | return cfqg; |
1187 | 1129 | } |
1188 | 1130 | |
... | ... | @@ -1294,8 +1236,8 @@ |
1294 | 1236 | } |
1295 | 1237 | |
1296 | 1238 | #else /* GROUP_IOSCHED */ |
1297 | -static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, | |
1298 | - struct blkio_cgroup *blkcg) | |
1239 | +static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, | |
1240 | + struct blkio_cgroup *blkcg) | |
1299 | 1241 | { |
1300 | 1242 | return cfqd->root_group; |
1301 | 1243 | } |
... | ... | @@ -2887,7 +2829,8 @@ |
2887 | 2829 | |
2888 | 2830 | blkcg = task_blkio_cgroup(current); |
2889 | 2831 | |
2890 | - cfqg = cfq_get_cfqg(cfqd, blkcg); | |
2832 | + cfqg = cfq_lookup_create_cfqg(cfqd, blkcg); | |
2833 | + | |
2891 | 2834 | cic = cfq_cic_lookup(cfqd, ioc); |
2892 | 2835 | /* cic always exists here */ |
2893 | 2836 | cfqq = cic_to_cfqq(cic, is_sync); |
... | ... | @@ -3694,6 +3637,7 @@ |
3694 | 3637 | static int cfq_init_queue(struct request_queue *q) |
3695 | 3638 | { |
3696 | 3639 | struct cfq_data *cfqd; |
3640 | + struct blkio_group *blkg __maybe_unused; | |
3697 | 3641 | int i; |
3698 | 3642 | |
3699 | 3643 | cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); |
... | ... | @@ -3711,7 +3655,10 @@ |
3711 | 3655 | rcu_read_lock(); |
3712 | 3656 | spin_lock_irq(q->queue_lock); |
3713 | 3657 | |
3714 | - cfqd->root_group = cfq_get_cfqg(cfqd, &blkio_root_cgroup); | |
3658 | + blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_PROP, | |
3659 | + true); | |
3660 | + if (!IS_ERR(blkg)) | |
3661 | + cfqd->root_group = cfqg_of_blkg(blkg); | |
3715 | 3662 | |
3716 | 3663 | spin_unlock_irq(q->queue_lock); |
3717 | 3664 | rcu_read_unlock(); |
... | ... | @@ -3897,6 +3844,8 @@ |
3897 | 3844 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
3898 | 3845 | static struct blkio_policy_type blkio_policy_cfq = { |
3899 | 3846 | .ops = { |
3847 | + .blkio_alloc_group_fn = cfq_alloc_blkio_group, | |
3848 | + .blkio_link_group_fn = cfq_link_blkio_group, | |
3900 | 3849 | .blkio_unlink_group_fn = cfq_unlink_blkio_group, |
3901 | 3850 | .blkio_clear_queue_fn = cfq_clear_queue, |
3902 | 3851 | .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, |
block/cfq.h
... | ... | @@ -67,12 +67,6 @@ |
67 | 67 | direction, sync); |
68 | 68 | } |
69 | 69 | |
70 | -static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | |
71 | - struct blkio_group *blkg, struct request_queue *q, dev_t dev) | |
72 | -{ | |
73 | - blkiocg_add_blkio_group(blkcg, blkg, q, dev, BLKIO_POLICY_PROP); | |
74 | -} | |
75 | - | |
76 | 70 | static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) |
77 | 71 | { |
78 | 72 | return blkiocg_del_blkio_group(blkg); |
... | ... | @@ -105,8 +99,6 @@ |
105 | 99 | uint64_t bytes, bool direction, bool sync) {} |
106 | 100 | static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) {} |
107 | 101 | |
108 | -static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | |
109 | - struct blkio_group *blkg, struct request_queue *q, dev_t dev) {} | |
110 | 102 | static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) |
111 | 103 | { |
112 | 104 | return 0; |