Commit 2a4fd070ee8561d918a3776388331bb7e92ea59e

Authored by Tejun Heo
1 parent db61367038

blkcg: move bulk of blkcg_gq release operations to the RCU callback

Currently, when the last reference of a blkcg_gq is put, all the
release operations sans the actual freeing happen directly in
blkg_put().  As blkg_put() may be called under queue_lock, all
pd_exit_fn()s may be too.  This makes it impossible for pd_exit_fn()s
to use del_timer_sync() on timers whose handlers grab the queue_lock,
which is an irq-safe lock, due to the deadlock possibility described
in the comment on top of del_timer_sync().

This can be easily avoided by performing the release operations in the
RCU callback instead of directly from blkg_put().  This patch moves
the blkcg_gq release operations to the RCU callback.

As this leaves __blkg_release() with only the call_rcu() invocation,
blkg_rcu_free() is renamed to __blkg_release_rcu() and exported, and
the call_rcu() invocation is now done directly from blkg_put() instead
of going through __blkg_release(), which is removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Vivek Goyal <vgoyal@redhat.com>

Showing 2 changed files with 18 additions and 20 deletions Side-by-side Diff

... ... @@ -369,13 +369,17 @@
369 369 q->root_rl.blkg = NULL;
370 370 }
371 371  
372   -static void blkg_rcu_free(struct rcu_head *rcu_head)
  372 +/*
  373 + * A group is RCU protected, but having an rcu lock does not mean that one
  374 + * can access all the fields of blkg and assume these are valid. For
  375 + * example, don't try to follow throtl_data and request queue links.
  376 + *
  377 + * Having a reference to blkg under an rcu allows accesses to only values
  378 + * local to groups like group stats and group rate limits.
  379 + */
  380 +void __blkg_release_rcu(struct rcu_head *rcu_head)
373 381 {
374   - blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head));
375   -}
376   -
377   -void __blkg_release(struct blkcg_gq *blkg)
378   -{
  382 + struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
379 383 int i;
380 384  
381 385 /* tell policies that this one is being freed */
382 386  
383 387  
384 388  
... ... @@ -388,21 +392,15 @@
388 392  
389 393 /* release the blkcg and parent blkg refs this blkg has been holding */
390 394 css_put(&blkg->blkcg->css);
391   - if (blkg->parent)
  395 + if (blkg->parent) {
  396 + spin_lock_irq(blkg->q->queue_lock);
392 397 blkg_put(blkg->parent);
  398 + spin_unlock_irq(blkg->q->queue_lock);
  399 + }
393 400  
394   - /*
395   - * A group is freed in rcu manner. But having an rcu lock does not
396   - * mean that one can access all the fields of blkg and assume these
397   - * are valid. For example, don't try to follow throtl_data and
398   - * request queue links.
399   - *
400   - * Having a reference to blkg under an rcu allows acess to only
401   - * values local to groups like group stats and group rate limits
402   - */
403   - call_rcu(&blkg->rcu_head, blkg_rcu_free);
  401 + blkg_free(blkg);
404 402 }
405   -EXPORT_SYMBOL_GPL(__blkg_release);
  403 +EXPORT_SYMBOL_GPL(__blkg_release_rcu);
406 404  
407 405 /*
408 406 * The next function used by blk_queue_for_each_rl(). It's a bit tricky
... ... @@ -266,7 +266,7 @@
266 266 blkg->refcnt++;
267 267 }
268 268  
269   -void __blkg_release(struct blkcg_gq *blkg);
  269 +void __blkg_release_rcu(struct rcu_head *rcu);
270 270  
271 271 /**
272 272 * blkg_put - put a blkg reference
... ... @@ -279,7 +279,7 @@
279 279 lockdep_assert_held(blkg->q->queue_lock);
280 280 WARN_ON_ONCE(blkg->refcnt <= 0);
281 281 if (!--blkg->refcnt)
282   - __blkg_release(blkg);
  282 + call_rcu(&blkg->rcu_head, __blkg_release_rcu);
283 283 }
284 284  
285 285 struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,