Commit 16b3de6652c7aef151f38726faf90f0dbc9e9c71

Authored by Tejun Heo
1 parent b50da39f51

blkcg: implement blkg_[rw]stat_recursive_sum() and blkg_[rw]stat_merge()

Implement blkg_[rw]stat_recursive_sum() and blkg_[rw]stat_merge().
The former two collect the [rw]stats designated by the target policy
data and offset from the pd's subtree.  The latter two add one
[rw]stat to another.

Note that the recursive sum functions require the queue lock to be
held on entry to make blkg online test reliable.  This is necessary to
properly handle stats of a dying blkg.

These will be used to implement hierarchical stats.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Vivek Goyal <vgoyal@redhat.com>

Showing 2 changed files with 142 additions and 0 deletions Side-by-side Diff

... ... @@ -32,6 +32,26 @@
32 32  
33 33 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
34 34  
static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint);

/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_cgrp: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
 * read locked.  If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs.  The caller may
 * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
 * subtree.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg)		\
	cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
		if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
					      (p_blkg)->q, false)))
  54 +
35 55 static bool blkcg_policy_enabled(struct request_queue *q,
36 56 const struct blkcg_policy *pol)
37 57 {
... ... @@ -127,6 +147,17 @@
127 147 return NULL;
128 148 }
129 149  
  150 +/**
  151 + * __blkg_lookup - internal version of blkg_lookup()
  152 + * @blkcg: blkcg of interest
  153 + * @q: request_queue of interest
  154 + * @update_hint: whether to update lookup hint with the result or not
  155 + *
  156 + * This is the internal version and shouldn't be used by policy
  157 + * implementations. Looks up blkgs for the @blkcg - @q pair regardless of
  158 + * @q's bypass state. If @update_hint is %true, the caller should be
  159 + * holding @q->queue_lock and lookup hint is updated on success.
  160 + */
130 161 static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
131 162 struct request_queue *q, bool update_hint)
132 163 {
... ... @@ -584,6 +615,82 @@
584 615 return __blkg_prfill_rwstat(sf, pd, &rwstat);
585 616 }
586 617 EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
  618 +
  619 +/**
  620 + * blkg_stat_recursive_sum - collect hierarchical blkg_stat
  621 + * @pd: policy private data of interest
  622 + * @off: offset to the blkg_stat in @pd
  623 + *
  624 + * Collect the blkg_stat specified by @off from @pd and all its online
  625 + * descendants and return the sum. The caller must be holding the queue
  626 + * lock for online tests.
  627 + */
  628 +u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
  629 +{
  630 + struct blkcg_policy *pol = blkcg_policy[pd->plid];
  631 + struct blkcg_gq *pos_blkg;
  632 + struct cgroup *pos_cgrp;
  633 + u64 sum;
  634 +
  635 + lockdep_assert_held(pd->blkg->q->queue_lock);
  636 +
  637 + sum = blkg_stat_read((void *)pd + off);
  638 +
  639 + rcu_read_lock();
  640 + blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
  641 + struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
  642 + struct blkg_stat *stat = (void *)pos_pd + off;
  643 +
  644 + if (pos_blkg->online)
  645 + sum += blkg_stat_read(stat);
  646 + }
  647 + rcu_read_unlock();
  648 +
  649 + return sum;
  650 +}
  651 +EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum);
  652 +
  653 +/**
  654 + * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
  655 + * @pd: policy private data of interest
  656 + * @off: offset to the blkg_stat in @pd
  657 + *
  658 + * Collect the blkg_rwstat specified by @off from @pd and all its online
  659 + * descendants and return the sum. The caller must be holding the queue
  660 + * lock for online tests.
  661 + */
  662 +struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
  663 + int off)
  664 +{
  665 + struct blkcg_policy *pol = blkcg_policy[pd->plid];
  666 + struct blkcg_gq *pos_blkg;
  667 + struct cgroup *pos_cgrp;
  668 + struct blkg_rwstat sum;
  669 + int i;
  670 +
  671 + lockdep_assert_held(pd->blkg->q->queue_lock);
  672 +
  673 + sum = blkg_rwstat_read((void *)pd + off);
  674 +
  675 + rcu_read_lock();
  676 + blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
  677 + struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
  678 + struct blkg_rwstat *rwstat = (void *)pos_pd + off;
  679 + struct blkg_rwstat tmp;
  680 +
  681 + if (!pos_blkg->online)
  682 + continue;
  683 +
  684 + tmp = blkg_rwstat_read(rwstat);
  685 +
  686 + for (i = 0; i < BLKG_RWSTAT_NR; i++)
  687 + sum.cnt[i] += tmp.cnt[i];
  688 + }
  689 + rcu_read_unlock();
  690 +
  691 + return sum;
  692 +}
  693 +EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
587 694  
588 695 /**
589 696 * blkg_conf_prep - parse and prepare for per-blkg config update
... ... @@ -164,6 +164,10 @@
164 164 u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
165 165 int off);
166 166  
  167 +u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off);
  168 +struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
  169 + int off);
  170 +
167 171 struct blkg_conf_ctx {
168 172 struct gendisk *disk;
169 173 struct blkcg_gq *blkg;
... ... @@ -414,6 +418,18 @@
414 418 }
415 419  
416 420 /**
  421 + * blkg_stat_merge - merge a blkg_stat into another
  422 + * @to: the destination blkg_stat
  423 + * @from: the source
  424 + *
  425 + * Add @from's count to @to.
  426 + */
  427 +static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from)
  428 +{
  429 + blkg_stat_add(to, blkg_stat_read(from));
  430 +}
  431 +
  432 +/**
417 433 * blkg_rwstat_add - add a value to a blkg_rwstat
418 434 * @rwstat: target blkg_rwstat
419 435 * @rw: mask of REQ_{WRITE|SYNC}
... ... @@ -482,6 +498,25 @@
482 498 static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
483 499 {
484 500 memset(rwstat->cnt, 0, sizeof(rwstat->cnt));
  501 +}
  502 +
  503 +/**
  504 + * blkg_rwstat_merge - merge a blkg_rwstat into another
  505 + * @to: the destination blkg_rwstat
  506 + * @from: the source
  507 + *
  508 + * Add @from's counts to @to.
  509 + */
  510 +static inline void blkg_rwstat_merge(struct blkg_rwstat *to,
  511 + struct blkg_rwstat *from)
  512 +{
  513 + struct blkg_rwstat v = blkg_rwstat_read(from);
  514 + int i;
  515 +
  516 + u64_stats_update_begin(&to->syncp);
  517 + for (i = 0; i < BLKG_RWSTAT_NR; i++)
  518 + to->cnt[i] += v.cnt[i];
  519 + u64_stats_update_end(&to->syncp);
485 520 }
486 521  
487 522 #else /* CONFIG_BLK_CGROUP */