Commit 03aa264ac15637b6f98374270bcdf31400965505

Authored by Tejun Heo
Committed by Jens Axboe
1 parent 4eef304998

blkcg: let blkcg core manage per-queue blkg list and counter

With the previous patch to move blkg list heads and counters to
request_queue and blkg, the logic to manage them in both policies is
almost identical and can be moved to blkcg core.

This patch moves the blkg link logic into blkg_lookup_create(),
implements common blkg unlink code in blkg_destroy(), and updates
blkg_destroy_all() so that it is policy-specific and can skip the root
group.  The updated blkg_destroy_all() is now used both to clear the
queue for bypassing and elv switching, and to release all blkgs on
q exit.
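
In concrete terms the two kinds of callers end up looking roughly as
follows (condensed from the hunks below; locking, error handling and
the surrounding functions are omitted):

	/* bypass path (policy [de]registration) and elevator switch:
	 * shoot down every policy's blkgs but keep the root groups */
	for (i = 0; i < BLKIO_NR_POLICIES; i++)
		blkg_destroy_all(q, i, false);

	/* policy exit paths release everything, root group included */
	blkg_destroy_all(q, BLKIO_POLICY_THROTL, true);	/* blk-throttle */
	blkg_destroy_all(q, BLKIO_POLICY_PROP, true);	/* cfq */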

This patch introduces a race window where policy [de]registration may
race against queue blkg clearing.  This can only be a problem on cfq
unload and shouldn't be a real problem in practice (and we have many
other places where this race already exists).  Future patches will
remove these unlikely races.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Showing 6 changed files with 74 additions and 221 deletions
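
As orientation before reading the diff: the following is a minimal,
self-contained userspace sketch of the pattern being consolidated here,
one shared per-group destroy helper plus a policy-aware destroy-all
that can optionally skip the root group.  The types and names (struct
queue, group_destroy, destroy_all) are invented for illustration only;
the kernel's locking, RCU protection and blkcg-side linkage are left
out on purpose.

	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	enum policy_id { POLICY_PROP, POLICY_THROTL, NR_POLICIES };

	struct group {
		bool is_root;
		struct group *next;		/* stand-in for blkg->q_node[plid] */
	};

	struct queue {				/* stand-in for request_queue */
		struct group *groups[NR_POLICIES];
		int nr_groups[NR_POLICIES];
	};

	/* common unlink + release, analogous to the new blkg_destroy() */
	static void group_destroy(struct queue *q, enum policy_id plid,
				  struct group **link)
	{
		struct group *grp = *link;

		*link = grp->next;		/* unlink from the queue list */
		q->nr_groups[plid]--;		/* drop the per-queue counter */
		free(grp);			/* release the creation reference */
	}

	/* policy-aware teardown, analogous to the new blkg_destroy_all() */
	static void destroy_all(struct queue *q, enum policy_id plid,
				bool destroy_root)
	{
		struct group **link = &q->groups[plid];

		while (*link) {
			if (!destroy_root && (*link)->is_root) {
				link = &(*link)->next;	/* skip the root group */
				continue;
			}
			group_destroy(q, plid, link);
		}
	}

	int main(void)
	{
		struct queue q = { 0 };
		struct group *root = calloc(1, sizeof(*root));
		struct group *child = calloc(1, sizeof(*child));

		if (!root || !child)
			return 1;
		root->is_root = true;
		child->next = root;
		q.groups[POLICY_PROP] = child;
		q.nr_groups[POLICY_PROP] = 2;

		/* bypass/elevator-switch style: root group survives */
		destroy_all(&q, POLICY_PROP, false);
		printf("after non-root teardown: %d group(s) left\n",
		       q.nr_groups[POLICY_PROP]);

		/* queue/policy exit style: everything goes */
		destroy_all(&q, POLICY_PROP, true);
		printf("after full teardown: %d group(s) left\n",
		       q.nr_groups[POLICY_PROP]);
		return 0;
	}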

... ... @@ -596,8 +596,11 @@
596 596 /* insert */
597 597 spin_lock(&blkcg->lock);
598 598 swap(blkg, new_blkg);
  599 +
599 600 hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
600   - pol->ops.blkio_link_group_fn(q, blkg);
  601 + list_add(&blkg->q_node[plid], &q->blkg_list[plid]);
  602 + q->nr_blkgs[plid]++;
  603 +
601 604 spin_unlock(&blkcg->lock);
602 605 out:
603 606 blkg_free(new_blkg);
... ... @@ -646,36 +649,69 @@
646 649 }
647 650 EXPORT_SYMBOL_GPL(blkg_lookup);
648 651  
649   -void blkg_destroy_all(struct request_queue *q)
  652 +static void blkg_destroy(struct blkio_group *blkg, enum blkio_policy_id plid)
650 653 {
651   - struct blkio_policy_type *pol;
  654 + struct request_queue *q = blkg->q;
652 655  
  656 + lockdep_assert_held(q->queue_lock);
  657 +
  658 + /* Something wrong if we are trying to remove same group twice */
  659 + WARN_ON_ONCE(list_empty(&blkg->q_node[plid]));
  660 + list_del_init(&blkg->q_node[plid]);
  661 +
  662 + WARN_ON_ONCE(q->nr_blkgs[plid] <= 0);
  663 + q->nr_blkgs[plid]--;
  664 +
  665 + /*
  666 + * Put the reference taken at the time of creation so that when all
  667 + * queues are gone, group can be destroyed.
  668 + */
  669 + blkg_put(blkg);
  670 +}
  671 +
  672 +void blkg_destroy_all(struct request_queue *q, enum blkio_policy_id plid,
  673 + bool destroy_root)
  674 +{
  675 + struct blkio_group *blkg, *n;
  676 +
653 677 while (true) {
654 678 bool done = true;
655 679  
656   - spin_lock(&blkio_list_lock);
657 680 spin_lock_irq(q->queue_lock);
658 681  
659   - /*
660   - * clear_queue_fn() might return with non-empty group list
661   - * if it raced cgroup removal and lost. cgroup removal is
662   - * guaranteed to make forward progress and retrying after a
663   - * while is enough. This ugliness is scheduled to be
664   - * removed after locking update.
665   - */
666   - list_for_each_entry(pol, &blkio_list, list)
667   - if (!pol->ops.blkio_clear_queue_fn(q))
  682 + list_for_each_entry_safe(blkg, n, &q->blkg_list[plid],
  683 + q_node[plid]) {
  684 + /* skip root? */
  685 + if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
  686 + continue;
  687 +
  688 + /*
  689 + * If cgroup removal path got to blk_group first
  690 + * and removed it from cgroup list, then it will
  691 + * take care of destroying cfqg also.
  692 + */
  693 + if (!blkiocg_del_blkio_group(blkg))
  694 + blkg_destroy(blkg, plid);
  695 + else
668 696 done = false;
  697 + }
669 698  
670 699 spin_unlock_irq(q->queue_lock);
671   - spin_unlock(&blkio_list_lock);
672 700  
  701 + /*
  702 + * Group list may not be empty if we raced cgroup removal
  703 + * and lost. cgroup removal is guaranteed to make forward
  704 + * progress and retrying after a while is enough. This
  705 + * ugliness is scheduled to be removed after locking
  706 + * update.
  707 + */
673 708 if (done)
674 709 break;
675 710  
676 711 msleep(10); /* just some random duration I like */
677 712 }
678 713 }
  714 +EXPORT_SYMBOL_GPL(blkg_destroy_all);
679 715  
680 716 static void blkg_rcu_free(struct rcu_head *rcu_head)
681 717 {
... ... @@ -1549,11 +1585,13 @@
1549 1585 * this event.
1550 1586 */
1551 1587 spin_lock(&blkio_list_lock);
  1588 + spin_lock_irqsave(q->queue_lock, flags);
1552 1589 list_for_each_entry(blkiop, &blkio_list, list) {
1553 1590 if (blkiop->plid != blkg->plid)
1554 1591 continue;
1555   - blkiop->ops.blkio_unlink_group_fn(q, blkg);
  1592 + blkg_destroy(blkg, blkiop->plid);
1556 1593 }
  1594 + spin_unlock_irqrestore(q->queue_lock, flags);
1557 1595 spin_unlock(&blkio_list_lock);
1558 1596 } while (1);
1559 1597  
... ... @@ -1695,12 +1733,14 @@
1695 1733 __acquires(&all_q_mutex)
1696 1734 {
1697 1735 struct request_queue *q;
  1736 + int i;
1698 1737  
1699 1738 mutex_lock(&all_q_mutex);
1700 1739  
1701 1740 list_for_each_entry(q, &all_q_list, all_q_node) {
1702 1741 blk_queue_bypass_start(q);
1703   - blkg_destroy_all(q);
  1742 + for (i = 0; i < BLKIO_NR_POLICIES; i++)
  1743 + blkg_destroy_all(q, i, false);
1704 1744 }
1705 1745 }
1706 1746  
... ... @@ -196,11 +196,6 @@
196 196 };
197 197  
198 198 typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
199   -typedef void (blkio_link_group_fn)(struct request_queue *q,
200   - struct blkio_group *blkg);
201   -typedef void (blkio_unlink_group_fn)(struct request_queue *q,
202   - struct blkio_group *blkg);
203   -typedef bool (blkio_clear_queue_fn)(struct request_queue *q);
204 199 typedef void (blkio_update_group_weight_fn)(struct request_queue *q,
205 200 struct blkio_group *blkg, unsigned int weight);
206 201 typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q,
... ... @@ -214,9 +209,6 @@
214 209  
215 210 struct blkio_policy_ops {
216 211 blkio_init_group_fn *blkio_init_group_fn;
217   - blkio_link_group_fn *blkio_link_group_fn;
218   - blkio_unlink_group_fn *blkio_unlink_group_fn;
219   - blkio_clear_queue_fn *blkio_clear_queue_fn;
220 212 blkio_update_group_weight_fn *blkio_update_group_weight_fn;
221 213 blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
222 214 blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
... ... @@ -238,7 +230,8 @@
238 230 /* Blkio controller policy registration */
239 231 extern void blkio_policy_register(struct blkio_policy_type *);
240 232 extern void blkio_policy_unregister(struct blkio_policy_type *);
241   -extern void blkg_destroy_all(struct request_queue *q);
  233 +extern void blkg_destroy_all(struct request_queue *q,
  234 + enum blkio_policy_id plid, bool destroy_root);
242 235  
243 236 /**
244 237 * blkg_to_pdata - get policy private data
... ... @@ -319,7 +312,9 @@
319 312 static inline void blkcg_exit_queue(struct request_queue *q) { }
320 313 static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
321 314 static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
322   -static inline void blkg_destroy_all(struct request_queue *q) { }
  315 +static inline void blkg_destroy_all(struct request_queue *q,
  316 + enum blkio_policy_id plid,
  317 + bool destroy_root) { }
323 318  
324 319 static inline void *blkg_to_pdata(struct blkio_group *blkg,
325 320 struct blkio_policy_type *pol) { return NULL; }
block/blk-throttle.c
... ... @@ -157,14 +157,6 @@
157 157 tg->iops[WRITE] = -1;
158 158 }
159 159  
160   -static void throtl_link_blkio_group(struct request_queue *q,
161   - struct blkio_group *blkg)
162   -{
163   - list_add(&blkg->q_node[BLKIO_POLICY_THROTL],
164   - &q->blkg_list[BLKIO_POLICY_THROTL]);
165   - q->nr_blkgs[BLKIO_POLICY_THROTL]++;
166   -}
167   -
168 160 static struct
169 161 throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
170 162 {
... ... @@ -813,89 +805,6 @@
813 805 }
814 806 }
815 807  
816   -static void
817   -throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg)
818   -{
819   - struct blkio_group *blkg = tg_to_blkg(tg);
820   -
821   - /* Something wrong if we are trying to remove same group twice */
822   - WARN_ON_ONCE(list_empty(&blkg->q_node[BLKIO_POLICY_THROTL]));
823   -
824   - list_del_init(&blkg->q_node[BLKIO_POLICY_THROTL]);
825   -
826   - /*
827   - * Put the reference taken at the time of creation so that when all
828   - * queues are gone, group can be destroyed.
829   - */
830   - blkg_put(tg_to_blkg(tg));
831   - td->queue->nr_blkgs[BLKIO_POLICY_THROTL]--;
832   -}
833   -
834   -static bool throtl_release_tgs(struct throtl_data *td, bool release_root)
835   -{
836   - struct request_queue *q = td->queue;
837   - struct blkio_group *blkg, *n;
838   - bool empty = true;
839   -
840   - list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL],
841   - q_node[BLKIO_POLICY_THROTL]) {
842   - struct throtl_grp *tg = blkg_to_tg(blkg);
843   -
844   - /* skip root? */
845   - if (!release_root && tg == td->root_tg)
846   - continue;
847   -
848   - /*
849   - * If cgroup removal path got to blk_group first and removed
850   - * it from cgroup list, then it will take care of destroying
851   - * cfqg also.
852   - */
853   - if (!blkiocg_del_blkio_group(blkg))
854   - throtl_destroy_tg(td, tg);
855   - else
856   - empty = false;
857   - }
858   - return empty;
859   -}
860   -
861   -/*
862   - * Blk cgroup controller notification saying that blkio_group object is being
863   - * delinked as associated cgroup object is going away. That also means that
864   - * no new IO will come in this group. So get rid of this group as soon as
865   - * any pending IO in the group is finished.
866   - *
867   - * This function is called under rcu_read_lock(). @q is the rcu protected
868   - * pointer. That means @q is a valid request_queue pointer as long as we
869   - * are rcu read lock.
870   - *
871   - * @q was fetched from blkio_group under blkio_cgroup->lock. That means
872   - * it should not be NULL as even if queue was going away, cgroup deltion
873   - * path got to it first.
874   - */
875   -void throtl_unlink_blkio_group(struct request_queue *q,
876   - struct blkio_group *blkg)
877   -{
878   - unsigned long flags;
879   -
880   - spin_lock_irqsave(q->queue_lock, flags);
881   - throtl_destroy_tg(q->td, blkg_to_tg(blkg));
882   - spin_unlock_irqrestore(q->queue_lock, flags);
883   -}
884   -
885   -static bool throtl_clear_queue(struct request_queue *q)
886   -{
887   - lockdep_assert_held(q->queue_lock);
888   -
889   - /*
890   - * Clear tgs but leave the root one alone. This is necessary
891   - * because root_tg is expected to be persistent and safe because
892   - * blk-throtl can never be disabled while @q is alive. This is a
893   - * kludge to prepare for unified blkg. This whole function will be
894   - * removed soon.
895   - */
896   - return throtl_release_tgs(q->td, false);
897   -}
898   -
899 808 static void throtl_update_blkio_group_common(struct throtl_data *td,
900 809 struct throtl_grp *tg)
901 810 {
... ... @@ -960,9 +869,6 @@
960 869 static struct blkio_policy_type blkio_policy_throtl = {
961 870 .ops = {
962 871 .blkio_init_group_fn = throtl_init_blkio_group,
963   - .blkio_link_group_fn = throtl_link_blkio_group,
964   - .blkio_unlink_group_fn = throtl_unlink_blkio_group,
965   - .blkio_clear_queue_fn = throtl_clear_queue,
966 872 .blkio_update_group_read_bps_fn =
967 873 throtl_update_blkio_group_read_bps,
968 874 .blkio_update_group_write_bps_fn =
... ... @@ -1148,12 +1054,11 @@
1148 1054  
1149 1055 throtl_shutdown_wq(q);
1150 1056  
1151   - spin_lock_irq(q->queue_lock);
1152   - throtl_release_tgs(td, true);
  1057 + blkg_destroy_all(q, BLKIO_POLICY_THROTL, true);
1153 1058  
1154 1059 /* If there are other groups */
  1060 + spin_lock_irq(q->queue_lock);
1155 1061 wait = q->nr_blkgs[BLKIO_POLICY_THROTL];
1156   -
1157 1062 spin_unlock_irq(q->queue_lock);
1158 1063  
1159 1064 /*
... ... @@ -1045,14 +1045,6 @@
1045 1045 cfqg->needs_update = true;
1046 1046 }
1047 1047  
1048   -static void cfq_link_blkio_group(struct request_queue *q,
1049   - struct blkio_group *blkg)
1050   -{
1051   - list_add(&blkg->q_node[BLKIO_POLICY_PROP],
1052   - &q->blkg_list[BLKIO_POLICY_PROP]);
1053   - q->nr_blkgs[BLKIO_POLICY_PROP]++;
1054   -}
1055   -
1056 1048 static void cfq_init_blkio_group(struct blkio_group *blkg)
1057 1049 {
1058 1050 struct cfq_group *cfqg = blkg_to_cfqg(blkg);
... ... @@ -1096,84 +1088,6 @@
1096 1088 blkg_get(cfqg_to_blkg(cfqg));
1097 1089 }
1098 1090  
1099   -static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
1100   -{
1101   - struct blkio_group *blkg = cfqg_to_blkg(cfqg);
1102   -
1103   - /* Something wrong if we are trying to remove same group twice */
1104   - BUG_ON(list_empty(&blkg->q_node[BLKIO_POLICY_PROP]));
1105   -
1106   - list_del_init(&blkg->q_node[BLKIO_POLICY_PROP]);
1107   -
1108   - BUG_ON(cfqd->queue->nr_blkgs[BLKIO_POLICY_PROP] <= 0);
1109   - cfqd->queue->nr_blkgs[BLKIO_POLICY_PROP]--;
1110   -
1111   - /*
1112   - * Put the reference taken at the time of creation so that when all
1113   - * queues are gone, group can be destroyed.
1114   - */
1115   - blkg_put(cfqg_to_blkg(cfqg));
1116   -}
1117   -
1118   -static bool cfq_release_cfq_groups(struct cfq_data *cfqd)
1119   -{
1120   - struct request_queue *q = cfqd->queue;
1121   - struct blkio_group *blkg, *n;
1122   - bool empty = true;
1123   -
1124   - list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_PROP],
1125   - q_node[BLKIO_POLICY_PROP]) {
1126   - /*
1127   - * If cgroup removal path got to blk_group first and removed
1128   - * it from cgroup list, then it will take care of destroying
1129   - * cfqg also.
1130   - */
1131   - if (!cfq_blkiocg_del_blkio_group(blkg))
1132   - cfq_destroy_cfqg(cfqd, blkg_to_cfqg(blkg));
1133   - else
1134   - empty = false;
1135   - }
1136   - return empty;
1137   -}
1138   -
1139   -/*
1140   - * Blk cgroup controller notification saying that blkio_group object is being
1141   - * delinked as associated cgroup object is going away. That also means that
1142   - * no new IO will come in this group. So get rid of this group as soon as
1143   - * any pending IO in the group is finished.
1144   - *
1145   - * This function is called under rcu_read_lock(). key is the rcu protected
1146   - * pointer. That means @q is a valid request_queue pointer as long as we
1147   - * are rcu read lock.
1148   - *
1149   - * @q was fetched from blkio_group under blkio_cgroup->lock. That means
1150   - * it should not be NULL as even if elevator was exiting, cgroup deltion
1151   - * path got to it first.
1152   - */
1153   -static void cfq_unlink_blkio_group(struct request_queue *q,
1154   - struct blkio_group *blkg)
1155   -{
1156   - struct cfq_data *cfqd = q->elevator->elevator_data;
1157   - unsigned long flags;
1158   -
1159   - spin_lock_irqsave(q->queue_lock, flags);
1160   - cfq_destroy_cfqg(cfqd, blkg_to_cfqg(blkg));
1161   - spin_unlock_irqrestore(q->queue_lock, flags);
1162   -}
1163   -
1164   -static struct elevator_type iosched_cfq;
1165   -
1166   -static bool cfq_clear_queue(struct request_queue *q)
1167   -{
1168   - lockdep_assert_held(q->queue_lock);
1169   -
1170   - /* shoot down blkgs iff the current elevator is cfq */
1171   - if (!q->elevator || q->elevator->type != &iosched_cfq)
1172   - return true;
1173   -
1174   - return cfq_release_cfq_groups(q->elevator->elevator_data);
1175   -}
1176   -
1177 1091 #else /* GROUP_IOSCHED */
1178 1092 static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
1179 1093 struct blkio_cgroup *blkcg)
... ... @@ -1186,8 +1100,6 @@
1186 1100 cfqq->cfqg = cfqg;
1187 1101 }
1188 1102  
1189   -static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
1190   -
1191 1103 #endif /* GROUP_IOSCHED */
1192 1104  
1193 1105 /*
... ... @@ -3547,17 +3459,20 @@
3547 3459 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
3548 3460  
3549 3461 cfq_put_async_queues(cfqd);
3550   - cfq_release_cfq_groups(cfqd);
3551 3462  
  3463 + spin_unlock_irq(q->queue_lock);
  3464 +
  3465 + blkg_destroy_all(q, BLKIO_POLICY_PROP, true);
  3466 +
3552 3467 #ifdef CONFIG_BLK_CGROUP
3553 3468 /*
3554 3469 * If there are groups which we could not unlink from blkcg list,
3555 3470 * wait for a rcu period for them to be freed.
3556 3471 */
  3472 + spin_lock_irq(q->queue_lock);
3557 3473 wait = q->nr_blkgs[BLKIO_POLICY_PROP];
3558   -#endif
3559 3474 spin_unlock_irq(q->queue_lock);
3560   -
  3475 +#endif
3561 3476 cfq_shutdown_timer_wq(cfqd);
3562 3477  
3563 3478 /*
... ... @@ -3794,9 +3709,6 @@
3794 3709 static struct blkio_policy_type blkio_policy_cfq = {
3795 3710 .ops = {
3796 3711 .blkio_init_group_fn = cfq_init_blkio_group,
3797   - .blkio_link_group_fn = cfq_link_blkio_group,
3798   - .blkio_unlink_group_fn = cfq_unlink_blkio_group,
3799   - .blkio_clear_queue_fn = cfq_clear_queue,
3800 3712 .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
3801 3713 },
3802 3714 .plid = BLKIO_POLICY_PROP,
... ... @@ -876,7 +876,7 @@
876 876 {
877 877 struct elevator_queue *old = q->elevator;
878 878 bool registered = old->registered;
879   - int err;
  879 + int i, err;
880 880  
881 881 /*
882 882 * Turn on BYPASS and drain all requests w/ elevator private data.
... ... @@ -895,7 +895,8 @@
895 895 ioc_clear_queue(q);
896 896 spin_unlock_irq(q->queue_lock);
897 897  
898   - blkg_destroy_all(q);
  898 + for (i = 0; i < BLKIO_NR_POLICIES; i++)
  899 + blkg_destroy_all(q, i, false);
899 900  
900 901 /* allocate, init and register new elevator */
901 902 err = -ENOMEM;
include/linux/blkdev.h
... ... @@ -364,8 +364,8 @@
364 364 struct list_head icq_list;
365 365 #ifdef CONFIG_BLK_CGROUP
366 366 /* XXX: array size hardcoded to avoid include dependency (temporary) */
367   - struct list_head blkg_list[2];
368   - int nr_blkgs[2];
  367 + struct list_head blkg_list;
  368 + int nr_blkgs;
369 369 #endif
370 370  
371 371 struct queue_limits limits;