Commit 03aa264ac15637b6f98374270bcdf31400965505

Authored by Tejun Heo
Committed by Jens Axboe
1 parent 4eef304998

blkcg: let blkcg core manage per-queue blkg list and counter

With the previous patch to move blkg list heads and counters to
request_queue and blkg, the logic to manage them in both policies is
almost identical and can be moved to blkcg core.

This patch moves the blkg link logic into blkg_lookup_create(),
implements common blkg unlink code in blkg_destroy(), and updates
blkg_destroy_all() so that it is policy-specific and can skip the root
group.  The updated blkg_destroy_all() is now used both to clear the
queue for bypassing and elv switching, and to release all blkgs on
q exit.
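
In concrete terms the two kinds of callers end up looking roughly as
follows (condensed from the hunks below; locking, error handling and
the surrounding functions are omitted):

	/* bypass path (policy [de]registration) and elevator switch:
	 * shoot down every policy's blkgs but keep the root groups */
	for (i = 0; i < BLKIO_NR_POLICIES; i++)
		blkg_destroy_all(q, i, false);

	/* policy exit paths release everything, root group included */
	blkg_destroy_all(q, BLKIO_POLICY_THROTL, true);	/* blk-throttle */
	blkg_destroy_all(q, BLKIO_POLICY_PROP, true);	/* cfq */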

This patch introduces a race window where policy [de]registration may
race against queue blkg clearing.  This can only be a problem on cfq
unload and shouldn't be a real problem in practice (and we have many
other places where this race already exists).  Future patches will
remove these unlikely races.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Showing 6 changed files with 74 additions and 221 deletions
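
As orientation before reading the diff: the following is a minimal,
self-contained userspace sketch of the pattern being consolidated here,
one shared per-group destroy helper plus a policy-aware destroy-all
that can optionally skip the root group.  The types and names (struct
queue, group_destroy, destroy_all) are invented for illustration only;
the kernel's locking, RCU protection and blkcg-side linkage are left
out on purpose.

	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	enum policy_id { POLICY_PROP, POLICY_THROTL, NR_POLICIES };

	struct group {
		bool is_root;
		struct group *next;		/* stand-in for blkg->q_node[plid] */
	};

	struct queue {				/* stand-in for request_queue */
		struct group *groups[NR_POLICIES];
		int nr_groups[NR_POLICIES];
	};

	/* common unlink + release, analogous to the new blkg_destroy() */
	static void group_destroy(struct queue *q, enum policy_id plid,
				  struct group **link)
	{
		struct group *grp = *link;

		*link = grp->next;		/* unlink from the queue list */
		q->nr_groups[plid]--;		/* drop the per-queue counter */
		free(grp);			/* release the creation reference */
	}

	/* policy-aware teardown, analogous to the new blkg_destroy_all() */
	static void destroy_all(struct queue *q, enum policy_id plid,
				bool destroy_root)
	{
		struct group **link = &q->groups[plid];

		while (*link) {
			if (!destroy_root && (*link)->is_root) {
				link = &(*link)->next;	/* skip the root group */
				continue;
			}
			group_destroy(q, plid, link);
		}
	}

	int main(void)
	{
		struct queue q = { 0 };
		struct group *root = calloc(1, sizeof(*root));
		struct group *child = calloc(1, sizeof(*child));

		if (!root || !child)
			return 1;
		root->is_root = true;
		child->next = root;
		q.groups[POLICY_PROP] = child;
		q.nr_groups[POLICY_PROP] = 2;

		/* bypass/elevator-switch style: root group survives */
		destroy_all(&q, POLICY_PROP, false);
		printf("after non-root teardown: %d group(s) left\n",
		       q.nr_groups[POLICY_PROP]);

		/* queue/policy exit style: everything goes */
		destroy_all(&q, POLICY_PROP, true);
		printf("after full teardown: %d group(s) left\n",
		       q.nr_groups[POLICY_PROP]);
		return 0;
	}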

... ... @@ -596,8 +596,11 @@
596 596 /* insert */
597 597 spin_lock(&blkcg->lock);
598 598 swap(blkg, new_blkg);
  599 +
599 600 hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
600   - pol->ops.blkio_link_group_fn(q, blkg);
  601 + list_add(&blkg->q_node[plid], &q->blkg_list[plid]);
  602 + q->nr_blkgs[plid]++;
  603 +
601 604 spin_unlock(&blkcg->lock);
602 605 out:
603 606 blkg_free(new_blkg);
... ... @@ -646,36 +649,69 @@
646 649 }
647 650 EXPORT_SYMBOL_GPL(blkg_lookup);
648 651  
649   -void blkg_destroy_all(struct request_queue *q)
  652 +static void blkg_destroy(struct blkio_group *blkg, enum blkio_policy_id plid)
650 653 {
651   - struct blkio_policy_type *pol;
  654 + struct request_queue *q = blkg->q;
652 655  
  656 + lockdep_assert_held(q->queue_lock);
  657 +
  658 + /* Something wrong if we are trying to remove same group twice */
  659 + WARN_ON_ONCE(list_empty(&blkg->q_node[plid]));
  660 + list_del_init(&blkg->q_node[plid]);
  661 +
  662 + WARN_ON_ONCE(q->nr_blkgs[plid] <= 0);
  663 + q->nr_blkgs[plid]--;
  664 +
  665 + /*
  666 + * Put the reference taken at the time of creation so that when all
  667 + * queues are gone, group can be destroyed.
  668 + */
  669 + blkg_put(blkg);
  670 +}
  671 +
  672 +void blkg_destroy_all(struct request_queue *q, enum blkio_policy_id plid,
  673 + bool destroy_root)
  674 +{
  675 + struct blkio_group *blkg, *n;
  676 +
653 677 while (true) {
654 678 bool done = true;
655 679  
656   - spin_lock(&blkio_list_lock);
657 680 spin_lock_irq(q->queue_lock);
658 681  
659   - /*
660   - * clear_queue_fn() might return with non-empty group list
661   - * if it raced cgroup removal and lost. cgroup removal is
662   - * guaranteed to make forward progress and retrying after a
663   - * while is enough. This ugliness is scheduled to be
664   - * removed after locking update.
665   - */
666   - list_for_each_entry(pol, &blkio_list, list)
667   - if (!pol->ops.blkio_clear_queue_fn(q))
  682 + list_for_each_entry_safe(blkg, n, &q->blkg_list[plid],
  683 + q_node[plid]) {
  684 + /* skip root? */
  685 + if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
  686 + continue;
  687 +
  688 + /*
  689 + * If cgroup removal path got to blk_group first
  690 + * and removed it from cgroup list, then it will
  691 + * take care of destroying cfqg also.
  692 + */
  693 + if (!blkiocg_del_blkio_group(blkg))
  694 + blkg_destroy(blkg, plid);
  695 + else
668 696 done = false;
  697 + }
669 698  
670 699 spin_unlock_irq(q->queue_lock);
671   - spin_unlock(&blkio_list_lock);
672 700  
  701 + /*
  702 + * Group list may not be empty if we raced cgroup removal
  703 + * and lost. cgroup removal is guaranteed to make forward
  704 + * progress and retrying after a while is enough. This
  705 + * ugliness is scheduled to be removed after locking
  706 + * update.
  707 + */
673 708 if (done)
674 709 break;
675 710  
676 711 msleep(10); /* just some random duration I like */
677 712 }
678 713 }
  714 +EXPORT_SYMBOL_GPL(blkg_destroy_all);
679 715  
680 716 static void blkg_rcu_free(struct rcu_head *rcu_head)
681 717 {
... ... @@ -1549,11 +1585,13 @@
1549 1585 * this event.
1550 1586 */
1551 1587 spin_lock(&blkio_list_lock);
  1588 + spin_lock_irqsave(q->queue_lock, flags);
1552 1589 list_for_each_entry(blkiop, &blkio_list, list) {
1553 1590 if (blkiop->plid != blkg->plid)
1554 1591 continue;
1555   - blkiop->ops.blkio_unlink_group_fn(q, blkg);
  1592 + blkg_destroy(blkg, blkiop->plid);
1556 1593 }
  1594 + spin_unlock_irqrestore(q->queue_lock, flags);
1557 1595 spin_unlock(&blkio_list_lock);
1558 1596 } while (1);
1559 1597  
... ... @@ -1695,12 +1733,14 @@
1695 1733 __acquires(&all_q_mutex)
1696 1734 {
1697 1735 struct request_queue *q;
  1736 + int i;
1698 1737  
1699 1738 mutex_lock(&all_q_mutex);
1700 1739  
1701 1740 list_for_each_entry(q, &all_q_list, all_q_node) {
1702 1741 blk_queue_bypass_start(q);
1703   - blkg_destroy_all(q);
  1742 + for (i = 0; i < BLKIO_NR_POLICIES; i++)
  1743 + blkg_destroy_all(q, i, false);
1704 1744 }
1705 1745 }
1706 1746  
... ... @@ -196,11 +196,6 @@
196 196 };
197 197  
198 198 typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
199   -typedef void (blkio_link_group_fn)(struct request_queue *q,
200   - struct blkio_group *blkg);
201   -typedef void (blkio_unlink_group_fn)(struct request_queue *q,
202   - struct blkio_group *blkg);
203   -typedef bool (blkio_clear_queue_fn)(struct request_queue *q);
204 199 typedef void (blkio_update_group_weight_fn)(struct request_queue *q,
205 200 struct blkio_group *blkg, unsigned int weight);
206 201 typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q,
... ... @@ -214,9 +209,6 @@
214 209  
215 210 struct blkio_policy_ops {
216 211 blkio_init_group_fn *blkio_init_group_fn;
217   - blkio_link_group_fn *blkio_link_group_fn;
218   - blkio_unlink_group_fn *blkio_unlink_group_fn;
219   - blkio_clear_queue_fn *blkio_clear_queue_fn;
220 212 blkio_update_group_weight_fn *blkio_update_group_weight_fn;
221 213 blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
222 214 blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
... ... @@ -238,7 +230,8 @@
238 230 /* Blkio controller policy registration */
239 231 extern void blkio_policy_register(struct blkio_policy_type *);
240 232 extern void blkio_policy_unregister(struct blkio_policy_type *);
241   -extern void blkg_destroy_all(struct request_queue *q);
  233 +extern void blkg_destroy_all(struct request_queue *q,
  234 + enum blkio_policy_id plid, bool destroy_root);
242 235  
243 236 /**
244 237 * blkg_to_pdata - get policy private data
... ... @@ -319,7 +312,9 @@
319 312 static inline void blkcg_exit_queue(struct request_queue *q) { }
320 313 static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
321 314 static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
322   -static inline void blkg_destroy_all(struct request_queue *q) { }
  315 +static inline void blkg_destroy_all(struct request_queue *q,
  316 + enum blkio_policy_id plid,
  317 + bool destroy_root) { }
323 318  
324 319 static inline void *blkg_to_pdata(struct blkio_group *blkg,
325 320 struct blkio_policy_type *pol) { return NULL; }
block/blk-throttle.c
... ... @@ -157,14 +157,6 @@
157 157 tg->iops[WRITE] = -1;
158 158 }
159 159  
160   -static void throtl_link_blkio_group(struct request_queue *q,
161   - struct blkio_group *blkg)
162   -{
163   - list_add(&blkg->q_node[BLKIO_POLICY_THROTL],
164   - &q->blkg_list[BLKIO_POLICY_THROTL]);
165   - q->nr_blkgs[BLKIO_POLICY_THROTL]++;
166   -}
167   -
168 160 static struct
169 161 throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
170 162 {
... ... @@ -813,89 +805,6 @@
813 805 }
814 806 }
815 807  
816   -static void
817   -throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg)
818   -{
819   - struct blkio_group *blkg = tg_to_blkg(tg);
820   -
821   - /* Something wrong if we are trying to remove same group twice */
822   - WARN_ON_ONCE(list_empty(&blkg->q_node[BLKIO_POLICY_THROTL]));
823   -
824   - list_del_init(&blkg->q_node[BLKIO_POLICY_THROTL]);
825   -
826   - /*
827   - * Put the reference taken at the time of creation so that when all
828   - * queues are gone, group can be destroyed.
829   - */
830   - blkg_put(tg_to_blkg(tg));
831   - td->queue->nr_blkgs[BLKIO_POLICY_THROTL]--;
832   -}
833   -
834   -static bool throtl_release_tgs(struct throtl_data *td, bool release_root)
835   -{
836   - struct request_queue *q = td->queue;
837   - struct blkio_group *blkg, *n;
838   - bool empty = true;
839   -
840   - list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL],
841   - q_node[BLKIO_POLICY_THROTL]) {
842   - struct throtl_grp *tg = blkg_to_tg(blkg);
843   -
844   - /* skip root? */
845   - if (!release_root && tg == td->root_tg)
846   - continue;
847   -
848   - /*
849   - * If cgroup removal path got to blk_group first and removed
850   - * it from cgroup list, then it will take care of destroying
851   - * cfqg also.
852   - */
853   - if (!blkiocg_del_blkio_group(blkg))
854   - throtl_destroy_tg(td, tg);
855   - else
856   - empty = false;
857   - }
858   - return empty;
859   -}
860   -
861   -/*
862   - * Blk cgroup controller notification saying that blkio_group object is being
863   - * delinked as associated cgroup object is going away. That also means that
864   - * no new IO will come in this group. So get rid of this group as soon as
865   - * any pending IO in the group is finished.
866   - *
867   - * This function is called under rcu_read_lock(). @q is the rcu protected
868   - * pointer. That means @q is a valid request_queue pointer as long as we
869   - * are rcu read lock.
870   - *
871   - * @q was fetched from blkio_group under blkio_cgroup->lock. That means
872   - * it should not be NULL as even if queue was going away, cgroup deltion
873   - * path got to it first.
874   - */
875   -void throtl_unlink_blkio_group(struct request_queue *q,
876   - struct blkio_group *blkg)
877   -{
878   - unsigned long flags;
879   -
880   - spin_lock_irqsave(q->queue_lock, flags);
881   - throtl_destroy_tg(q->td, blkg_to_tg(blkg));
882   - spin_unlock_irqrestore(q->queue_lock, flags);
883   -}
884   -
885   -static bool throtl_clear_queue(struct request_queue *q)
886   -{
887   - lockdep_assert_held(q->queue_lock);
888   -
889   - /*
890   - * Clear tgs but leave the root one alone. This is necessary
891   - * because root_tg is expected to be persistent and safe because
892   - * blk-throtl can never be disabled while @q is alive. This is a
893   - * kludge to prepare for unified blkg. This whole function will be
894   - * removed soon.
895   - */
896   - return throtl_release_tgs(q->td, false);
897   -}
898   -
899 808 static void throtl_update_blkio_group_common(struct throtl_data *td,
900 809 struct throtl_grp *tg)
901 810 {
... ... @@ -960,9 +869,6 @@
960 869 static struct blkio_policy_type blkio_policy_throtl = {
961 870 .ops = {
962 871 .blkio_init_group_fn = throtl_init_blkio_group,
963   - .blkio_link_group_fn = throtl_link_blkio_group,
964   - .blkio_unlink_group_fn = throtl_unlink_blkio_group,
965   - .blkio_clear_queue_fn = throtl_clear_queue,
966 872 .blkio_update_group_read_bps_fn =
967 873 throtl_update_blkio_group_read_bps,
968 874 .blkio_update_group_write_bps_fn =
... ... @@ -1148,12 +1054,11 @@
1148 1054  
1149 1055 throtl_shutdown_wq(q);
1150 1056  
1151   - spin_lock_irq(q->queue_lock);
1152   - throtl_release_tgs(td, true);
  1057 + blkg_destroy_all(q, BLKIO_POLICY_THROTL, true);
1153 1058  
1154 1059 /* If there are other groups */
  1060 + spin_lock_irq(q->queue_lock);
1155 1061 wait = q->nr_blkgs[BLKIO_POLICY_THROTL];
1156   -
1157 1062 spin_unlock_irq(q->queue_lock);
1158 1063  
1159 1064 /*
... ... @@ -1045,14 +1045,6 @@
1045 1045 cfqg->needs_update = true;
1046 1046 }
1047 1047  
1048   -static void cfq_link_blkio_group(struct request_queue *q,
1049   - struct blkio_group *blkg)
1050   -{
1051   - list_add(&blkg->q_node[BLKIO_POLICY_PROP],
1052   - &q->blkg_list[BLKIO_POLICY_PROP]);
1053   - q->nr_blkgs[BLKIO_POLICY_PROP]++;
1054   -}
1055   -
1056 1048 static void cfq_init_blkio_group(struct blkio_group *blkg)
1057 1049 {
1058 1050 struct cfq_group *cfqg = blkg_to_cfqg(blkg);
... ... @@ -1096,84 +1088,6 @@
1096 1088 blkg_get(cfqg_to_blkg(cfqg));
1097 1089 }
1098 1090  
1099   -static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
1100   -{
1101   - struct blkio_group *blkg = cfqg_to_blkg(cfqg);
1102   -
1103   - /* Something wrong if we are trying to remove same group twice */
1104   - BUG_ON(list_empty(&blkg->q_node[BLKIO_POLICY_PROP]));
1105   -
1106   - list_del_init(&blkg->q_node[BLKIO_POLICY_PROP]);
1107   -
1108   - BUG_ON(cfqd->queue->nr_blkgs[BLKIO_POLICY_PROP] <= 0);
1109   - cfqd->queue->nr_blkgs[BLKIO_POLICY_PROP]--;
1110   -
1111   - /*
1112   - * Put the reference taken at the time of creation so that when all
1113   - * queues are gone, group can be destroyed.
1114   - */
1115   - blkg_put(cfqg_to_blkg(cfqg));
1116   -}
1117   -
1118   -static bool cfq_release_cfq_groups(struct cfq_data *cfqd)
1119   -{
1120   - struct request_queue *q = cfqd->queue;
1121   - struct blkio_group *blkg, *n;
1122   - bool empty = true;
1123   -
1124   - list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_PROP],
1125   - q_node[BLKIO_POLICY_PROP]) {
1126   - /*
1127   - * If cgroup removal path got to blk_group first and removed
1128   - * it from cgroup list, then it will take care of destroying
1129   - * cfqg also.
1130   - */
1131   - if (!cfq_blkiocg_del_blkio_group(blkg))
1132   - cfq_destroy_cfqg(cfqd, blkg_to_cfqg(blkg));
1133   - else
1134   - empty = false;
1135   - }
1136   - return empty;
1137   -}
1138   -
1139   -/*
1140   - * Blk cgroup controller notification saying that blkio_group object is being
1141   - * delinked as associated cgroup object is going away. That also means that
1142   - * no new IO will come in this group. So get rid of this group as soon as
1143   - * any pending IO in the group is finished.
1144   - *
1145   - * This function is called under rcu_read_lock(). key is the rcu protected
1146   - * pointer. That means @q is a valid request_queue pointer as long as we
1147   - * are rcu read lock.
1148   - *
1149   - * @q was fetched from blkio_group under blkio_cgroup->lock. That means
1150   - * it should not be NULL as even if elevator was exiting, cgroup deltion
1151   - * path got to it first.
1152   - */
1153   -static void cfq_unlink_blkio_group(struct request_queue *q,
1154   - struct blkio_group *blkg)
1155   -{
1156   - struct cfq_data *cfqd = q->elevator->elevator_data;
1157   - unsigned long flags;
1158   -
1159   - spin_lock_irqsave(q->queue_lock, flags);
1160   - cfq_destroy_cfqg(cfqd, blkg_to_cfqg(blkg));
1161   - spin_unlock_irqrestore(q->queue_lock, flags);
1162   -}
1163   -
1164   -static struct elevator_type iosched_cfq;
1165   -
1166   -static bool cfq_clear_queue(struct request_queue *q)
1167   -{
1168   - lockdep_assert_held(q->queue_lock);
1169   -
1170   - /* shoot down blkgs iff the current elevator is cfq */
1171   - if (!q->elevator || q->elevator->type != &iosched_cfq)
1172   - return true;
1173   -
1174   - return cfq_release_cfq_groups(q->elevator->elevator_data);
1175   -}
1176   -
1177 1091 #else /* GROUP_IOSCHED */
1178 1092 static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
1179 1093 struct blkio_cgroup *blkcg)
... ... @@ -1186,8 +1100,6 @@
1186 1100 cfqq->cfqg = cfqg;
1187 1101 }
1188 1102  
1189   -static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
1190   -
1191 1103 #endif /* GROUP_IOSCHED */
1192 1104  
1193 1105 /*
... ... @@ -3547,17 +3459,20 @@
3547 3459 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
3548 3460  
3549 3461 cfq_put_async_queues(cfqd);
3550   - cfq_release_cfq_groups(cfqd);
3551 3462  
  3463 + spin_unlock_irq(q->queue_lock);
  3464 +
  3465 + blkg_destroy_all(q, BLKIO_POLICY_PROP, true);
  3466 +
3552 3467 #ifdef CONFIG_BLK_CGROUP
3553 3468 /*
3554 3469 * If there are groups which we could not unlink from blkcg list,
3555 3470 * wait for a rcu period for them to be freed.
3556 3471 */
  3472 + spin_lock_irq(q->queue_lock);
3557 3473 wait = q->nr_blkgs[BLKIO_POLICY_PROP];
3558   -#endif
3559 3474 spin_unlock_irq(q->queue_lock);
3560   -
  3475 +#endif
3561 3476 cfq_shutdown_timer_wq(cfqd);
3562 3477  
3563 3478 /*
... ... @@ -3794,9 +3709,6 @@
3794 3709 static struct blkio_policy_type blkio_policy_cfq = {
3795 3710 .ops = {
3796 3711 .blkio_init_group_fn = cfq_init_blkio_group,
3797   - .blkio_link_group_fn = cfq_link_blkio_group,
3798   - .blkio_unlink_group_fn = cfq_unlink_blkio_group,
3799   - .blkio_clear_queue_fn = cfq_clear_queue,
3800 3712 .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
3801 3713 },
3802 3714 .plid = BLKIO_POLICY_PROP,
... ... @@ -876,7 +876,7 @@
876 876 {
877 877 struct elevator_queue *old = q->elevator;
878 878 bool registered = old->registered;
879   - int err;
  879 + int i, err;
880 880  
881 881 /*
882 882 * Turn on BYPASS and drain all requests w/ elevator private data.
... ... @@ -895,7 +895,8 @@
895 895 ioc_clear_queue(q);
896 896 spin_unlock_irq(q->queue_lock);
897 897  
898   - blkg_destroy_all(q);
  898 + for (i = 0; i < BLKIO_NR_POLICIES; i++)
  899 + blkg_destroy_all(q, i, false);
899 900  
900 901 /* allocate, init and register new elevator */
901 902 err = -ENOMEM;
include/linux/blkdev.h
... ... @@ -364,8 +364,8 @@
364 364 struct list_head icq_list;
365 365 #ifdef CONFIG_BLK_CGROUP
366 366 /* XXX: array size hardcoded to avoid include dependency (temporary) */
367   - struct list_head blkg_list[2];
368   - int nr_blkgs[2];
  367 + struct list_head blkg_list;
  368 + int nr_blkgs;
369 369 #endif
370 370  
371 371 struct queue_limits limits;