Commit cd1604fab4f95f7cfc227d3955fd7ae14da61f38

Authored by Tejun Heo
Committed by Jens Axboe
1 parent f51b802c17

blkcg: factor out blkio_group creation

Currently both blk-throttle and cfq-iosched implement their own
blkio_group creation code in throtl_get_tg() and cfq_get_cfqg().  This
patch factors out the common code into blkg_lookup_create(), which
returns an ERR_PTR value so that transitional failures due to queue
bypass can be distinguished from other failures.
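
For example, a policy can distinguish the two failure modes roughly as
follows (condensed from the throtl_lookup_create_tg() hunk in the diff
below; an ERR_PTR return on a still-live queue means a transient
bypass, so the caller falls back to the root group):

    blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false);
    if (!IS_ERR(blkg))
            tg = tg_of_blkg(blkg);
    else if (!blk_queue_dead(q))
            tg = td->root_tg;       /* transient bypass, fall back to root */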

* New blkio_policy_ops methods blkio_alloc_group_fn() and
  blkio_link_group_fn() are added.  Both are transitional and will be
  removed once the blkg management code is fully moved into
  blk-cgroup.c.
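
  For illustration, blk-throttle wires the new hooks up like this
  (condensed from the blkio_policy_throtl hunk in the diff below; the
  other callbacks and fields are unchanged and omitted here):

      static struct blkio_policy_type blkio_policy_throtl = {
              .ops = {
                      .blkio_alloc_group_fn = throtl_alloc_blkio_group,
                      .blkio_link_group_fn = throtl_link_blkio_group,
                      .blkio_unlink_group_fn = throtl_unlink_blkio_group,
                      /* ... remaining callbacks unchanged ... */
              },
      };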

* blkio_alloc_group_fn() allocates the policy-specific blkg, which is
  usually a larger data structure with the blkg as its first entry,
  and initializes it.  Note that initialization of the blkg proper,
  including the percpu stats, is the responsibility of blk-cgroup
  proper.

  Note that default config (weight, bps...) initialization is done
  from this method; doing it anywhere else would violate the locking
  order between the blkcg and queue locks via the blkcg_get_CONF()
  functions.
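
  For illustration, the blk-throttle allocator in the diff below does
  roughly the following (condensed; policy init details omitted):

      static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q,
                                                          struct blkio_cgroup *blkcg)
      {
              struct throtl_grp *tg;

              tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, q->node);
              if (!tg)
                      return NULL;

              /* policy-specific initialization of @tg goes here */

              /* default config, read from the blkcg with no queue lock held */
              tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
              tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
              tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
              tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);

              /* the embedded blkg itself is initialized by blk-cgroup proper */
              return &tg->blkg;
      }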

* blkio_link_group_fn() is called under queue_lock and is responsible
  for linking the blkg to the queue.  The blkcg side is handled by
  blk-cgroup proper.
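
  For illustration, blk-throttle's link hook (condensed from the
  throtl_link_blkio_group() hunk in the diff below):

      static void throtl_link_blkio_group(struct request_queue *q,
                                          struct blkio_group *blkg)
      {
              struct throtl_data *td = q->td;
              struct throtl_grp *tg = tg_of_blkg(blkg);

              __throtl_tg_fill_dev_details(td, tg);

              /* runs under q->queue_lock; only the queue side is linked here */
              hlist_add_head(&tg->tg_node, &td->tg_list);
              td->nr_undestroyed_grps++;
      }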

* The common blkg creation function is named blkg_lookup_create(), and
  blkiocg_lookup_group() is renamed to blkg_lookup() for consistency.
  The related throtl / cfq functions are renamed likewise.

This simplifies blkcg policy implementations and enables further
cleanup.

-v2: Vivek noticed that blkg_lookup_create() incorrectly tested
     blk_queue_dead() instead of blk_queue_bypass(), which let callers
     end up creating a new blkg on a bypassing queue.  This bug was
     introduced while relocating the bypass patches before this one.
     Fixed.

-v3: The ERR_PTR patch is folded into this one.  @for_root is added to
     blkg_lookup_create() to allow creating the root group on a
     bypassed queue during elevator switch.
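
     For reference, blk_throtl_init() in the diff below creates the
     root group this way (cfq_init_queue() does the same with
     BLKIO_POLICY_PROP):

         rcu_read_lock();
         spin_lock_irq(q->queue_lock);

         blkg = blkg_lookup_create(&blkio_root_cgroup, q,
                                   BLKIO_POLICY_THROTL, true);  /* @for_root */
         if (!IS_ERR(blkg))
                 td->root_tg = tg_of_blkg(blkg);

         spin_unlock_irq(q->queue_lock);
         rcu_read_unlock();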

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Showing 5 changed files with 193 additions and 248 deletions

block/blk-cgroup.c
... ... @@ -465,38 +465,93 @@
465 465 }
466 466 EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
467 467  
468   -/*
469   - * This function allocates the per cpu stats for blkio_group. Should be called
470   - * from sleepable context as alloc_per_cpu() requires that.
471   - */
472   -int blkio_alloc_blkg_stats(struct blkio_group *blkg)
  468 +struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
  469 + struct request_queue *q,
  470 + enum blkio_policy_id plid,
  471 + bool for_root)
  472 + __releases(q->queue_lock) __acquires(q->queue_lock)
473 473 {
474   - /* Allocate memory for per cpu stats */
475   - blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
476   - if (!blkg->stats_cpu)
477   - return -ENOMEM;
478   - return 0;
479   -}
480   -EXPORT_SYMBOL_GPL(blkio_alloc_blkg_stats);
  474 + struct blkio_policy_type *pol = blkio_policy[plid];
  475 + struct blkio_group *blkg, *new_blkg;
481 476  
482   -void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
483   - struct blkio_group *blkg, struct request_queue *q, dev_t dev,
484   - enum blkio_policy_id plid)
485   -{
486   - unsigned long flags;
  477 + WARN_ON_ONCE(!rcu_read_lock_held());
  478 + lockdep_assert_held(q->queue_lock);
487 479  
488   - spin_lock_irqsave(&blkcg->lock, flags);
489   - spin_lock_init(&blkg->stats_lock);
490   - rcu_assign_pointer(blkg->q, q);
491   - blkg->blkcg_id = css_id(&blkcg->css);
  480 + /*
  481 + * This could be the first entry point of blkcg implementation and
  482 + * we shouldn't allow anything to go through for a bypassing queue.
  483 + * The following can be removed if blkg lookup is guaranteed to
  484 + * fail on a bypassing queue.
  485 + */
  486 + if (unlikely(blk_queue_bypass(q)) && !for_root)
  487 + return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
  488 +
  489 + blkg = blkg_lookup(blkcg, q, plid);
  490 + if (blkg)
  491 + return blkg;
  492 +
  493 + if (!css_tryget(&blkcg->css))
  494 + return ERR_PTR(-EINVAL);
  495 +
  496 + /*
  497 + * Allocate and initialize.
  498 + *
  499 + * FIXME: The following is broken. Percpu memory allocation
  500 + * requires %GFP_KERNEL context and can't be performed from IO
  501 + * path. Allocation here should inherently be atomic and the
  502 + * following lock dancing can be removed once the broken percpu
  503 + * allocation is fixed.
  504 + */
  505 + spin_unlock_irq(q->queue_lock);
  506 + rcu_read_unlock();
  507 +
  508 + new_blkg = pol->ops.blkio_alloc_group_fn(q, blkcg);
  509 + if (new_blkg) {
  510 + new_blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
  511 +
  512 + spin_lock_init(&new_blkg->stats_lock);
  513 + rcu_assign_pointer(new_blkg->q, q);
  514 + new_blkg->blkcg_id = css_id(&blkcg->css);
  515 + new_blkg->plid = plid;
  516 + cgroup_path(blkcg->css.cgroup, new_blkg->path,
  517 + sizeof(new_blkg->path));
  518 + }
  519 +
  520 + rcu_read_lock();
  521 + spin_lock_irq(q->queue_lock);
  522 + css_put(&blkcg->css);
  523 +
  524 + /* did bypass get turned on inbetween? */
  525 + if (unlikely(blk_queue_bypass(q)) && !for_root) {
  526 + blkg = ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
  527 + goto out;
  528 + }
  529 +
  530 + /* did someone beat us to it? */
  531 + blkg = blkg_lookup(blkcg, q, plid);
  532 + if (unlikely(blkg))
  533 + goto out;
  534 +
  535 + /* did alloc fail? */
  536 + if (unlikely(!new_blkg || !new_blkg->stats_cpu)) {
  537 + blkg = ERR_PTR(-ENOMEM);
  538 + goto out;
  539 + }
  540 +
  541 + /* insert */
  542 + spin_lock(&blkcg->lock);
  543 + swap(blkg, new_blkg);
492 544 hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
493   - blkg->plid = plid;
494   - spin_unlock_irqrestore(&blkcg->lock, flags);
495   - /* Need to take css reference ? */
496   - cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
497   - blkg->dev = dev;
  545 + pol->ops.blkio_link_group_fn(q, blkg);
  546 + spin_unlock(&blkcg->lock);
  547 +out:
  548 + if (new_blkg) {
  549 + free_percpu(new_blkg->stats_cpu);
  550 + kfree(new_blkg);
  551 + }
  552 + return blkg;
498 553 }
499   -EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group);
  554 +EXPORT_SYMBOL_GPL(blkg_lookup_create);
500 555  
501 556 static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
502 557 {
... ... @@ -533,9 +588,9 @@
533 588 EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);
534 589  
535 590 /* called under rcu_read_lock(). */
536   -struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
537   - struct request_queue *q,
538   - enum blkio_policy_id plid)
  591 +struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
  592 + struct request_queue *q,
  593 + enum blkio_policy_id plid)
539 594 {
540 595 struct blkio_group *blkg;
541 596 struct hlist_node *n;
... ... @@ -545,7 +600,7 @@
545 600 return blkg;
546 601 return NULL;
547 602 }
548   -EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
  603 +EXPORT_SYMBOL_GPL(blkg_lookup);
549 604  
550 605 void blkg_destroy_all(struct request_queue *q)
551 606 {
block/blk-cgroup.h
... ... @@ -204,6 +204,10 @@
204 204 extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg,
205 205 dev_t dev);
206 206  
  207 +typedef struct blkio_group *(blkio_alloc_group_fn)(struct request_queue *q,
  208 + struct blkio_cgroup *blkcg);
  209 +typedef void (blkio_link_group_fn)(struct request_queue *q,
  210 + struct blkio_group *blkg);
207 211 typedef void (blkio_unlink_group_fn)(struct request_queue *q,
208 212 struct blkio_group *blkg);
209 213 typedef bool (blkio_clear_queue_fn)(struct request_queue *q);
... ... @@ -219,6 +223,8 @@
219 223 struct blkio_group *blkg, unsigned int write_iops);
220 224  
221 225 struct blkio_policy_ops {
  226 + blkio_alloc_group_fn *blkio_alloc_group_fn;
  227 + blkio_link_group_fn *blkio_link_group_fn;
222 228 blkio_unlink_group_fn *blkio_unlink_group_fn;
223 229 blkio_clear_queue_fn *blkio_clear_queue_fn;
224 230 blkio_update_group_weight_fn *blkio_update_group_weight_fn;
... ... @@ -307,14 +313,14 @@
307 313 extern struct blkio_cgroup blkio_root_cgroup;
308 314 extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
309 315 extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
310   -extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
311   - struct blkio_group *blkg, struct request_queue *q, dev_t dev,
312   - enum blkio_policy_id plid);
313   -extern int blkio_alloc_blkg_stats(struct blkio_group *blkg);
314 316 extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
315   -extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
316   - struct request_queue *q,
317   - enum blkio_policy_id plid);
  317 +extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
  318 + struct request_queue *q,
  319 + enum blkio_policy_id plid);
  320 +struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
  321 + struct request_queue *q,
  322 + enum blkio_policy_id plid,
  323 + bool for_root);
318 324 void blkiocg_update_timeslice_used(struct blkio_group *blkg,
319 325 unsigned long time,
320 326 unsigned long unaccounted_time);
... ... @@ -335,17 +341,11 @@
335 341 static inline struct blkio_cgroup *
336 342 task_blkio_cgroup(struct task_struct *tsk) { return NULL; }
337 343  
338   -static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
339   - struct blkio_group *blkg, void *key, dev_t dev,
340   - enum blkio_policy_id plid) {}
341   -
342   -static inline int blkio_alloc_blkg_stats(struct blkio_group *blkg) { return 0; }
343   -
344 344 static inline int
345 345 blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
346 346  
347   -static inline struct blkio_group *
348   -blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
  347 +static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
  348 + void *key) { return NULL; }
349 349 static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
350 350 unsigned long time,
351 351 unsigned long unaccounted_time)
block/blk-throttle.c
... ... @@ -181,17 +181,25 @@
181 181 call_rcu(&tg->rcu_head, throtl_free_tg);
182 182 }
183 183  
184   -static void throtl_init_group(struct throtl_grp *tg)
  184 +static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q,
  185 + struct blkio_cgroup *blkcg)
185 186 {
  187 + struct throtl_grp *tg;
  188 +
  189 + tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, q->node);
  190 + if (!tg)
  191 + return NULL;
  192 +
186 193 INIT_HLIST_NODE(&tg->tg_node);
187 194 RB_CLEAR_NODE(&tg->rb_node);
188 195 bio_list_init(&tg->bio_lists[0]);
189 196 bio_list_init(&tg->bio_lists[1]);
190 197 tg->limits_changed = false;
191 198  
192   - /* Practically unlimited BW */
193   - tg->bps[0] = tg->bps[1] = -1;
194   - tg->iops[0] = tg->iops[1] = -1;
  199 + tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
  200 + tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
  201 + tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
  202 + tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
195 203  
196 204 /*
197 205 * Take the initial reference that will be released on destroy
... ... @@ -200,14 +208,8 @@
200 208 * exit or cgroup deletion path depending on who is exiting first.
201 209 */
202 210 atomic_set(&tg->ref, 1);
203   -}
204 211  
205   -/* Should be called with rcu read lock held (needed for blkcg) */
206   -static void
207   -throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg)
208   -{
209   - hlist_add_head(&tg->tg_node, &td->tg_list);
210   - td->nr_undestroyed_grps++;
  212 + return &tg->blkg;
211 213 }
212 214  
213 215 static void
... ... @@ -246,119 +248,62 @@
246 248 spin_unlock_irq(td->queue->queue_lock);
247 249 }
248 250  
249   -static void throtl_init_add_tg_lists(struct throtl_data *td,
250   - struct throtl_grp *tg, struct blkio_cgroup *blkcg)
  251 +static void throtl_link_blkio_group(struct request_queue *q,
  252 + struct blkio_group *blkg)
251 253 {
  254 + struct throtl_data *td = q->td;
  255 + struct throtl_grp *tg = tg_of_blkg(blkg);
  256 +
252 257 __throtl_tg_fill_dev_details(td, tg);
253 258  
254   - /* Add group onto cgroup list */
255   - blkiocg_add_blkio_group(blkcg, &tg->blkg, td->queue,
256   - tg->blkg.dev, BLKIO_POLICY_THROTL);
257   -
258   - tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
259   - tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
260   - tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
261   - tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
262   -
263   - throtl_add_group_to_td_list(td, tg);
  259 + hlist_add_head(&tg->tg_node, &td->tg_list);
  260 + td->nr_undestroyed_grps++;
264 261 }
265 262  
266   -/* Should be called without queue lock and outside of rcu period */
267   -static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td)
268   -{
269   - struct throtl_grp *tg = NULL;
270   - int ret;
271   -
272   - tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
273   - if (!tg)
274   - return NULL;
275   -
276   - ret = blkio_alloc_blkg_stats(&tg->blkg);
277   -
278   - if (ret) {
279   - kfree(tg);
280   - return NULL;
281   - }
282   -
283   - throtl_init_group(tg);
284   - return tg;
285   -}
286   -
287 263 static struct
288   -throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
  264 +throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
289 265 {
290 266 struct throtl_grp *tg = NULL;
291 267  
292 268 /*
293 269 * This is the common case when there are no blkio cgroups.
294   - * Avoid lookup in this case
295   - */
  270 + * Avoid lookup in this case
  271 + */
296 272 if (blkcg == &blkio_root_cgroup)
297 273 tg = td->root_tg;
298 274 else
299   - tg = tg_of_blkg(blkiocg_lookup_group(blkcg, td->queue,
300   - BLKIO_POLICY_THROTL));
  275 + tg = tg_of_blkg(blkg_lookup(blkcg, td->queue,
  276 + BLKIO_POLICY_THROTL));
301 277  
302 278 __throtl_tg_fill_dev_details(td, tg);
303 279 return tg;
304 280 }
305 281  
306   -static struct throtl_grp *throtl_get_tg(struct throtl_data *td,
307   - struct blkio_cgroup *blkcg)
  282 +static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
  283 + struct blkio_cgroup *blkcg)
308 284 {
309   - struct throtl_grp *tg = NULL, *__tg = NULL;
310 285 struct request_queue *q = td->queue;
  286 + struct throtl_grp *tg = NULL;
311 287  
312   - /* no throttling for dead queue */
313   - if (unlikely(blk_queue_bypass(q)))
314   - return NULL;
315   -
316   - tg = throtl_find_tg(td, blkcg);
317   - if (tg)
318   - return tg;
319   -
320   - if (!css_tryget(&blkcg->css))
321   - return NULL;
322   -
323 288 /*
324   - * Need to allocate a group. Allocation of group also needs allocation
325   - * of per cpu stats which in-turn takes a mutex() and can block. Hence
326   - * we need to drop rcu lock and queue_lock before we call alloc.
  289 + * This is the common case when there are no blkio cgroups.
  290 + * Avoid lookup in this case
327 291 */
328   - spin_unlock_irq(q->queue_lock);
329   - rcu_read_unlock();
  292 + if (blkcg == &blkio_root_cgroup) {
  293 + tg = td->root_tg;
  294 + } else {
  295 + struct blkio_group *blkg;
330 296  
331   - tg = throtl_alloc_tg(td);
  297 + blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false);
332 298  
333   - /* Group allocated and queue is still alive. take the lock */
334   - rcu_read_lock();
335   - spin_lock_irq(q->queue_lock);
336   - css_put(&blkcg->css);
337   -
338   - /* Make sure @q is still alive */
339   - if (unlikely(blk_queue_bypass(q))) {
340   - kfree(tg);
341   - return NULL;
  299 + /* if %NULL and @q is alive, fall back to root_tg */
  300 + if (!IS_ERR(blkg))
  301 + tg = tg_of_blkg(blkg);
  302 + else if (!blk_queue_dead(q))
  303 + tg = td->root_tg;
342 304 }
343 305  
344   - /*
345   - * If some other thread already allocated the group while we were
346   - * not holding queue lock, free up the group
347   - */
348   - __tg = throtl_find_tg(td, blkcg);
349   -
350   - if (__tg) {
351   - kfree(tg);
352   - return __tg;
353   - }
354   -
355   - /* Group allocation failed. Account the IO to root group */
356   - if (!tg) {
357   - tg = td->root_tg;
358   - return tg;
359   - }
360   -
361   - throtl_init_add_tg_lists(td, tg, blkcg);
  306 + __throtl_tg_fill_dev_details(td, tg);
362 307 return tg;
363 308 }
364 309  
... ... @@ -1107,6 +1052,8 @@
1107 1052  
1108 1053 static struct blkio_policy_type blkio_policy_throtl = {
1109 1054 .ops = {
  1055 + .blkio_alloc_group_fn = throtl_alloc_blkio_group,
  1056 + .blkio_link_group_fn = throtl_link_blkio_group,
1110 1057 .blkio_unlink_group_fn = throtl_unlink_blkio_group,
1111 1058 .blkio_clear_queue_fn = throtl_clear_queue,
1112 1059 .blkio_update_group_read_bps_fn =
... ... @@ -1141,7 +1088,7 @@
1141 1088 */
1142 1089 rcu_read_lock();
1143 1090 blkcg = task_blkio_cgroup(current);
1144   - tg = throtl_find_tg(td, blkcg);
  1091 + tg = throtl_lookup_tg(td, blkcg);
1145 1092 if (tg) {
1146 1093 throtl_tg_fill_dev_details(td, tg);
1147 1094  
... ... @@ -1157,7 +1104,7 @@
1157 1104 * IO group
1158 1105 */
1159 1106 spin_lock_irq(q->queue_lock);
1160   - tg = throtl_get_tg(td, blkcg);
  1107 + tg = throtl_lookup_create_tg(td, blkcg);
1161 1108 if (unlikely(!tg))
1162 1109 goto out_unlock;
1163 1110  
... ... @@ -1252,6 +1199,7 @@
1252 1199 int blk_throtl_init(struct request_queue *q)
1253 1200 {
1254 1201 struct throtl_data *td;
  1202 + struct blkio_group *blkg;
1255 1203  
1256 1204 td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
1257 1205 if (!td)
... ... @@ -1262,13 +1210,17 @@
1262 1210 td->limits_changed = false;
1263 1211 INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
1264 1212  
1265   - /* alloc and Init root group. */
  1213 + q->td = td;
1266 1214 td->queue = q;
1267 1215  
  1216 + /* alloc and init root group. */
1268 1217 rcu_read_lock();
1269 1218 spin_lock_irq(q->queue_lock);
1270 1219  
1271   - td->root_tg = throtl_get_tg(td, &blkio_root_cgroup);
  1220 + blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL,
  1221 + true);
  1222 + if (!IS_ERR(blkg))
  1223 + td->root_tg = tg_of_blkg(blkg);
1272 1224  
1273 1225 spin_unlock_irq(q->queue_lock);
1274 1226 rcu_read_unlock();
... ... @@ -1277,9 +1229,6 @@
1277 1229 kfree(td);
1278 1230 return -ENOMEM;
1279 1231 }
1280   -
1281   - /* Attach throtl data to request queue */
1282   - q->td = td;
1283 1232 return 0;
1284 1233 }
1285 1234  
block/cfq-iosched.c
... ... @@ -1048,10 +1048,12 @@
1048 1048 cfqg->needs_update = true;
1049 1049 }
1050 1050  
1051   -static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
1052   - struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
  1051 +static void cfq_link_blkio_group(struct request_queue *q,
  1052 + struct blkio_group *blkg)
1053 1053 {
1054   - struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
  1054 + struct cfq_data *cfqd = q->elevator->elevator_data;
  1055 + struct backing_dev_info *bdi = &q->backing_dev_info;
  1056 + struct cfq_group *cfqg = cfqg_of_blkg(blkg);
1055 1057 unsigned int major, minor;
1056 1058  
1057 1059 /*
... ... @@ -1062,34 +1064,26 @@
1062 1064 */
1063 1065 if (bdi->dev) {
1064 1066 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
1065   - cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
1066   - cfqd->queue, MKDEV(major, minor));
1067   - } else
1068   - cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
1069   - cfqd->queue, 0);
  1067 + blkg->dev = MKDEV(major, minor);
  1068 + }
1070 1069  
1071 1070 cfqd->nr_blkcg_linked_grps++;
1072   - cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
1073 1071  
1074 1072 /* Add group on cfqd list */
1075 1073 hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
1076 1074 }
1077 1075  
1078   -/*
1079   - * Should be called from sleepable context. No request queue lock as per
1080   - * cpu stats are allocated dynamically and alloc_percpu needs to be called
1081   - * from sleepable context.
1082   - */
1083   -static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
  1076 +static struct blkio_group *cfq_alloc_blkio_group(struct request_queue *q,
  1077 + struct blkio_cgroup *blkcg)
1084 1078 {
1085 1079 struct cfq_group *cfqg;
1086   - int ret;
1087 1080  
1088   - cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
  1081 + cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, q->node);
1089 1082 if (!cfqg)
1090 1083 return NULL;
1091 1084  
1092 1085 cfq_init_cfqg_base(cfqg);
  1086 + cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
1093 1087  
1094 1088 /*
1095 1089 * Take the initial reference that will be released on destroy
... ... @@ -1099,90 +1093,38 @@
1099 1093 */
1100 1094 cfqg->ref = 1;
1101 1095  
1102   - ret = blkio_alloc_blkg_stats(&cfqg->blkg);
1103   - if (ret) {
1104   - kfree(cfqg);
1105   - return NULL;
1106   - }
1107   -
1108   - return cfqg;
  1096 + return &cfqg->blkg;
1109 1097 }
1110 1098  
1111   -static struct cfq_group *
1112   -cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
1113   -{
1114   - struct cfq_group *cfqg = NULL;
1115   - struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
1116   - unsigned int major, minor;
1117   -
1118   - /*
1119   - * This is the common case when there are no blkio cgroups.
1120   - * Avoid lookup in this case
1121   - */
1122   - if (blkcg == &blkio_root_cgroup)
1123   - cfqg = cfqd->root_group;
1124   - else
1125   - cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, cfqd->queue,
1126   - BLKIO_POLICY_PROP));
1127   -
1128   - if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
1129   - sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
1130   - cfqg->blkg.dev = MKDEV(major, minor);
1131   - }
1132   -
1133   - return cfqg;
1134   -}
1135   -
1136 1099 /*
1137 1100 * Search for the cfq group current task belongs to. request_queue lock must
1138 1101 * be held.
1139 1102 */
1140   -static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd,
1141   - struct blkio_cgroup *blkcg)
  1103 +static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
  1104 + struct blkio_cgroup *blkcg)
1142 1105 {
1143   - struct cfq_group *cfqg = NULL, *__cfqg = NULL;
1144 1106 struct request_queue *q = cfqd->queue;
  1107 + struct backing_dev_info *bdi = &q->backing_dev_info;
  1108 + struct cfq_group *cfqg = NULL;
1145 1109  
1146   - cfqg = cfq_find_cfqg(cfqd, blkcg);
1147   - if (cfqg)
1148   - return cfqg;
  1110 + /* avoid lookup for the common case where there's no blkio cgroup */
  1111 + if (blkcg == &blkio_root_cgroup) {
  1112 + cfqg = cfqd->root_group;
  1113 + } else {
  1114 + struct blkio_group *blkg;
1149 1115  
1150   - if (!css_tryget(&blkcg->css))
1151   - return NULL;
  1116 + blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false);
  1117 + if (!IS_ERR(blkg))
  1118 + cfqg = cfqg_of_blkg(blkg);
  1119 + }
1152 1120  
1153   - /*
1154   - * Need to allocate a group. Allocation of group also needs allocation
1155   - * of per cpu stats which in-turn takes a mutex() and can block. Hence
1156   - * we need to drop rcu lock and queue_lock before we call alloc.
1157   - *
1158   - * Not taking any queue reference here and assuming that queue is
1159   - * around by the time we return. CFQ queue allocation code does
1160   - * the same. It might be racy though.
1161   - */
1162   - rcu_read_unlock();
1163   - spin_unlock_irq(q->queue_lock);
  1121 + if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
  1122 + unsigned int major, minor;
1164 1123  
1165   - cfqg = cfq_alloc_cfqg(cfqd);
1166   -
1167   - spin_lock_irq(q->queue_lock);
1168   - rcu_read_lock();
1169   - css_put(&blkcg->css);
1170   -
1171   - /*
1172   - * If some other thread already allocated the group while we were
1173   - * not holding queue lock, free up the group
1174   - */
1175   - __cfqg = cfq_find_cfqg(cfqd, blkcg);
1176   -
1177   - if (__cfqg) {
1178   - kfree(cfqg);
1179   - return __cfqg;
  1124 + sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
  1125 + cfqg->blkg.dev = MKDEV(major, minor);
1180 1126 }
1181 1127  
1182   - if (!cfqg)
1183   - cfqg = cfqd->root_group;
1184   -
1185   - cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
1186 1128 return cfqg;
1187 1129 }
1188 1130  
... ... @@ -1294,8 +1236,8 @@
1294 1236 }
1295 1237  
1296 1238 #else /* GROUP_IOSCHED */
1297   -static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd,
1298   - struct blkio_cgroup *blkcg)
  1239 +static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
  1240 + struct blkio_cgroup *blkcg)
1299 1241 {
1300 1242 return cfqd->root_group;
1301 1243 }
... ... @@ -2887,7 +2829,8 @@
2887 2829  
2888 2830 blkcg = task_blkio_cgroup(current);
2889 2831  
2890   - cfqg = cfq_get_cfqg(cfqd, blkcg);
  2832 + cfqg = cfq_lookup_create_cfqg(cfqd, blkcg);
  2833 +
2891 2834 cic = cfq_cic_lookup(cfqd, ioc);
2892 2835 /* cic always exists here */
2893 2836 cfqq = cic_to_cfqq(cic, is_sync);
... ... @@ -3694,6 +3637,7 @@
3694 3637 static int cfq_init_queue(struct request_queue *q)
3695 3638 {
3696 3639 struct cfq_data *cfqd;
  3640 + struct blkio_group *blkg __maybe_unused;
3697 3641 int i;
3698 3642  
3699 3643 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
... ... @@ -3711,7 +3655,10 @@
3711 3655 rcu_read_lock();
3712 3656 spin_lock_irq(q->queue_lock);
3713 3657  
3714   - cfqd->root_group = cfq_get_cfqg(cfqd, &blkio_root_cgroup);
  3658 + blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_PROP,
  3659 + true);
  3660 + if (!IS_ERR(blkg))
  3661 + cfqd->root_group = cfqg_of_blkg(blkg);
3715 3662  
3716 3663 spin_unlock_irq(q->queue_lock);
3717 3664 rcu_read_unlock();
... ... @@ -3897,6 +3844,8 @@
3897 3844 #ifdef CONFIG_CFQ_GROUP_IOSCHED
3898 3845 static struct blkio_policy_type blkio_policy_cfq = {
3899 3846 .ops = {
  3847 + .blkio_alloc_group_fn = cfq_alloc_blkio_group,
  3848 + .blkio_link_group_fn = cfq_link_blkio_group,
3900 3849 .blkio_unlink_group_fn = cfq_unlink_blkio_group,
3901 3850 .blkio_clear_queue_fn = cfq_clear_queue,
3902 3851 .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
block/cfq.h
... ... @@ -67,12 +67,6 @@
67 67 direction, sync);
68 68 }
69 69  
70   -static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
71   - struct blkio_group *blkg, struct request_queue *q, dev_t dev)
72   -{
73   - blkiocg_add_blkio_group(blkcg, blkg, q, dev, BLKIO_POLICY_PROP);
74   -}
75   -
76 70 static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
77 71 {
78 72 return blkiocg_del_blkio_group(blkg);
... ... @@ -105,8 +99,6 @@
105 99 uint64_t bytes, bool direction, bool sync) {}
106 100 static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) {}
107 101  
108   -static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
109   - struct blkio_group *blkg, struct request_queue *q, dev_t dev) {}
110 102 static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
111 103 {
112 104 return 0;