Commit 02b35081fc98f681411586d3acf9eaad8b8f6e07

Authored by Vivek Goyal
Committed by Jens Axboe
1 parent b6508c1618

cfq-iosched: Do group share accounting in IOPS when slice_idle=0

o Implement another CFQ mode where we charge a group by the number of
  requests it dispatches instead of by the time it consumes. Measuring
  service in terms of time is not possible when we are driving deep queue
  depths and requests from multiple cfq queues sit in the request queue
  at the same time. (A sketch of this charging logic follows the list
  below.)

o This mode currently gets activated when slice_idle is set to 0 and the
  associated disk supports NCQ. The idea is that on an NCQ disk with idling
  disabled, most queues dispatch one or more requests and are then expired,
  leaving us no way to measure the time they used. So start providing
  fairness in terms of IOPS instead.

o Currently IOPS mode works only with cfq group scheduling, as CFQ follows
  different scheduling algorithms for queue and group scheduling. These IOPS
  stats are used only for group scheduling, hence nothing should change in
  non-group mode.

o For CFQ group scheduling one can disable slice idling, so that we do not
  idle on queues and can drive deeper request queue depths (achieving better
  throughput), while group idle stays enabled, so one should still get
  service differentiation among groups.
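
For illustration, here is a minimal user-space sketch of the charging logic
described above. It is a simplified model, not the patch itself: the
struct group/struct queue types, DEFAULT_WEIGHT and charge_group() are
stand-ins invented for this example; in the real code the decision lives in
cfq_group_served() and the weight scaling in cfq_scale_slice().

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical, simplified stand-ins for the kernel structures. */
    struct group {
            unsigned int weight;           /* blkio weight of this group */
            unsigned long long vdisktime;  /* virtual time, the scheduling key */
    };

    struct queue {
            unsigned int slice_used;       /* time consumed (time mode) */
            unsigned int slice_dispatch;   /* requests dispatched (IOPS mode) */
    };

    #define DEFAULT_WEIGHT 500             /* stand-in default blkio weight */

    /* Mirrors iops_mode() below: no queue idling on an NCQ-capable disk. */
    static bool iops_mode(unsigned int slice_idle, bool hw_tag)
    {
            return slice_idle == 0 && hw_tag;
    }

    /*
     * Charge a group for one expired queue. In IOPS mode the charge is the
     * number of requests dispatched; otherwise it is the time used. The
     * charge is scaled inversely by the group's weight, so a heavier group
     * accumulates vdisktime more slowly and thus receives a larger share.
     */
    static void charge_group(struct group *g, const struct queue *q,
                             unsigned int slice_idle, bool hw_tag)
    {
            unsigned int charge = iops_mode(slice_idle, hw_tag) ?
                    q->slice_dispatch : q->slice_used;

            g->vdisktime += (unsigned long long)charge * DEFAULT_WEIGHT
                            / g->weight;
    }

    int main(void)
    {
            struct group heavy = { .weight = 1000 }, light = { .weight = 250 };
            struct queue q = { .slice_used = 8, .slice_dispatch = 32 };

            /* slice_idle=0 on an NCQ disk: both groups are charged 32 IOs, */
            charge_group(&heavy, &q, 0, true);
            charge_group(&light, &q, 0, true);

            /* but the lighter group's vdisktime advances 4x faster. */
            printf("heavy=%llu light=%llu\n", heavy.vdisktime, light.vdisktime);
            return 0;
    }

To exercise the real mode one would set slice_idle to 0 (e.g. via
/sys/block/<dev>/queue/iosched/slice_idle) on an NCQ-capable disk; with
group idle left enabled, groups still see service differentiation while
the device runs at full queue depth.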

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>

Showing 1 changed file with 24 additions and 6 deletions

... ... @@ -378,6 +378,21 @@
378 378 &cfqg->service_trees[i][j]: NULL) \
379 379  
380 380  
  381 +static inline bool iops_mode(struct cfq_data *cfqd)
  382 +{
  383 + /*
  384 + * If we are not idling on queues and the drive supports NCQ, requests
  385 + * execute in parallel and measuring time is not possible in most
  386 + * cases, short of driving shallower queue depths, which would itself
  387 + * become a performance bottleneck. In such cases, switch to providing
  388 + * fairness in terms of number of IOs.
  389 + */
  390 + if (!cfqd->cfq_slice_idle && cfqd->hw_tag)
  391 + return true;
  392 + else
  393 + return false;
  394 +}
  395 +
381 396 static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq)
382 397 {
383 398 if (cfq_class_idle(cfqq))
... ... @@ -906,7 +921,6 @@
906 921 slice_used = cfqq->allocated_slice;
907 922 }
908 923  
909   - cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u", slice_used);
910 924 return slice_used;
911 925 }
912 926  
... ... @@ -914,19 +928,21 @@
914 928 struct cfq_queue *cfqq)
915 929 {
916 930 struct cfq_rb_root *st = &cfqd->grp_service_tree;
917   - unsigned int used_sl, charge_sl;
  931 + unsigned int used_sl, charge;
918 932 int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
919 933 - cfqg->service_tree_idle.count;
920 934  
921 935 BUG_ON(nr_sync < 0);
922   - used_sl = charge_sl = cfq_cfqq_slice_usage(cfqq);
  936 + used_sl = charge = cfq_cfqq_slice_usage(cfqq);
923 937  
924   - if (!cfq_cfqq_sync(cfqq) && !nr_sync)
925   - charge_sl = cfqq->allocated_slice;
  938 + if (iops_mode(cfqd))
  939 + charge = cfqq->slice_dispatch;
  940 + else if (!cfq_cfqq_sync(cfqq) && !nr_sync)
  941 + charge = cfqq->allocated_slice;
926 942  
927 943 /* Can't update vdisktime while group is on service tree */
928 944 cfq_rb_erase(&cfqg->rb_node, st);
929   - cfqg->vdisktime += cfq_scale_slice(charge_sl, cfqg);
  945 + cfqg->vdisktime += cfq_scale_slice(charge, cfqg);
930 946 __cfq_group_service_tree_add(st, cfqg);
931 947  
932 948 /* This group is being expired. Save the context */
... ... @@ -940,6 +956,8 @@
940 956  
941 957 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
942 958 st->min_vdisktime);
  959 + cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u",
  960 + used_sl, cfqq->slice_dispatch, charge, iops_mode(cfqd));
943 961 cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
944 962 cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
945 963 }