Commit 0871714e08fed7ba66cadad11b2e4f85a9dc9b96

Authored by Jens Axboe
1 parent fadad878cc

cfq-iosched: relax IOPRIO_CLASS_IDLE restrictions

Currently you must be root to set idle io prio class on a process. This
is due to the fact that the idle class is implemented as a true idle
class, meaning that it will not make progress if someone else is
requesting disk access. Unfortunately this means that it opens DoS
opportunities by locking down file system resources, hence it is root
only at the moment.

This patch relaxes the idle class a little, by removing the truly idle
part (which entails a grace period with associated timer). The
modifications make the idle class as close to zero impact as can be done
while still guaranteeing progress. This means we can relax the root only
criteria as well.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

Showing 2 changed files with 34 additions and 85 deletions Side-by-side Diff

... ... @@ -26,9 +26,9 @@
26 26 static int cfq_slice_idle = HZ / 125;
27 27  
28 28 /*
29   - * grace period before allowing idle class to get disk access
  29 + * offset from end of service tree
30 30 */
31   -#define CFQ_IDLE_GRACE (HZ / 10)
  31 +#define CFQ_IDLE_DELAY (HZ / 5)
32 32  
33 33 /*
34 34 * below this threshold, we consider thinktime immediate
... ... @@ -98,8 +98,6 @@
98 98 struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
99 99 struct cfq_queue *async_idle_cfqq;
100 100  
101   - struct timer_list idle_class_timer;
102   -
103 101 sector_t last_position;
104 102 unsigned long last_end_request;
105 103  
106 104  
... ... @@ -384,12 +382,15 @@
384 382 /*
385 383 * The below is leftmost cache rbtree addon
386 384 */
387   -static struct rb_node *cfq_rb_first(struct cfq_rb_root *root)
  385 +static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
388 386 {
389 387 if (!root->left)
390 388 root->left = rb_first(&root->rb);
391 389  
392   - return root->left;
  390 + if (root->left)
  391 + return rb_entry(root->left, struct cfq_queue, rb_node);
  392 +
  393 + return NULL;
393 394 }
394 395  
395 396 static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
396 397  
... ... @@ -446,12 +447,20 @@
446 447 static void cfq_service_tree_add(struct cfq_data *cfqd,
447 448 struct cfq_queue *cfqq, int add_front)
448 449 {
449   - struct rb_node **p = &cfqd->service_tree.rb.rb_node;
450   - struct rb_node *parent = NULL;
  450 + struct rb_node **p, *parent;
  451 + struct cfq_queue *__cfqq;
451 452 unsigned long rb_key;
452 453 int left;
453 454  
454   - if (!add_front) {
  455 + if (cfq_class_idle(cfqq)) {
  456 + rb_key = CFQ_IDLE_DELAY;
  457 + parent = rb_last(&cfqd->service_tree.rb);
  458 + if (parent && parent != &cfqq->rb_node) {
  459 + __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
  460 + rb_key += __cfqq->rb_key;
  461 + } else
  462 + rb_key += jiffies;
  463 + } else if (!add_front) {
455 464 rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
456 465 rb_key += cfqq->slice_resid;
457 466 cfqq->slice_resid = 0;
458 467  
... ... @@ -469,8 +478,9 @@
469 478 }
470 479  
471 480 left = 1;
  481 + parent = NULL;
  482 + p = &cfqd->service_tree.rb.rb_node;
472 483 while (*p) {
473   - struct cfq_queue *__cfqq;
474 484 struct rb_node **n;
475 485  
476 486 parent = *p;
... ... @@ -736,11 +746,6 @@
736 746 __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
737 747 {
738 748 if (cfqq) {
739   - /*
740   - * stop potential idle class queues waiting service
741   - */
742   - del_timer(&cfqd->idle_class_timer);
743   -
744 749 cfqq->slice_end = 0;
745 750 cfq_clear_cfqq_must_alloc_slice(cfqq);
746 751 cfq_clear_cfqq_fifo_expire(cfqq);
747 752  
748 753  
... ... @@ -789,47 +794,16 @@
789 794 __cfq_slice_expired(cfqd, cfqq, timed_out);
790 795 }
791 796  
792   -static int start_idle_class_timer(struct cfq_data *cfqd)
793   -{
794   - unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
795   - unsigned long now = jiffies;
796   -
797   - if (time_before(now, end) &&
798   - time_after_eq(now, cfqd->last_end_request)) {
799   - mod_timer(&cfqd->idle_class_timer, end);
800   - return 1;
801   - }
802   -
803   - return 0;
804   -}
805   -
806 797 /*
807 798 * Get next queue for service. Unless we have a queue preemption,
808 799 * we'll simply select the first cfqq in the service tree.
809 800 */
810 801 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
811 802 {
812   - struct cfq_queue *cfqq;
813   - struct rb_node *n;
814   -
815 803 if (RB_EMPTY_ROOT(&cfqd->service_tree.rb))
816 804 return NULL;
817 805  
818   - n = cfq_rb_first(&cfqd->service_tree);
819   - cfqq = rb_entry(n, struct cfq_queue, rb_node);
820   -
821   - if (cfq_class_idle(cfqq)) {
822   - /*
823   - * if we have idle queues and no rt or be queues had
824   - * pending requests, either allow immediate service if
825   - * the grace period has passed or arm the idle grace
826   - * timer
827   - */
828   - if (start_idle_class_timer(cfqd))
829   - cfqq = NULL;
830   - }
831   -
832   - return cfqq;
  806 + return cfq_rb_first(&cfqd->service_tree);
833 807 }
834 808  
835 809 /*
836 810  
837 811  
838 812  
... ... @@ -1087,14 +1061,11 @@
1087 1061 */
1088 1062 static int cfq_forced_dispatch(struct cfq_data *cfqd)
1089 1063 {
  1064 + struct cfq_queue *cfqq;
1090 1065 int dispatched = 0;
1091   - struct rb_node *n;
1092 1066  
1093   - while ((n = cfq_rb_first(&cfqd->service_tree)) != NULL) {
1094   - struct cfq_queue *cfqq = rb_entry(n, struct cfq_queue, rb_node);
1095   -
  1067 + while ((cfqq = cfq_rb_first(&cfqd->service_tree)) != NULL)
1096 1068 dispatched += __cfq_forced_dispatch_cfqq(cfqq);
1097   - }
1098 1069  
1099 1070 cfq_slice_expired(cfqd, 0);
1100 1071  
1101 1072  
... ... @@ -1437,15 +1408,16 @@
1437 1408 atomic_set(&cfqq->ref, 0);
1438 1409 cfqq->cfqd = cfqd;
1439 1410  
1440   - if (is_sync) {
1441   - cfq_mark_cfqq_idle_window(cfqq);
1442   - cfq_mark_cfqq_sync(cfqq);
1443   - }
1444   -
1445 1411 cfq_mark_cfqq_prio_changed(cfqq);
1446 1412 cfq_mark_cfqq_queue_new(cfqq);
1447 1413  
1448 1414 cfq_init_prio_data(cfqq, ioc);
  1415 +
  1416 + if (is_sync) {
  1417 + if (!cfq_class_idle(cfqq))
  1418 + cfq_mark_cfqq_idle_window(cfqq);
  1419 + cfq_mark_cfqq_sync(cfqq);
  1420 + }
1449 1421 }
1450 1422  
1451 1423 if (new_cfqq)
... ... @@ -1697,7 +1669,10 @@
1697 1669 {
1698 1670 int enable_idle;
1699 1671  
1700   - if (!cfq_cfqq_sync(cfqq))
  1672 + /*
  1673 + * Don't idle for async or idle io prio class
  1674 + */
  1675 + if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq))
1701 1676 return;
1702 1677  
1703 1678 enable_idle = cfq_cfqq_idle_window(cfqq);
... ... @@ -1876,7 +1851,7 @@
1876 1851 cfq_set_prio_slice(cfqd, cfqq);
1877 1852 cfq_clear_cfqq_slice_new(cfqq);
1878 1853 }
1879   - if (cfq_slice_used(cfqq))
  1854 + if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
1880 1855 cfq_slice_expired(cfqd, 1);
1881 1856 else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list))
1882 1857 cfq_arm_slice_timer(cfqd);
1883 1858  
... ... @@ -2080,29 +2055,9 @@
2080 2055 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
2081 2056 }
2082 2057  
2083   -/*
2084   - * Timer running if an idle class queue is waiting for service
2085   - */
2086   -static void cfq_idle_class_timer(unsigned long data)
2087   -{
2088   - struct cfq_data *cfqd = (struct cfq_data *) data;
2089   - unsigned long flags;
2090   -
2091   - spin_lock_irqsave(cfqd->queue->queue_lock, flags);
2092   -
2093   - /*
2094   - * race with a non-idle queue, reset timer
2095   - */
2096   - if (!start_idle_class_timer(cfqd))
2097   - cfq_schedule_dispatch(cfqd);
2098   -
2099   - spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
2100   -}
2101   -
2102 2058 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
2103 2059 {
2104 2060 del_timer_sync(&cfqd->idle_slice_timer);
2105   - del_timer_sync(&cfqd->idle_class_timer);
2106 2061 kblockd_flush_work(&cfqd->unplug_work);
2107 2062 }
2108 2063  
... ... @@ -2166,10 +2121,6 @@
2166 2121 init_timer(&cfqd->idle_slice_timer);
2167 2122 cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
2168 2123 cfqd->idle_slice_timer.data = (unsigned long) cfqd;
2169   -
2170   - init_timer(&cfqd->idle_class_timer);
2171   - cfqd->idle_class_timer.function = cfq_idle_class_timer;
2172   - cfqd->idle_class_timer.data = (unsigned long) cfqd;
2173 2124  
2174 2125 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
2175 2126  
... ... @@ -85,8 +85,6 @@
85 85  
86 86 break;
87 87 case IOPRIO_CLASS_IDLE:
88   - if (!capable(CAP_SYS_ADMIN))
89   - return -EPERM;
90 88 break;
91 89 case IOPRIO_CLASS_NONE:
92 90 if (data)