Commit ad4b78bbcbab66998b05d422ac6106b645796e54

Authored by Peter Zijlstra
Committed by Ingo Molnar
1 parent eb24073bc1

sched: Add new wakeup preemption mode: WAKEUP_RUNNING

Create a new wakeup preemption mode, preempt towards tasks that run
shorter on avg. It sets next buddy to be sure we actually run the task
we preempted for.

Test results:

 root@twins:~# while :; do :; done &
 [1] 6537
 root@twins:~# while :; do :; done &
 [2] 6538
 root@twins:~# while :; do :; done &
 [3] 6539
 root@twins:~# while :; do :; done &
 [4] 6540

 root@twins:/home/peter# ./latt -c4 sleep 4
 Entries: 48 (clients=4)

 Averages:
 ------------------------------
        Max          4750 usec
        Avg           497 usec
        Stdev         737 usec

 root@twins:/home/peter# echo WAKEUP_RUNNING > /debug/sched_features

 root@twins:/home/peter# ./latt -c4 sleep 4
 Entries: 48 (clients=4)

 Averages:
 ------------------------------
        Max            14 usec
        Avg             5 usec
        Stdev           3 usec

Disabled by default - needs more testing.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
LKML-Reference: <new-submission>

Showing 5 changed files with 29 additions and 10 deletions Side-by-side Diff

include/linux/sched.h
... ... @@ -1113,6 +1113,8 @@
1113 1113 u64 start_runtime;
1114 1114 u64 avg_wakeup;
1115 1115  
  1116 + u64 avg_running;
  1117 +
1116 1118 #ifdef CONFIG_SCHEDSTATS
1117 1119 u64 wait_start;
1118 1120 u64 wait_max;
kernel/sched.c
... ... @@ -2458,6 +2458,7 @@
2458 2458 p->se.avg_overlap = 0;
2459 2459 p->se.start_runtime = 0;
2460 2460 p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
  2461 + p->se.avg_running = 0;
2461 2462  
2462 2463 #ifdef CONFIG_SCHEDSTATS
2463 2464 p->se.wait_start = 0;
2464 2465  
2465 2466  
2466 2467  
... ... @@ -5310,14 +5311,13 @@
5310 5311 #endif
5311 5312 }
5312 5313  
5313   -static void put_prev_task(struct rq *rq, struct task_struct *prev)
  5314 +static void put_prev_task(struct rq *rq, struct task_struct *p)
5314 5315 {
5315   - if (prev->state == TASK_RUNNING) {
5316   - u64 runtime = prev->se.sum_exec_runtime;
  5316 + u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime;
5317 5317  
5318   - runtime -= prev->se.prev_sum_exec_runtime;
5319   - runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
  5318 + update_avg(&p->se.avg_running, runtime);
5320 5319  
  5320 + if (p->state == TASK_RUNNING) {
5321 5321 /*
5322 5322 * In order to avoid avg_overlap growing stale when we are
5323 5323 * indeed overlapping and hence not getting put to sleep, grow
5324 5324  
... ... @@ -5327,9 +5327,12 @@
5327 5327 * correlates to the amount of cache footprint a task can
5328 5328 * build up.
5329 5329 */
5330   - update_avg(&prev->se.avg_overlap, runtime);
  5330 + runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
  5331 + update_avg(&p->se.avg_overlap, runtime);
  5332 + } else {
  5333 + update_avg(&p->se.avg_running, 0);
5331 5334 }
5332   - prev->sched_class->put_prev_task(rq, prev);
  5335 + p->sched_class->put_prev_task(rq, p);
5333 5336 }
5334 5337  
5335 5338 /*
kernel/sched_debug.c
... ... @@ -395,6 +395,7 @@
395 395 PN(se.sum_exec_runtime);
396 396 PN(se.avg_overlap);
397 397 PN(se.avg_wakeup);
  398 + PN(se.avg_running);
398 399  
399 400 nr_switches = p->nvcsw + p->nivcsw;
400 401  
kernel/sched_fair.c
... ... @@ -1605,9 +1605,6 @@
1605 1605 return;
1606 1606 }
1607 1607  
1608   - if (!sched_feat(WAKEUP_PREEMPT))
1609   - return;
1610   -
1611 1608 if ((sched_feat(WAKEUP_SYNC) && sync) ||
1612 1609 (sched_feat(WAKEUP_OVERLAP) &&
1613 1610 (se->avg_overlap < sysctl_sched_migration_cost &&
... ... @@ -1615,6 +1612,17 @@
1615 1612 resched_task(curr);
1616 1613 return;
1617 1614 }
  1615 +
  1616 + if (sched_feat(WAKEUP_RUNNING)) {
  1617 + if (pse->avg_running < se->avg_running) {
  1618 + set_next_buddy(pse);
  1619 + resched_task(curr);
  1620 + return;
  1621 + }
  1622 + }
  1623 +
  1624 + if (!sched_feat(WAKEUP_PREEMPT))
  1625 + return;
1618 1626  
1619 1627 find_matching_se(&se, &pse);
1620 1628  
kernel/sched_features.h
... ... @@ -54,6 +54,11 @@
54 54 SCHED_FEAT(WAKEUP_OVERLAP, 0)
55 55  
56 56 /*
  57 + * Wakeup preemption towards tasks that run short
  58 + */
  59 +SCHED_FEAT(WAKEUP_RUNNING, 0)
  60 +
  61 +/*
57 62 * Use the SYNC wakeup hint, pipes and the likes use this to indicate
58 63 * the remote end is likely to consume the data we just wrote, and
59 64 * therefore has cache benefit from being placed on the same cpu, see