Commit af52a90a14cdaa54ecbfb6e6982abb13466a4b56

Authored by Steven Rostedt
Committed by Ingo Molnar
1 parent f7cce27f56

sched_clock: stop maximum check on NO HZ

Working with ftrace I would get large jumps of 11 millisecs or more with
the clock tracer. This killed the latency timings of ftrace and also
caused the irqoff self tests to fail.

What was happening is that with NO_HZ, idle would stop the jiffy counter,
and until the jiffy counter was updated again, sched_clock would use a bad
delta jiffies when computing the maximum from the gtod.

The jiffies would stop and the last sched_tick would record the last gtod.
On wakeup, the sched clock update would take the gtod + delta jiffies
(where the delta jiffies would be zero) and compare it to the TSC. The TSC
would have correctly (with a stable TSC) moved forward several jiffies. But
because the jiffies had not been updated yet, the clock would be prevented
from moving forward because it would appear that the TSC jumped too far ahead.

The clock would then virtually stop, until the jiffies are updated. Then
the next sched clock update would see that the clock was very much behind
since the delta jiffies is now correct. This would then jump the clock
forward by several jiffies.

This caused ftrace to report several milliseconds of interrupts off
latency at every resume from NO_HZ idle.

This patch adds hooks into the nohz code to disable the checking of the
maximum clock update when nohz is in effect. It resumes the max check
when nohz has updated the jiffies again.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Steven Rostedt <srostedt@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 3 changed files with 56 additions and 2 deletions Side-by-side Diff

include/linux/sched.h
... ... @@ -1573,13 +1573,28 @@
1573 1573 static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
1574 1574 {
1575 1575 }
1576   -#else
  1576 +
  1577 +#ifdef CONFIG_NO_HZ
  1578 +static inline void sched_clock_tick_stop(int cpu)
  1579 +{
  1580 +}
  1581 +
  1582 +static inline void sched_clock_tick_start(int cpu)
  1583 +{
  1584 +}
  1585 +#endif
  1586 +
  1587 +#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
1577 1588 extern void sched_clock_init(void);
1578 1589 extern u64 sched_clock_cpu(int cpu);
1579 1590 extern void sched_clock_tick(void);
1580 1591 extern void sched_clock_idle_sleep_event(void);
1581 1592 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
  1593 +#ifdef CONFIG_NO_HZ
  1594 +extern void sched_clock_tick_stop(int cpu);
  1595 +extern void sched_clock_tick_start(int cpu);
1582 1596 #endif
  1597 +#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
1583 1598  
1584 1599 /*
1585 1600 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
kernel/sched_clock.c
... ... @@ -45,6 +45,9 @@
45 45 u64 tick_raw;
46 46 u64 tick_gtod;
47 47 u64 clock;
  48 +#ifdef CONFIG_NO_HZ
  49 + int check_max;
  50 +#endif
48 51 };
49 52  
50 53 static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
51 54  
52 55  
... ... @@ -76,12 +79,46 @@
76 79 scd->tick_raw = 0;
77 80 scd->tick_gtod = ktime_now;
78 81 scd->clock = ktime_now;
  82 +#ifdef CONFIG_NO_HZ
  83 + scd->check_max = 1;
  84 +#endif
79 85 }
80 86  
81 87 sched_clock_running = 1;
82 88 }
83 89  
  90 +#ifdef CONFIG_NO_HZ
84 91 /*
  92 + * The dynamic ticks makes the delta jiffies inaccurate. This
  93 + * prevents us from checking the maximum time update.
  94 + * Disable the maximum check during stopped ticks.
  95 + */
  96 +void sched_clock_tick_stop(int cpu)
  97 +{
  98 + struct sched_clock_data *scd = cpu_sdc(cpu);
  99 +
  100 + scd->check_max = 0;
  101 +}
  102 +
  103 +void sched_clock_tick_start(int cpu)
  104 +{
  105 + struct sched_clock_data *scd = cpu_sdc(cpu);
  106 +
  107 + scd->check_max = 1;
  108 +}
  109 +
  110 +static int check_max(struct sched_clock_data *scd)
  111 +{
  112 + return scd->check_max;
  113 +}
  114 +#else
  115 +static int check_max(struct sched_clock_data *scd)
  116 +{
  117 + return 1;
  118 +}
  119 +#endif /* CONFIG_NO_HZ */
  120 +
  121 +/*
85 122 * update the percpu scd from the raw @now value
86 123 *
87 124 * - filter out backward motion
... ... @@ -112,7 +149,7 @@
112 149 */
113 150 max_clock = scd->tick_gtod + (2 + delta_jiffies) * TICK_NSEC;
114 151  
115   - if (unlikely(clock + delta > max_clock)) {
  152 + if (unlikely(clock + delta > max_clock) && check_max(scd)) {
116 153 if (clock < max_clock)
117 154 clock = max_clock;
118 155 else
kernel/time/tick-sched.c
... ... @@ -276,6 +276,7 @@
276 276 ts->tick_stopped = 1;
277 277 ts->idle_jiffies = last_jiffies;
278 278 rcu_enter_nohz();
  279 + sched_clock_tick_stop(cpu);
279 280 }
280 281  
281 282 /*
... ... @@ -375,6 +376,7 @@
375 376 select_nohz_load_balancer(0);
376 377 now = ktime_get();
377 378 tick_do_update_jiffies64(now);
  379 + sched_clock_tick_start(cpu);
378 380 cpu_clear(cpu, nohz_cpu_mask);
379 381  
380 382 /*