Commit a382bf934449ddeb625167537ae81daa0211b477

Authored by Frederic Weisbecker
1 parent a831881be2

nohz: Assign timekeeping duty to a CPU outside the full dynticks range

This way the full nohz CPUs can safely run with the tick
stopped with a guarantee that somebody else is taking
care of the jiffies and GTOD progression.

Once the duty is assigned to a CPU, it won't change. That CPU
also can't enter dyntick-idle mode or be hot-unplugged.

This may later be improved from a power consumption POV. At
least we should be able to share the duty amongst all CPUs
outside the full dynticks range. Then the duty could even be
shared with full dynticks CPUs when those can't stop their
tick for any reason.

But let's start with that very simple approach first.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Kevin Hilman <khilman@linaro.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
[fix have_nohz_full_mask offcase]
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

Showing 3 changed files with 51 additions and 4 deletions Side-by-side Diff

kernel/time/tick-broadcast.c
... ... @@ -573,7 +573,8 @@
573 573 bc->event_handler = tick_handle_oneshot_broadcast;
574 574  
575 575 /* Take the do_timer update */
576   - tick_do_timer_cpu = cpu;
  576 + if (!tick_nohz_extended_cpu(cpu))
  577 + tick_do_timer_cpu = cpu;
577 578  
578 579 /*
579 580 * We must be careful here. There might be other CPUs
kernel/time/tick-common.c
... ... @@ -163,7 +163,10 @@
163 163 * this cpu:
164 164 */
165 165 if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
166   - tick_do_timer_cpu = cpu;
  166 + if (!tick_nohz_extended_cpu(cpu))
  167 + tick_do_timer_cpu = cpu;
  168 + else
  169 + tick_do_timer_cpu = TICK_DO_TIMER_NONE;
167 170 tick_next_period = ktime_get();
168 171 tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
169 172 }
kernel/time/tick-sched.c
... ... @@ -112,7 +112,8 @@
112 112 * this duty, then the jiffies update is still serialized by
113 113 * jiffies_lock.
114 114 */
115   - if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
  115 + if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
  116 + && !tick_nohz_extended_cpu(cpu))
116 117 tick_do_timer_cpu = cpu;
117 118 #endif
118 119  
... ... @@ -166,6 +167,25 @@
166 167 }
167 168 __setup("nohz_extended=", tick_nohz_extended_setup);
168 169  
  170 +static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb,
  171 + unsigned long action,
  172 + void *hcpu)
  173 +{
  174 + unsigned int cpu = (unsigned long)hcpu;
  175 +
  176 + switch (action & ~CPU_TASKS_FROZEN) {
  177 + case CPU_DOWN_PREPARE:
  178 + /*
  179 + * If we handle the timekeeping duty for full dynticks CPUs,
  180 + * we can't safely shutdown that CPU.
  181 + */
  182 + if (have_nohz_extended_mask && tick_do_timer_cpu == cpu)
  183 + return -EINVAL;
  184 + break;
  185 + }
  186 + return NOTIFY_OK;
  187 +}
  188 +
169 189 static int __init init_tick_nohz_extended(void)
170 190 {
171 191 cpumask_var_t online_nohz;
... ... @@ -174,6 +194,8 @@
174 194 if (!have_nohz_extended_mask)
175 195 return 0;
176 196  
  197 + cpu_notifier(tick_nohz_cpu_down_callback, 0);
  198 +
177 199 if (!zalloc_cpumask_var(&online_nohz, GFP_KERNEL)) {
178 200 pr_warning("NO_HZ: Not enough memory to check extended nohz mask\n");
179 201 return -ENOMEM;
180 202  
181 203  
... ... @@ -188,11 +210,17 @@
188 210 /* Ensure we keep a CPU outside the dynticks range for timekeeping */
189 211 cpumask_and(online_nohz, cpu_online_mask, nohz_extended_mask);
190 212 if (cpumask_equal(online_nohz, cpu_online_mask)) {
191   - cpu = cpumask_any(cpu_online_mask);
192 213 pr_warning("NO_HZ: Must keep at least one online CPU "
193 214 "out of nohz_extended range\n");
  215 + /*
  216 + * We know the current CPU doesn't have its tick stopped.
  217 + * Let's use it for the timekeeping duty.
  218 + */
  219 + preempt_disable();
  220 + cpu = smp_processor_id();
194 221 pr_warning("NO_HZ: Clearing %d from nohz_extended range\n", cpu);
195 222 cpumask_clear_cpu(cpu, nohz_extended_mask);
  223 + preempt_enable();
196 224 }
197 225 put_online_cpus();
198 226 free_cpumask_var(online_nohz);
... ... @@ -549,6 +577,21 @@
549 577 ratelimit++;
550 578 }
551 579 return false;
  580 + }
  581 +
  582 + if (have_nohz_extended_mask) {
  583 + /*
  584 + * Keep the tick alive to guarantee timekeeping progression
  585 + * if there are full dynticks CPUs around
  586 + */
  587 + if (tick_do_timer_cpu == cpu)
  588 + return false;
  589 + /*
  590 + * Boot safety: make sure the timekeeping duty has been
  591 + * assigned before entering dyntick-idle mode,
  592 + */
  593 + if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
  594 + return false;
552 595 }
553 596  
554 597 return true;