Commit 6441402b1f173fa38e561d3cee7c01c32e5281ad

Authored by Thomas Gleixner
1 parent 72d31053f6

clockevents: prevent cpu online from interfering with nohz

Impact: rare hang which can be triggered on CPU online.

tick_do_timer_cpu keeps track of the CPU which updates jiffies
via do_timer. The value -1 is used to signal that currently no
CPU is doing this. There are two cases in which the variable can
have this state:

 boot:
    necessary for systems where the boot cpu id can be != 0

 nohz long idle sleep:
    When the CPU which did the jiffies update last goes into
    a long idle sleep, it drops the jiffies update duty so
    another CPU which is not idle can pick it up and keep
    jiffies going.

Using the same value for both situations is wrong, as the CPU online
code can see the -1 state when the timer of the newly onlined CPU is
set up. The setup for a newly onlined CPU goes through periodic mode
and can pick up the do_timer duty without being aware of the nohz /
highres mode of the already running system.

Use two separate states, TICK_DO_TIMER_BOOT and TICK_DO_TIMER_NONE,
and make them named constants to avoid magic number confusion.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Showing 3 changed files with 12 additions and 7 deletions Side-by-side Diff

kernel/time/tick-common.c
... ... @@ -33,7 +33,7 @@
33 33 */
34 34 ktime_t tick_next_period;
35 35 ktime_t tick_period;
36   -int tick_do_timer_cpu __read_mostly = -1;
  36 +int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
37 37 DEFINE_SPINLOCK(tick_device_lock);
38 38  
39 39 /*
... ... @@ -148,7 +148,7 @@
148 148 * If no cpu took the do_timer update, assign it to
149 149 * this cpu:
150 150 */
151   - if (tick_do_timer_cpu == -1) {
  151 + if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
152 152 tick_do_timer_cpu = cpu;
153 153 tick_next_period = ktime_get();
154 154 tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
... ... @@ -300,7 +300,8 @@
300 300 if (*cpup == tick_do_timer_cpu) {
301 301 int cpu = first_cpu(cpu_online_map);
302 302  
303   - tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu : -1;
  303 + tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu :
  304 + TICK_DO_TIMER_NONE;
304 305 }
305 306 spin_unlock_irqrestore(&tick_device_lock, flags);
306 307 }
kernel/time/tick-internal.h
1 1 /*
2 2 * tick internal variable and functions used by low/high res code
3 3 */
  4 +
  5 +#define TICK_DO_TIMER_NONE -1
  6 +#define TICK_DO_TIMER_BOOT -2
  7 +
4 8 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
5 9 extern spinlock_t tick_device_lock;
6 10 extern ktime_t tick_next_period;
kernel/time/tick-sched.c
... ... @@ -221,7 +221,7 @@
221 221 */
222 222 if (unlikely(!cpu_online(cpu))) {
223 223 if (cpu == tick_do_timer_cpu)
224   - tick_do_timer_cpu = -1;
  224 + tick_do_timer_cpu = TICK_DO_TIMER_NONE;
225 225 }
226 226  
227 227 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
... ... @@ -303,7 +303,7 @@
303 303 * invoked.
304 304 */
305 305 if (cpu == tick_do_timer_cpu)
306   - tick_do_timer_cpu = -1;
  306 + tick_do_timer_cpu = TICK_DO_TIMER_NONE;
307 307  
308 308 ts->idle_sleeps++;
309 309  
... ... @@ -468,7 +468,7 @@
468 468 * this duty, then the jiffies update is still serialized by
469 469 * xtime_lock.
470 470 */
471   - if (unlikely(tick_do_timer_cpu == -1))
  471 + if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
472 472 tick_do_timer_cpu = cpu;
473 473  
474 474 /* Check, if the jiffies need an update */
... ... @@ -570,7 +570,7 @@
570 570 * this duty, then the jiffies update is still serialized by
571 571 * xtime_lock.
572 572 */
573   - if (unlikely(tick_do_timer_cpu == -1))
  573 + if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
574 574 tick_do_timer_cpu = cpu;
575 575 #endif
576 576