Commit fb02fbc14d17837b4b7b02dbb36142c16a7bf208
Committed by
Thomas Gleixner
1 parent
c34bec5a44
Exists in
master
and in
20 other branches
NOHZ: restart tick device from irq_enter()
We did not restart the tick device from irq_enter() to avoid double reprogramming and extra events in the return immediate to idle case. But long lasting softirqs can lead to a situation where jiffies become stale: idle() tick stopped (reprogrammed to next pending timer) halt() interrupt jiffies updated from irq_enter() interrupt handler softirq function 1 runs 20ms softirq function 2 arms a 10ms timer with a stale jiffies value jiffies updated from irq_exit() timer wheel has now an already expired timer (the one added in function 2) timer fires and timer softirq runs This was discovered when debugging a timer problem which happend only when the ath5k driver is active. The debugging proved that there is a softirq function running for more than 20ms, which is a bug by itself. To solve this we restart the tick timer right from irq_enter(), but do not go through the other functions which are necessary to return from idle when need_resched() is set. Reported-by: Elias Oltmanns <eo@nebensachen.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Elias Oltmanns <eo@nebensachen.de>
Showing 3 changed files with 38 additions and 8 deletions Side-by-side Diff
kernel/time/tick-broadcast.c
... | ... | @@ -384,6 +384,19 @@ |
384 | 384 | } |
385 | 385 | |
386 | 386 | /* |
387 | + * Called from irq_enter() when idle was interrupted to reenable the | |
388 | + * per cpu device. | |
389 | + */ | |
390 | +void tick_check_oneshot_broadcast(int cpu) | |
391 | +{ | |
392 | + if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) { | |
393 | + struct tick_device *td = &per_cpu(tick_cpu_device, cpu); | |
394 | + | |
395 | + clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT); | |
396 | + } | |
397 | +} | |
398 | + | |
399 | +/* | |
387 | 400 | * Handle oneshot mode broadcasting |
388 | 401 | */ |
389 | 402 | static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) |
kernel/time/tick-internal.h
... | ... | @@ -36,6 +36,7 @@ |
36 | 36 | extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); |
37 | 37 | extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); |
38 | 38 | extern int tick_broadcast_oneshot_active(void); |
39 | +extern void tick_check_oneshot_broadcast(int cpu); | |
39 | 40 | # else /* BROADCAST */ |
40 | 41 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) |
41 | 42 | { |
... | ... | @@ -45,6 +46,7 @@ |
45 | 46 | static inline void tick_broadcast_switch_to_oneshot(void) { } |
46 | 47 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } |
47 | 48 | static inline int tick_broadcast_oneshot_active(void) { return 0; } |
49 | +static inline void tick_check_oneshot_broadcast(int cpu) { } | |
48 | 50 | # endif /* !BROADCAST */ |
49 | 51 | |
50 | 52 | #else /* !ONESHOT */ |
kernel/time/tick-sched.c
... | ... | @@ -508,10 +508,6 @@ |
508 | 508 | update_process_times(user_mode(regs)); |
509 | 509 | profile_tick(CPU_PROFILING); |
510 | 510 | |
511 | - /* Do not restart, when we are in the idle loop */ | |
512 | - if (ts->tick_stopped) | |
513 | - return; | |
514 | - | |
515 | 511 | while (tick_nohz_reprogram(ts, now)) { |
516 | 512 | now = ktime_get(); |
517 | 513 | tick_do_update_jiffies64(now); |
... | ... | @@ -557,6 +553,27 @@ |
557 | 553 | smp_processor_id()); |
558 | 554 | } |
559 | 555 | |
556 | +/* | |
557 | + * When NOHZ is enabled and the tick is stopped, we need to kick the | |
558 | + * tick timer from irq_enter() so that the jiffies update is kept | |
559 | + * alive during long running softirqs. That's ugly as hell, but | |
560 | + * correctness is key even if we need to fix the offending softirq in | |
561 | + * the first place. | |
562 | + * | |
563 | + * Note, this is different to tick_nohz_restart. We just kick the | |
564 | + * timer and do not touch the other magic bits which need to be done | |
565 | + * when idle is left. | |
566 | + */ | |
567 | +static void tick_nohz_kick_tick(int cpu) | |
568 | +{ | |
569 | + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | |
570 | + | |
571 | + if (!ts->tick_stopped) | |
572 | + return; | |
573 | + | |
574 | + tick_nohz_restart(ts, ktime_get()); | |
575 | +} | |
576 | + | |
560 | 577 | #else |
561 | 578 | |
562 | 579 | static inline void tick_nohz_switch_to_nohz(void) { } |
563 | 580 | |
... | ... | @@ -568,9 +585,11 @@ |
568 | 585 | */ |
569 | 586 | void tick_check_idle(int cpu) |
570 | 587 | { |
588 | + tick_check_oneshot_broadcast(cpu); | |
571 | 589 | #ifdef CONFIG_NO_HZ |
572 | 590 | tick_nohz_stop_idle(cpu); |
573 | 591 | tick_nohz_update_jiffies(); |
592 | + tick_nohz_kick_tick(cpu); | |
574 | 593 | #endif |
575 | 594 | } |
576 | 595 | |
... | ... | @@ -626,10 +645,6 @@ |
626 | 645 | update_process_times(user_mode(regs)); |
627 | 646 | profile_tick(CPU_PROFILING); |
628 | 647 | } |
629 | - | |
630 | - /* Do not restart, when we are in the idle loop */ | |
631 | - if (ts->tick_stopped) | |
632 | - return HRTIMER_NORESTART; | |
633 | 648 | |
634 | 649 | hrtimer_forward(timer, now, tick_period); |
635 | 650 |