Commit d689fe222a858c767cb8594faf280048e532b53f

Authored by Thomas Gleixner
1 parent 801a76050b

NOHZ: Check for nohz active instead of nohz enabled

RCU and the fine grained idle time accounting functions check
tick_nohz_enabled. But that variable merely tells that NOHZ has
been enabled in the config and not been disabled on the command line.

But it does not tell anything about nohz being active. That's what all
this should check for.

Matthew reported that the idle accounting on his old P1 machine
showed bogus values when he enabled NOHZ in the config and did not
disable it on the kernel command line. The reason is that his machine
uses (refined) jiffies as a clocksource, which explains why the "fine"
grained accounting went into lala land, because it depends on when the
system enters and leaves idle relative to the jiffies increment.

Provide a tick_nohz_active indicator and let RCU and the accounting
code use this instead of tick_nohz_enabled.

Reported-and-tested-by: Matthew Whitehead <tedheadster@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: john.stultz@linaro.org
Cc: mwhitehe@redhat.com
Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1311132052240.30673@ionos.tec.linutronix.de

Showing 2 changed files with 11 additions and 14 deletions Side-by-side Diff

kernel/rcu/tree_plugin.h
... ... @@ -1632,7 +1632,7 @@
1632 1632 static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
1633 1633 module_param(rcu_idle_lazy_gp_delay, int, 0644);
1634 1634  
1635   -extern int tick_nohz_enabled;
  1635 +extern int tick_nohz_active;
1636 1636  
1637 1637 /*
1638 1638 * Try to advance callbacks for all flavors of RCU on the current CPU, but
... ... @@ -1729,7 +1729,7 @@
1729 1729 int tne;
1730 1730  
1731 1731 /* Handle nohz enablement switches conservatively. */
1732   - tne = ACCESS_ONCE(tick_nohz_enabled);
  1732 + tne = ACCESS_ONCE(tick_nohz_active);
1733 1733 if (tne != rdtp->tick_nohz_enabled_snap) {
1734 1734 if (rcu_cpu_has_callbacks(cpu, NULL))
1735 1735 invoke_rcu_core(); /* force nohz to see update. */
kernel/time/tick-sched.c
... ... @@ -361,8 +361,8 @@
361 361 /*
362 362 * NO HZ enabled ?
363 363 */
364   -int tick_nohz_enabled __read_mostly = 1;
365   -
  364 +static int tick_nohz_enabled __read_mostly = 1;
  365 +int tick_nohz_active __read_mostly;
366 366 /*
367 367 * Enable / Disable tickless mode
368 368 */
... ... @@ -465,7 +465,7 @@
465 465 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
466 466 ktime_t now, idle;
467 467  
468   - if (!tick_nohz_enabled)
  468 + if (!tick_nohz_active)
469 469 return -1;
470 470  
471 471 now = ktime_get();
... ... @@ -506,7 +506,7 @@
506 506 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
507 507 ktime_t now, iowait;
508 508  
509   - if (!tick_nohz_enabled)
  509 + if (!tick_nohz_active)
510 510 return -1;
511 511  
512 512 now = ktime_get();
... ... @@ -799,11 +799,6 @@
799 799 local_irq_disable();
800 800  
801 801 ts = &__get_cpu_var(tick_cpu_sched);
802   - /*
803   - * set ts->inidle unconditionally. even if the system did not
804   - * switch to nohz mode the cpu frequency governers rely on the
805   - * update of the idle time accounting in tick_nohz_start_idle().
806   - */
807 802 ts->inidle = 1;
808 803 __tick_nohz_idle_enter(ts);
809 804  
... ... @@ -973,7 +968,7 @@
973 968 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
974 969 ktime_t next;
975 970  
976   - if (!tick_nohz_enabled)
  971 + if (!tick_nohz_active)
977 972 return;
978 973  
979 974 local_irq_disable();
... ... @@ -981,7 +976,7 @@
981 976 local_irq_enable();
982 977 return;
983 978 }
984   -
  979 + tick_nohz_active = 1;
985 980 ts->nohz_mode = NOHZ_MODE_LOWRES;
986 981  
987 982 /*
988 983  
... ... @@ -1139,8 +1134,10 @@
1139 1134 }
1140 1135  
1141 1136 #ifdef CONFIG_NO_HZ_COMMON
1142   - if (tick_nohz_enabled)
  1137 + if (tick_nohz_enabled) {
1143 1138 ts->nohz_mode = NOHZ_MODE_HIGHRES;
  1139 + tick_nohz_active = 1;
  1140 + }
1144 1141 #endif
1145 1142 }
1146 1143 #endif /* HIGH_RES_TIMERS */