Commit 27185016b806d5a1181ff501cae120582b2b27dd

Authored by Thomas Gleixner
1 parent 98962465ed

nohz: Track last do_timer() cpu

The previous patch which limits the sleep time to the maximum
deferment time of the time keeping clocksource has some limitations on
SMP machines: if all CPUs are idle then for all CPUs the maximum sleep
time is limited.

Solve this by keeping track of which cpu had the do_timer() duty
assigned last and limit the sleep time only for this cpu.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Cc: Jon Hunter <jon-hunter@ti.com>
Cc: John Stultz <johnstul@us.ibm.com>

Showing 2 changed files with 30 additions and 24 deletions Side-by-side Diff

include/linux/tick.h
... ... @@ -43,6 +43,7 @@
43 43 * @idle_exittime: Time when the idle state was left
44 44 * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
45 45 * @sleep_length: Duration of the current idle sleep
  46 + * @do_timer_last: CPU was the last one doing do_timer before going idle
46 47 */
47 48 struct tick_sched {
48 49 struct hrtimer sched_timer;
... ... @@ -64,6 +65,7 @@
64 65 unsigned long last_jiffies;
65 66 unsigned long next_jiffies;
66 67 ktime_t idle_expires;
  68 + int do_timer_last;
67 69 };
68 70  
69 71 extern void __init tick_init(void);
kernel/time/tick-sched.c
... ... @@ -263,17 +263,7 @@
263 263 seq = read_seqbegin(&xtime_lock);
264 264 last_update = last_jiffies_update;
265 265 last_jiffies = jiffies;
266   -
267   - /*
268   - * On SMP we really should only care for the CPU which
269   - * has the do_timer duty assigned. All other CPUs can
270   - * sleep as long as they want.
271   - */
272   - if (cpu == tick_do_timer_cpu ||
273   - tick_do_timer_cpu == TICK_DO_TIMER_NONE)
274   - time_delta = timekeeping_max_deferment();
275   - else
276   - time_delta = KTIME_MAX;
  266 + time_delta = timekeeping_max_deferment();
277 267 } while (read_seqretry(&xtime_lock, seq));
278 268  
279 269 if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
... ... @@ -296,6 +286,29 @@
296 286 if ((long)delta_jiffies >= 1) {
297 287  
298 288 /*
  289 + * If this cpu is the one which updates jiffies, then
  290 + * give up the assignment and let it be taken by the
  291 + * cpu which runs the tick timer next, which might be
  292 + * this cpu as well. If we don't drop this here the
  293 + * jiffies might be stale and do_timer() never
  294 + * invoked. Keep track of the fact that it was the one
  295 + * which had the do_timer() duty last. If this cpu is
  296 + * the one which had the do_timer() duty last, we
  297 + * limit the sleep time to the timekeeping
  298 + * max_deferment value which we retrieved
  299 + * above. Otherwise we can sleep as long as we want.
  300 + */
  301 + if (cpu == tick_do_timer_cpu) {
  302 + tick_do_timer_cpu = TICK_DO_TIMER_NONE;
  303 + ts->do_timer_last = 1;
  304 + } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
  305 + time_delta = KTIME_MAX;
  306 + ts->do_timer_last = 0;
  307 + } else if (!ts->do_timer_last) {
  308 + time_delta = KTIME_MAX;
  309 + }
  310 +
  311 + /*
299 312 * calculate the expiry time for the next timer wheel
300 313 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
301 314 * that there is no timer pending or at least extremely
302 315  
... ... @@ -312,21 +325,12 @@
312 325 */
313 326 time_delta = min_t(u64, time_delta,
314 327 tick_period.tv64 * delta_jiffies);
315   - expires = ktime_add_ns(last_update, time_delta);
316   - } else {
317   - expires.tv64 = KTIME_MAX;
318 328 }
319 329  
320   - /*
321   - * If this cpu is the one which updates jiffies, then
322   - * give up the assignment and let it be taken by the
323   - * cpu which runs the tick timer next, which might be
324   - * this cpu as well. If we don't drop this here the
325   - * jiffies might be stale and do_timer() never
326   - * invoked.
327   - */
328   - if (cpu == tick_do_timer_cpu)
329   - tick_do_timer_cpu = TICK_DO_TIMER_NONE;
  330 + if (time_delta < KTIME_MAX)
  331 + expires = ktime_add_ns(last_update, time_delta);
  332 + else
  333 + expires.tv64 = KTIME_MAX;
330 334  
331 335 if (delta_jiffies > 1)
332 336 cpumask_set_cpu(cpu, nohz_cpu_mask);