Commit 27185016b806d5a1181ff501cae120582b2b27dd
1 parent
98962465ed
Exists in
master
and in
20 other branches
nohz: Track last do_timer() cpu
The previous patch, which limits the sleep time to the maximum deferment time of the timekeeping clocksource, has some limitations on SMP machines: if all CPUs are idle, then the maximum sleep time is limited for all CPUs. Solve this by keeping track of which CPU had the do_timer() duty assigned last, and limit the sleep time only for this CPU.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Cc: Jon Hunter <jon-hunter@ti.com>
Cc: John Stultz <johnstul@us.ibm.com>
Showing 2 changed files with 30 additions and 24 deletions Side-by-side Diff
include/linux/tick.h
... | ... | @@ -43,6 +43,7 @@ |
43 | 43 | * @idle_exittime: Time when the idle state was left |
44 | 44 | * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped |
45 | 45 | * @sleep_length: Duration of the current idle sleep |
46 | + * @do_timer_last: CPU was the last one doing do_timer before going idle | |
46 | 47 | */ |
47 | 48 | struct tick_sched { |
48 | 49 | struct hrtimer sched_timer; |
... | ... | @@ -64,6 +65,7 @@ |
64 | 65 | unsigned long last_jiffies; |
65 | 66 | unsigned long next_jiffies; |
66 | 67 | ktime_t idle_expires; |
68 | + int do_timer_last; | |
67 | 69 | }; |
68 | 70 | |
69 | 71 | extern void __init tick_init(void); |
kernel/time/tick-sched.c
... | ... | @@ -263,17 +263,7 @@ |
263 | 263 | seq = read_seqbegin(&xtime_lock); |
264 | 264 | last_update = last_jiffies_update; |
265 | 265 | last_jiffies = jiffies; |
266 | - | |
267 | - /* | |
268 | - * On SMP we really should only care for the CPU which | |
269 | - * has the do_timer duty assigned. All other CPUs can | |
270 | - * sleep as long as they want. | |
271 | - */ | |
272 | - if (cpu == tick_do_timer_cpu || | |
273 | - tick_do_timer_cpu == TICK_DO_TIMER_NONE) | |
274 | - time_delta = timekeeping_max_deferment(); | |
275 | - else | |
276 | - time_delta = KTIME_MAX; | |
266 | + time_delta = timekeeping_max_deferment(); | |
277 | 267 | } while (read_seqretry(&xtime_lock, seq)); |
278 | 268 | |
279 | 269 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || |
... | ... | @@ -296,6 +286,29 @@ |
296 | 286 | if ((long)delta_jiffies >= 1) { |
297 | 287 | |
298 | 288 | /* |
289 | + * If this cpu is the one which updates jiffies, then | |
290 | + * give up the assignment and let it be taken by the | |
291 | + * cpu which runs the tick timer next, which might be | |
292 | + * this cpu as well. If we don't drop this here the | |
293 | + * jiffies might be stale and do_timer() never | |
294 | + * invoked. Keep track of the fact that it was the one | |
295 | + * which had the do_timer() duty last. If this cpu is | |
296 | + * the one which had the do_timer() duty last, we | |
297 | + * limit the sleep time to the timekeeping | |
298 | + * max_deferment value which we retrieved | |
299 | + * above. Otherwise we can sleep as long as we want. | |
300 | + */ | |
301 | + if (cpu == tick_do_timer_cpu) { | |
302 | + tick_do_timer_cpu = TICK_DO_TIMER_NONE; | |
303 | + ts->do_timer_last = 1; | |
304 | + } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { | |
305 | + time_delta = KTIME_MAX; | |
306 | + ts->do_timer_last = 0; | |
307 | + } else if (!ts->do_timer_last) { | |
308 | + time_delta = KTIME_MAX; | |
309 | + } | |
310 | + | |
311 | + /* | |
299 | 312 | * calculate the expiry time for the next timer wheel |
300 | 313 | * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals |
301 | 314 | * that there is no timer pending or at least extremely |
302 | 315 | |
... | ... | @@ -312,21 +325,12 @@ |
312 | 325 | */ |
313 | 326 | time_delta = min_t(u64, time_delta, |
314 | 327 | tick_period.tv64 * delta_jiffies); |
315 | - expires = ktime_add_ns(last_update, time_delta); | |
316 | - } else { | |
317 | - expires.tv64 = KTIME_MAX; | |
318 | 328 | } |
319 | 329 | |
320 | - /* | |
321 | - * If this cpu is the one which updates jiffies, then | |
322 | - * give up the assignment and let it be taken by the | |
323 | - * cpu which runs the tick timer next, which might be | |
324 | - * this cpu as well. If we don't drop this here the | |
325 | - * jiffies might be stale and do_timer() never | |
326 | - * invoked. | |
327 | - */ | |
328 | - if (cpu == tick_do_timer_cpu) | |
329 | - tick_do_timer_cpu = TICK_DO_TIMER_NONE; | |
330 | + if (time_delta < KTIME_MAX) | |
331 | + expires = ktime_add_ns(last_update, time_delta); | |
332 | + else | |
333 | + expires.tv64 = KTIME_MAX; | |
330 | 334 | |
331 | 335 | if (delta_jiffies > 1) |
332 | 336 | cpumask_set_cpu(cpu, nohz_cpu_mask); |