Commit 3070f27d6ecb69364e7cffe16c8b15e1b8ef41dd
Exists in
master
and in
7 other branches
Merge branch 'timers-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kern…
…el/git/tip/linux-2.6-tip * 'timers-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: itimer: Fix the itimer trace print format hrtimer: move timer stats helper functions to hrtimer.c hrtimer: Tune hrtimer_interrupt hang logic
Showing 4 changed files Side-by-side Diff
include/linux/hrtimer.h
... | ... | @@ -162,10 +162,11 @@ |
162 | 162 | * @expires_next: absolute time of the next event which was scheduled |
163 | 163 | * via clock_set_next_event() |
164 | 164 | * @hres_active: State of high resolution mode |
165 | - * @check_clocks: Indictator, when set evaluate time source and clock | |
166 | - * event devices whether high resolution mode can be | |
167 | - * activated. | |
168 | - * @nr_events: Total number of timer interrupt events | |
165 | + * @hang_detected: The last hrtimer interrupt detected a hang | |
166 | + * @nr_events: Total number of hrtimer interrupt events | |
167 | + * @nr_retries: Total number of hrtimer interrupt retries | |
168 | + * @nr_hangs: Total number of hrtimer interrupt hangs | |
169 | + * @max_hang_time: Maximum time spent in hrtimer_interrupt | |
169 | 170 | */ |
170 | 171 | struct hrtimer_cpu_base { |
171 | 172 | spinlock_t lock; |
172 | 173 | |
... | ... | @@ -173,7 +174,11 @@ |
173 | 174 | #ifdef CONFIG_HIGH_RES_TIMERS |
174 | 175 | ktime_t expires_next; |
175 | 176 | int hres_active; |
177 | + int hang_detected; | |
176 | 178 | unsigned long nr_events; |
179 | + unsigned long nr_retries; | |
180 | + unsigned long nr_hangs; | |
181 | + ktime_t max_hang_time; | |
177 | 182 | #endif |
178 | 183 | }; |
179 | 184 | |
... | ... | @@ -434,49 +439,6 @@ |
434 | 439 | |
435 | 440 | /* Show pending timers: */ |
436 | 441 | extern void sysrq_timer_list_show(void); |
437 | - | |
438 | -/* | |
439 | - * Timer-statistics info: | |
440 | - */ | |
441 | -#ifdef CONFIG_TIMER_STATS | |
442 | - | |
443 | -extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |
444 | - void *timerf, char *comm, | |
445 | - unsigned int timer_flag); | |
446 | - | |
447 | -static inline void timer_stats_account_hrtimer(struct hrtimer *timer) | |
448 | -{ | |
449 | - if (likely(!timer_stats_active)) | |
450 | - return; | |
451 | - timer_stats_update_stats(timer, timer->start_pid, timer->start_site, | |
452 | - timer->function, timer->start_comm, 0); | |
453 | -} | |
454 | - | |
455 | -extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, | |
456 | - void *addr); | |
457 | - | |
458 | -static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) | |
459 | -{ | |
460 | - __timer_stats_hrtimer_set_start_info(timer, __builtin_return_address(0)); | |
461 | -} | |
462 | - | |
463 | -static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) | |
464 | -{ | |
465 | - timer->start_site = NULL; | |
466 | -} | |
467 | -#else | |
468 | -static inline void timer_stats_account_hrtimer(struct hrtimer *timer) | |
469 | -{ | |
470 | -} | |
471 | - | |
472 | -static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) | |
473 | -{ | |
474 | -} | |
475 | - | |
476 | -static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) | |
477 | -{ | |
478 | -} | |
479 | -#endif | |
480 | 442 | |
481 | 443 | #endif |
include/trace/events/timer.h
... | ... | @@ -301,8 +301,8 @@ |
301 | 301 | __entry->interval_usec = value->it_interval.tv_usec; |
302 | 302 | ), |
303 | 303 | |
304 | - TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu", | |
305 | - __entry->which, __entry->expires, | |
304 | + TP_printk("which=%d expires=%llu it_value=%ld.%ld it_interval=%ld.%ld", | |
305 | + __entry->which, (unsigned long long)__entry->expires, | |
306 | 306 | __entry->value_sec, __entry->value_usec, |
307 | 307 | __entry->interval_sec, __entry->interval_usec) |
308 | 308 | ); |
... | ... | @@ -331,8 +331,8 @@ |
331 | 331 | __entry->pid = pid_nr(pid); |
332 | 332 | ), |
333 | 333 | |
334 | - TP_printk("which=%d pid=%d now=%lu", __entry->which, | |
335 | - (int) __entry->pid, __entry->now) | |
334 | + TP_printk("which=%d pid=%d now=%llu", __entry->which, | |
335 | + (int) __entry->pid, (unsigned long long)__entry->now) | |
336 | 336 | ); |
337 | 337 | |
338 | 338 | #endif /* _TRACE_TIMER_H */ |
kernel/hrtimer.c
... | ... | @@ -557,7 +557,7 @@ |
557 | 557 | static int hrtimer_reprogram(struct hrtimer *timer, |
558 | 558 | struct hrtimer_clock_base *base) |
559 | 559 | { |
560 | - ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next; | |
560 | + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | |
561 | 561 | ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); |
562 | 562 | int res; |
563 | 563 | |
564 | 564 | |
565 | 565 | |
... | ... | @@ -582,15 +582,24 @@ |
582 | 582 | if (expires.tv64 < 0) |
583 | 583 | return -ETIME; |
584 | 584 | |
585 | - if (expires.tv64 >= expires_next->tv64) | |
585 | + if (expires.tv64 >= cpu_base->expires_next.tv64) | |
586 | 586 | return 0; |
587 | 587 | |
588 | 588 | /* |
589 | + * If a hang was detected in the last timer interrupt then we | |
590 | + * do not schedule a timer which is earlier than the expiry | |
591 | + * which we enforced in the hang detection. We want the system | |
592 | + * to make progress. | |
593 | + */ | |
594 | + if (cpu_base->hang_detected) | |
595 | + return 0; | |
596 | + | |
597 | + /* | |
589 | 598 | * Clockevents returns -ETIME, when the event was in the past. |
590 | 599 | */ |
591 | 600 | res = tick_program_event(expires, 0); |
592 | 601 | if (!IS_ERR_VALUE(res)) |
593 | - *expires_next = expires; | |
602 | + cpu_base->expires_next = expires; | |
594 | 603 | return res; |
595 | 604 | } |
596 | 605 | |
597 | 606 | |
598 | 607 | |
599 | 608 | |
600 | 609 | |
601 | 610 | |
602 | 611 | |
... | ... | @@ -747,18 +756,34 @@ |
747 | 756 | |
748 | 757 | #endif /* CONFIG_HIGH_RES_TIMERS */ |
749 | 758 | |
750 | -#ifdef CONFIG_TIMER_STATS | |
751 | -void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr) | |
759 | +static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) | |
752 | 760 | { |
761 | +#ifdef CONFIG_TIMER_STATS | |
753 | 762 | if (timer->start_site) |
754 | 763 | return; |
755 | - | |
756 | - timer->start_site = addr; | |
764 | + timer->start_site = __builtin_return_address(0); | |
757 | 765 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); |
758 | 766 | timer->start_pid = current->pid; |
767 | +#endif | |
759 | 768 | } |
769 | + | |
770 | +static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) | |
771 | +{ | |
772 | +#ifdef CONFIG_TIMER_STATS | |
773 | + timer->start_site = NULL; | |
760 | 774 | #endif |
775 | +} | |
761 | 776 | |
777 | +static inline void timer_stats_account_hrtimer(struct hrtimer *timer) | |
778 | +{ | |
779 | +#ifdef CONFIG_TIMER_STATS | |
780 | + if (likely(!timer_stats_active)) | |
781 | + return; | |
782 | + timer_stats_update_stats(timer, timer->start_pid, timer->start_site, | |
783 | + timer->function, timer->start_comm, 0); | |
784 | +#endif | |
785 | +} | |
786 | + | |
762 | 787 | /* |
763 | 788 | * Counterpart to lock_hrtimer_base above: |
764 | 789 | */ |
765 | 790 | |
... | ... | @@ -1217,31 +1242,7 @@ |
1217 | 1242 | |
1218 | 1243 | #ifdef CONFIG_HIGH_RES_TIMERS |
1219 | 1244 | |
1220 | -static int force_clock_reprogram; | |
1221 | - | |
1222 | 1245 | /* |
1223 | - * After 5 iteration's attempts, we consider that hrtimer_interrupt() | |
1224 | - * is hanging, which could happen with something that slows the interrupt | |
1225 | - * such as the tracing. Then we force the clock reprogramming for each future | |
1226 | - * hrtimer interrupts to avoid infinite loops and use the min_delta_ns | |
1227 | - * threshold that we will overwrite. | |
1228 | - * The next tick event will be scheduled to 3 times we currently spend on | |
1229 | - * hrtimer_interrupt(). This gives a good compromise, the cpus will spend | |
1230 | - * 1/4 of their time to process the hrtimer interrupts. This is enough to | |
1231 | - * let it running without serious starvation. | |
1232 | - */ | |
1233 | - | |
1234 | -static inline void | |
1235 | -hrtimer_interrupt_hanging(struct clock_event_device *dev, | |
1236 | - ktime_t try_time) | |
1237 | -{ | |
1238 | - force_clock_reprogram = 1; | |
1239 | - dev->min_delta_ns = (unsigned long)try_time.tv64 * 3; | |
1240 | - printk(KERN_WARNING "hrtimer: interrupt too slow, " | |
1241 | - "forcing clock min delta to %llu ns\n", | |
1242 | - (unsigned long long) dev->min_delta_ns); | |
1243 | -} | |
1244 | -/* | |
1245 | 1246 | * High resolution timer interrupt |
1246 | 1247 | * Called with interrupts disabled |
1247 | 1248 | */ |
1248 | 1249 | |
... | ... | @@ -1249,21 +1250,15 @@ |
1249 | 1250 | { |
1250 | 1251 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); |
1251 | 1252 | struct hrtimer_clock_base *base; |
1252 | - ktime_t expires_next, now; | |
1253 | - int nr_retries = 0; | |
1254 | - int i; | |
1253 | + ktime_t expires_next, now, entry_time, delta; | |
1254 | + int i, retries = 0; | |
1255 | 1255 | |
1256 | 1256 | BUG_ON(!cpu_base->hres_active); |
1257 | 1257 | cpu_base->nr_events++; |
1258 | 1258 | dev->next_event.tv64 = KTIME_MAX; |
1259 | 1259 | |
1260 | - retry: | |
1261 | - /* 5 retries is enough to notice a hang */ | |
1262 | - if (!(++nr_retries % 5)) | |
1263 | - hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now)); | |
1264 | - | |
1265 | - now = ktime_get(); | |
1266 | - | |
1260 | + entry_time = now = ktime_get(); | |
1261 | +retry: | |
1267 | 1262 | expires_next.tv64 = KTIME_MAX; |
1268 | 1263 | |
1269 | 1264 | spin_lock(&cpu_base->lock); |
1270 | 1265 | |
... | ... | @@ -1325,10 +1320,48 @@ |
1325 | 1320 | spin_unlock(&cpu_base->lock); |
1326 | 1321 | |
1327 | 1322 | /* Reprogramming necessary ? */ |
1328 | - if (expires_next.tv64 != KTIME_MAX) { | |
1329 | - if (tick_program_event(expires_next, force_clock_reprogram)) | |
1330 | - goto retry; | |
1323 | + if (expires_next.tv64 == KTIME_MAX || | |
1324 | + !tick_program_event(expires_next, 0)) { | |
1325 | + cpu_base->hang_detected = 0; | |
1326 | + return; | |
1331 | 1327 | } |
1328 | + | |
1329 | + /* | |
1330 | + * The next timer was already expired due to: | |
1331 | + * - tracing | |
1332 | + * - long lasting callbacks | |
1333 | + * - being scheduled away when running in a VM | |
1334 | + * | |
1335 | + * We need to prevent that we loop forever in the hrtimer | |
1336 | + * interrupt routine. We give it 3 attempts to avoid | |
1337 | + * overreacting on some spurious event. | |
1338 | + */ | |
1339 | + now = ktime_get(); | |
1340 | + cpu_base->nr_retries++; | |
1341 | + if (++retries < 3) | |
1342 | + goto retry; | |
1343 | + /* | |
1344 | + * Give the system a chance to do something else than looping | |
1345 | + * here. We stored the entry time, so we know exactly how long | |
1346 | + * we spent here. We schedule the next event this amount of | |
1347 | + * time away. | |
1348 | + */ | |
1349 | + cpu_base->nr_hangs++; | |
1350 | + cpu_base->hang_detected = 1; | |
1351 | + delta = ktime_sub(now, entry_time); | |
1352 | + if (delta.tv64 > cpu_base->max_hang_time.tv64) | |
1353 | + cpu_base->max_hang_time = delta; | |
1354 | + /* | |
1355 | + * Limit it to a sensible value as we enforce a longer | |
1356 | + * delay. Give the CPU at least 100ms to catch up. | |
1357 | + */ | |
1358 | + if (delta.tv64 > 100 * NSEC_PER_MSEC) | |
1359 | + expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC); | |
1360 | + else | |
1361 | + expires_next = ktime_add(now, delta); | |
1362 | + tick_program_event(expires_next, 1); | |
1363 | + printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n", | |
1364 | + ktime_to_ns(delta)); | |
1332 | 1365 | } |
1333 | 1366 | |
1334 | 1367 | /* |
kernel/time/timer_list.c
... | ... | @@ -150,6 +150,9 @@ |
150 | 150 | P_ns(expires_next); |
151 | 151 | P(hres_active); |
152 | 152 | P(nr_events); |
153 | + P(nr_retries); | |
154 | + P(nr_hangs); | |
155 | + P_ns(max_hang_time); | |
153 | 156 | #endif |
154 | 157 | #undef P |
155 | 158 | #undef P_ns |
... | ... | @@ -254,7 +257,7 @@ |
254 | 257 | u64 now = ktime_to_ns(ktime_get()); |
255 | 258 | int cpu; |
256 | 259 | |
257 | - SEQ_printf(m, "Timer List Version: v0.4\n"); | |
260 | + SEQ_printf(m, "Timer List Version: v0.5\n"); | |
258 | 261 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); |
259 | 262 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); |
260 | 263 |