Commit 3070f27d6ecb69364e7cffe16c8b15e1b8ef41dd

Authored by Linus Torvalds

Merge branch 'timers-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'timers-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  itimer: Fix the itimer trace print format
  hrtimer: move timer stats helper functions to hrtimer.c
  hrtimer: Tune hrtimer_interrupt hang logic

Showing 4 changed files (side-by-side diff)

include/linux/hrtimer.h
... ... @@ -162,10 +162,11 @@
162 162 * @expires_next: absolute time of the next event which was scheduled
163 163 * via clock_set_next_event()
164 164 * @hres_active: State of high resolution mode
165   - * @check_clocks: Indictator, when set evaluate time source and clock
166   - * event devices whether high resolution mode can be
167   - * activated.
168   - * @nr_events: Total number of timer interrupt events
  165 + * @hang_detected: The last hrtimer interrupt detected a hang
  166 + * @nr_events: Total number of hrtimer interrupt events
  167 + * @nr_retries: Total number of hrtimer interrupt retries
  168 + * @nr_hangs: Total number of hrtimer interrupt hangs
  169 + * @max_hang_time: Maximum time spent in hrtimer_interrupt
169 170 */
170 171 struct hrtimer_cpu_base {
171 172 spinlock_t lock;
172 173  
... ... @@ -173,7 +174,11 @@
173 174 #ifdef CONFIG_HIGH_RES_TIMERS
174 175 ktime_t expires_next;
175 176 int hres_active;
  177 + int hang_detected;
176 178 unsigned long nr_events;
  179 + unsigned long nr_retries;
  180 + unsigned long nr_hangs;
  181 + ktime_t max_hang_time;
177 182 #endif
178 183 };
179 184  
... ... @@ -434,49 +439,6 @@
434 439  
435 440 /* Show pending timers: */
436 441 extern void sysrq_timer_list_show(void);
437   -
438   -/*
439   - * Timer-statistics info:
440   - */
441   -#ifdef CONFIG_TIMER_STATS
442   -
443   -extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
444   - void *timerf, char *comm,
445   - unsigned int timer_flag);
446   -
447   -static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
448   -{
449   - if (likely(!timer_stats_active))
450   - return;
451   - timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
452   - timer->function, timer->start_comm, 0);
453   -}
454   -
455   -extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer,
456   - void *addr);
457   -
458   -static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
459   -{
460   - __timer_stats_hrtimer_set_start_info(timer, __builtin_return_address(0));
461   -}
462   -
463   -static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
464   -{
465   - timer->start_site = NULL;
466   -}
467   -#else
468   -static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
469   -{
470   -}
471   -
472   -static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
473   -{
474   -}
475   -
476   -static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
477   -{
478   -}
479   -#endif
480 442  
481 443 #endif
include/trace/events/timer.h
... ... @@ -301,8 +301,8 @@
301 301 __entry->interval_usec = value->it_interval.tv_usec;
302 302 ),
303 303  
304   - TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu",
305   - __entry->which, __entry->expires,
  304 + TP_printk("which=%d expires=%llu it_value=%ld.%ld it_interval=%ld.%ld",
  305 + __entry->which, (unsigned long long)__entry->expires,
306 306 __entry->value_sec, __entry->value_usec,
307 307 __entry->interval_sec, __entry->interval_usec)
308 308 );
... ... @@ -331,8 +331,8 @@
331 331 __entry->pid = pid_nr(pid);
332 332 ),
333 333  
334   - TP_printk("which=%d pid=%d now=%lu", __entry->which,
335   - (int) __entry->pid, __entry->now)
  334 + TP_printk("which=%d pid=%d now=%llu", __entry->which,
  335 + (int) __entry->pid, (unsigned long long)__entry->now)
336 336 );
337 337  
338 338 #endif /* _TRACE_TIMER_H */
kernel/hrtimer.c
... ... @@ -557,7 +557,7 @@
557 557 static int hrtimer_reprogram(struct hrtimer *timer,
558 558 struct hrtimer_clock_base *base)
559 559 {
560   - ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
  560 + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
561 561 ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
562 562 int res;
563 563  
564 564  
565 565  
... ... @@ -582,15 +582,24 @@
582 582 if (expires.tv64 < 0)
583 583 return -ETIME;
584 584  
585   - if (expires.tv64 >= expires_next->tv64)
  585 + if (expires.tv64 >= cpu_base->expires_next.tv64)
586 586 return 0;
587 587  
588 588 /*
  589 + * If a hang was detected in the last timer interrupt then we
  590 + * do not schedule a timer which is earlier than the expiry
  591 + * which we enforced in the hang detection. We want the system
  592 + * to make progress.
  593 + */
  594 + if (cpu_base->hang_detected)
  595 + return 0;
  596 +
  597 + /*
589 598 * Clockevents returns -ETIME, when the event was in the past.
590 599 */
591 600 res = tick_program_event(expires, 0);
592 601 if (!IS_ERR_VALUE(res))
593   - *expires_next = expires;
  602 + cpu_base->expires_next = expires;
594 603 return res;
595 604 }
596 605  
597 606  
598 607  
599 608  
600 609  
601 610  
602 611  
... ... @@ -747,18 +756,34 @@
747 756  
748 757 #endif /* CONFIG_HIGH_RES_TIMERS */
749 758  
750   -#ifdef CONFIG_TIMER_STATS
751   -void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
  759 +static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
752 760 {
  761 +#ifdef CONFIG_TIMER_STATS
753 762 if (timer->start_site)
754 763 return;
755   -
756   - timer->start_site = addr;
  764 + timer->start_site = __builtin_return_address(0);
757 765 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
758 766 timer->start_pid = current->pid;
  767 +#endif
759 768 }
  769 +
  770 +static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
  771 +{
  772 +#ifdef CONFIG_TIMER_STATS
  773 + timer->start_site = NULL;
760 774 #endif
  775 +}
761 776  
  777 +static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
  778 +{
  779 +#ifdef CONFIG_TIMER_STATS
  780 + if (likely(!timer_stats_active))
  781 + return;
  782 + timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
  783 + timer->function, timer->start_comm, 0);
  784 +#endif
  785 +}
  786 +
762 787 /*
763 788 * Counterpart to lock_hrtimer_base above:
764 789 */
765 790  
... ... @@ -1217,31 +1242,7 @@
1217 1242  
1218 1243 #ifdef CONFIG_HIGH_RES_TIMERS
1219 1244  
1220   -static int force_clock_reprogram;
1221   -
1222 1245 /*
1223   - * After 5 iteration's attempts, we consider that hrtimer_interrupt()
1224   - * is hanging, which could happen with something that slows the interrupt
1225   - * such as the tracing. Then we force the clock reprogramming for each future
1226   - * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
1227   - * threshold that we will overwrite.
1228   - * The next tick event will be scheduled to 3 times we currently spend on
1229   - * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
1230   - * 1/4 of their time to process the hrtimer interrupts. This is enough to
1231   - * let it running without serious starvation.
1232   - */
1233   -
1234   -static inline void
1235   -hrtimer_interrupt_hanging(struct clock_event_device *dev,
1236   - ktime_t try_time)
1237   -{
1238   - force_clock_reprogram = 1;
1239   - dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
1240   - printk(KERN_WARNING "hrtimer: interrupt too slow, "
1241   - "forcing clock min delta to %llu ns\n",
1242   - (unsigned long long) dev->min_delta_ns);
1243   -}
1244   -/*
1245 1246 * High resolution timer interrupt
1246 1247 * Called with interrupts disabled
1247 1248 */
1248 1249  
... ... @@ -1249,21 +1250,15 @@
1249 1250 {
1250 1251 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1251 1252 struct hrtimer_clock_base *base;
1252   - ktime_t expires_next, now;
1253   - int nr_retries = 0;
1254   - int i;
  1253 + ktime_t expires_next, now, entry_time, delta;
  1254 + int i, retries = 0;
1255 1255  
1256 1256 BUG_ON(!cpu_base->hres_active);
1257 1257 cpu_base->nr_events++;
1258 1258 dev->next_event.tv64 = KTIME_MAX;
1259 1259  
1260   - retry:
1261   - /* 5 retries is enough to notice a hang */
1262   - if (!(++nr_retries % 5))
1263   - hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
1264   -
1265   - now = ktime_get();
1266   -
  1260 + entry_time = now = ktime_get();
  1261 +retry:
1267 1262 expires_next.tv64 = KTIME_MAX;
1268 1263  
1269 1264 spin_lock(&cpu_base->lock);
1270 1265  
... ... @@ -1325,10 +1320,48 @@
1325 1320 spin_unlock(&cpu_base->lock);
1326 1321  
1327 1322 /* Reprogramming necessary ? */
1328   - if (expires_next.tv64 != KTIME_MAX) {
1329   - if (tick_program_event(expires_next, force_clock_reprogram))
1330   - goto retry;
  1323 + if (expires_next.tv64 == KTIME_MAX ||
  1324 + !tick_program_event(expires_next, 0)) {
  1325 + cpu_base->hang_detected = 0;
  1326 + return;
1331 1327 }
  1328 +
  1329 + /*
  1330 + * The next timer was already expired due to:
  1331 + * - tracing
  1332 + * - long lasting callbacks
  1333 + * - being scheduled away when running in a VM
  1334 + *
  1335 + * We need to prevent that we loop forever in the hrtimer
  1336 + * interrupt routine. We give it 3 attempts to avoid
  1337 + * overreacting on some spurious event.
  1338 + */
  1339 + now = ktime_get();
  1340 + cpu_base->nr_retries++;
  1341 + if (++retries < 3)
  1342 + goto retry;
  1343 + /*
  1344 + * Give the system a chance to do something else than looping
  1345 + * here. We stored the entry time, so we know exactly how long
  1346 + * we spent here. We schedule the next event this amount of
  1347 + * time away.
  1348 + */
  1349 + cpu_base->nr_hangs++;
  1350 + cpu_base->hang_detected = 1;
  1351 + delta = ktime_sub(now, entry_time);
  1352 + if (delta.tv64 > cpu_base->max_hang_time.tv64)
  1353 + cpu_base->max_hang_time = delta;
  1354 + /*
  1355 + * Limit it to a sensible value as we enforce a longer
  1356 + * delay. Give the CPU at least 100ms to catch up.
  1357 + */
  1358 + if (delta.tv64 > 100 * NSEC_PER_MSEC)
  1359 + expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
  1360 + else
  1361 + expires_next = ktime_add(now, delta);
  1362 + tick_program_event(expires_next, 1);
  1363 + printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
  1364 + ktime_to_ns(delta));
1332 1365 }
1333 1366  
1334 1367 /*
kernel/time/timer_list.c
... ... @@ -150,6 +150,9 @@
150 150 P_ns(expires_next);
151 151 P(hres_active);
152 152 P(nr_events);
  153 + P(nr_retries);
  154 + P(nr_hangs);
  155 + P_ns(max_hang_time);
153 156 #endif
154 157 #undef P
155 158 #undef P_ns
... ... @@ -254,7 +257,7 @@
254 257 u64 now = ktime_to_ns(ktime_get());
255 258 int cpu;
256 259  
257   - SEQ_printf(m, "Timer List Version: v0.4\n");
  260 + SEQ_printf(m, "Timer List Version: v0.5\n");
258 261 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
259 262 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
260 263