Commit 82f67cd9fca8c8762c15ba7ed0d5747588c1e221
Committed by
Linus Torvalds
1 parent
8bfd9a7a22
Exists in
master
and in
4 other branches
[PATCH] Add debugging feature /proc/timer_stat
Add /proc/timer_stats support: debugging feature to profile timer expiration. Both the starting site, process/PID and the expiration function is captured. This allows the quick identification of timer event sources in a system. Sample output: # echo 1 > /proc/timer_stats # cat /proc/timer_stats Timer Stats Version: v0.1 Sample period: 4.010 s 24, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick) 11, 0 swapper sk_reset_timer (tcp_delack_timer) 6, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick) 2, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) 17, 0 swapper hrtimer_restart_sched_tick (hrtimer_sched_tick) 2, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) 4, 2050 pcscd do_nanosleep (hrtimer_wakeup) 5, 4179 sshd sk_reset_timer (tcp_write_timer) 4, 2248 yum-updatesd schedule_timeout (process_timeout) 18, 0 swapper hrtimer_restart_sched_tick (hrtimer_sched_tick) 3, 0 swapper sk_reset_timer (tcp_delack_timer) 1, 1 swapper neigh_table_init_no_netlink (neigh_periodic_timer) 2, 1 swapper e1000_up (e1000_watchdog) 1, 1 init schedule_timeout (process_timeout) 100 total events, 25.24 events/sec [ cleanups and hrtimers support from Thomas Gleixner <tglx@linutronix.de> ] [bunk@stusta.de: nr_entries can become static] Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: john stultz <johnstul@us.ibm.com> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Andi Kleen <ak@suse.de> Signed-off-by: Adrian Bunk <bunk@stusta.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 9 changed files with 650 additions and 4 deletions Side-by-side Diff
Documentation/hrtimer/timer_stats.txt
1 | +timer_stats - timer usage statistics | |
2 | +------------------------------------ | |
3 | + | |
4 | +timer_stats is a debugging facility to make the timer (ab)usage in a Linux | |
5 | +system visible to kernel and userspace developers. It is not intended for | |
6 | +production usage as it adds significant overhead to the (hr)timer code and the | |
7 | +(hr)timer data structures. | |
8 | + | |
9 | +timer_stats should be used by kernel and userspace developers to verify that | |
10 | +their code does not make unduly use of timers. This helps to avoid unnecessary | |
11 | +wakeups, which should be avoided to optimize power consumption. | |
12 | + | |
13 | +It can be enabled by CONFIG_TIMER_STATS in the "Kernel hacking" configuration | |
14 | +section. | |
15 | + | |
16 | +timer_stats collects information about the timer events which are fired in a | |
17 | +Linux system over a sample period: | |
18 | + | |
19 | +- the pid of the task(process) which initialized the timer | |
20 | +- the name of the process which initialized the timer | |
21 | +- the function where the timer was intialized | |
22 | +- the callback function which is associated to the timer | |
23 | +- the number of events (callbacks) | |
24 | + | |
25 | +timer_stats adds an entry to /proc: /proc/timer_stats | |
26 | + | |
27 | +This entry is used to control the statistics functionality and to read out the | |
28 | +sampled information. | |
29 | + | |
30 | +The timer_stats functionality is inactive on bootup. | |
31 | + | |
32 | +To activate a sample period issue: | |
33 | +# echo 1 >/proc/timer_stats | |
34 | + | |
35 | +To stop a sample period issue: | |
36 | +# echo 0 >/proc/timer_stats | |
37 | + | |
38 | +The statistics can be retrieved by: | |
39 | +# cat /proc/timer_stats | |
40 | + | |
41 | +The readout of /proc/timer_stats automatically disables sampling. The sampled | |
42 | +information is kept until a new sample period is started. This allows multiple | |
43 | +readouts. | |
44 | + | |
45 | +Sample output of /proc/timer_stats: | |
46 | + | |
47 | +Timerstats sample period: 3.888770 s | |
48 | + 12, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick) | |
49 | + 15, 1 swapper hcd_submit_urb (rh_timer_func) | |
50 | + 4, 959 kedac schedule_timeout (process_timeout) | |
51 | + 1, 0 swapper page_writeback_init (wb_timer_fn) | |
52 | + 28, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick) | |
53 | + 22, 2948 IRQ 4 tty_flip_buffer_push (delayed_work_timer_fn) | |
54 | + 3, 3100 bash schedule_timeout (process_timeout) | |
55 | + 1, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) | |
56 | + 1, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) | |
57 | + 1, 1 swapper neigh_table_init_no_netlink (neigh_periodic_timer) | |
58 | + 1, 2292 ip __netdev_watchdog_up (dev_watchdog) | |
59 | + 1, 23 events/1 do_cache_clean (delayed_work_timer_fn) | |
60 | +90 total events, 30.0 events/sec | |
61 | + | |
62 | +The first column is the number of events, the second column the pid, the third | |
63 | +column is the name of the process. The forth column shows the function which | |
64 | +initialized the timer and in parantheses the callback function which was | |
65 | +executed on expiry. | |
66 | + | |
67 | + Thomas, Ingo |
include/linux/hrtimer.h
... | ... | @@ -119,6 +119,11 @@ |
119 | 119 | enum hrtimer_cb_mode cb_mode; |
120 | 120 | struct list_head cb_entry; |
121 | 121 | #endif |
122 | +#ifdef CONFIG_TIMER_STATS | |
123 | + void *start_site; | |
124 | + char start_comm[16]; | |
125 | + int start_pid; | |
126 | +#endif | |
122 | 127 | }; |
123 | 128 | |
124 | 129 | /** |
... | ... | @@ -309,6 +314,46 @@ |
309 | 314 | extern unsigned long ktime_divns(const ktime_t kt, s64 div); |
310 | 315 | #else /* BITS_PER_LONG < 64 */ |
311 | 316 | # define ktime_divns(kt, div) (unsigned long)((kt).tv64 / (div)) |
317 | +#endif | |
318 | + | |
319 | +/* | |
320 | + * Timer-statistics info: | |
321 | + */ | |
322 | +#ifdef CONFIG_TIMER_STATS | |
323 | + | |
324 | +extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |
325 | + void *timerf, char * comm); | |
326 | + | |
327 | +static inline void timer_stats_account_hrtimer(struct hrtimer *timer) | |
328 | +{ | |
329 | + timer_stats_update_stats(timer, timer->start_pid, timer->start_site, | |
330 | + timer->function, timer->start_comm); | |
331 | +} | |
332 | + | |
333 | +extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, | |
334 | + void *addr); | |
335 | + | |
336 | +static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) | |
337 | +{ | |
338 | + __timer_stats_hrtimer_set_start_info(timer, __builtin_return_address(0)); | |
339 | +} | |
340 | + | |
341 | +static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) | |
342 | +{ | |
343 | + timer->start_site = NULL; | |
344 | +} | |
345 | +#else | |
346 | +static inline void timer_stats_account_hrtimer(struct hrtimer *timer) | |
347 | +{ | |
348 | +} | |
349 | + | |
350 | +static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) | |
351 | +{ | |
352 | +} | |
353 | + | |
354 | +static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) | |
355 | +{ | |
356 | +} | |
312 | 357 | #endif |
313 | 358 | |
314 | 359 | #endif |
include/linux/timer.h
... | ... | @@ -2,6 +2,7 @@ |
2 | 2 | #define _LINUX_TIMER_H |
3 | 3 | |
4 | 4 | #include <linux/list.h> |
5 | +#include <linux/ktime.h> | |
5 | 6 | #include <linux/spinlock.h> |
6 | 7 | #include <linux/stddef.h> |
7 | 8 | |
... | ... | @@ -15,6 +16,11 @@ |
15 | 16 | unsigned long data; |
16 | 17 | |
17 | 18 | struct tvec_t_base_s *base; |
19 | +#ifdef CONFIG_TIMER_STATS | |
20 | + void *start_site; | |
21 | + char start_comm[16]; | |
22 | + int start_pid; | |
23 | +#endif | |
18 | 24 | }; |
19 | 25 | |
20 | 26 | extern struct tvec_t_base_s boot_tvec_bases; |
... | ... | @@ -72,6 +78,54 @@ |
72 | 78 | * jiffie. |
73 | 79 | */ |
74 | 80 | extern unsigned long get_next_timer_interrupt(unsigned long now); |
81 | + | |
82 | +/* | |
83 | + * Timer-statistics info: | |
84 | + */ | |
85 | +#ifdef CONFIG_TIMER_STATS | |
86 | + | |
87 | +extern void init_timer_stats(void); | |
88 | + | |
89 | +extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |
90 | + void *timerf, char * comm); | |
91 | + | |
92 | +static inline void timer_stats_account_timer(struct timer_list *timer) | |
93 | +{ | |
94 | + timer_stats_update_stats(timer, timer->start_pid, timer->start_site, | |
95 | + timer->function, timer->start_comm); | |
96 | +} | |
97 | + | |
98 | +extern void __timer_stats_timer_set_start_info(struct timer_list *timer, | |
99 | + void *addr); | |
100 | + | |
101 | +static inline void timer_stats_timer_set_start_info(struct timer_list *timer) | |
102 | +{ | |
103 | + __timer_stats_timer_set_start_info(timer, __builtin_return_address(0)); | |
104 | +} | |
105 | + | |
106 | +static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) | |
107 | +{ | |
108 | + timer->start_site = NULL; | |
109 | +} | |
110 | +#else | |
111 | +static inline void init_timer_stats(void) | |
112 | +{ | |
113 | +} | |
114 | + | |
115 | +static inline void timer_stats_account_timer(struct timer_list *timer) | |
116 | +{ | |
117 | +} | |
118 | + | |
119 | +static inline void timer_stats_timer_set_start_info(struct timer_list *timer) | |
120 | +{ | |
121 | +} | |
122 | + | |
123 | +static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) | |
124 | +{ | |
125 | +} | |
126 | +#endif | |
127 | + | |
128 | +extern void delayed_work_timer_fn(unsigned long __data); | |
75 | 129 | |
76 | 130 | /** |
77 | 131 | * add_timer - start a timer |
kernel/hrtimer.c
... | ... | @@ -585,6 +585,18 @@ |
585 | 585 | |
586 | 586 | #endif /* CONFIG_HIGH_RES_TIMERS */ |
587 | 587 | |
588 | +#ifdef CONFIG_TIMER_STATS | |
589 | +void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr) | |
590 | +{ | |
591 | + if (timer->start_site) | |
592 | + return; | |
593 | + | |
594 | + timer->start_site = addr; | |
595 | + memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); | |
596 | + timer->start_pid = current->pid; | |
597 | +} | |
598 | +#endif | |
599 | + | |
588 | 600 | /* |
589 | 601 | * Counterpart to lock_timer_base above: |
590 | 602 | */ |
... | ... | @@ -743,6 +755,7 @@ |
743 | 755 | * reprogramming happens in the interrupt handler. This is a |
744 | 756 | * rare case and less expensive than a smp call. |
745 | 757 | */ |
758 | + timer_stats_hrtimer_clear_start_info(timer); | |
746 | 759 | reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); |
747 | 760 | __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, |
748 | 761 | reprogram); |
... | ... | @@ -791,6 +804,8 @@ |
791 | 804 | } |
792 | 805 | timer->expires = tim; |
793 | 806 | |
807 | + timer_stats_hrtimer_set_start_info(timer); | |
808 | + | |
794 | 809 | enqueue_hrtimer(timer, new_base, base == new_base); |
795 | 810 | |
796 | 811 | unlock_hrtimer_base(timer, &flags); |
... | ... | @@ -925,6 +940,12 @@ |
925 | 940 | |
926 | 941 | timer->base = &cpu_base->clock_base[clock_id]; |
927 | 942 | hrtimer_init_timer_hres(timer); |
943 | + | |
944 | +#ifdef CONFIG_TIMER_STATS | |
945 | + timer->start_site = NULL; | |
946 | + timer->start_pid = -1; | |
947 | + memset(timer->start_comm, 0, TASK_COMM_LEN); | |
948 | +#endif | |
928 | 949 | } |
929 | 950 | EXPORT_SYMBOL_GPL(hrtimer_init); |
930 | 951 | |
... | ... | @@ -1006,6 +1027,7 @@ |
1006 | 1027 | |
1007 | 1028 | __remove_hrtimer(timer, base, |
1008 | 1029 | HRTIMER_STATE_CALLBACK, 0); |
1030 | + timer_stats_account_hrtimer(timer); | |
1009 | 1031 | |
1010 | 1032 | /* |
1011 | 1033 | * Note: We clear the CALLBACK bit after |
... | ... | @@ -1050,6 +1072,8 @@ |
1050 | 1072 | timer = list_entry(cpu_base->cb_pending.next, |
1051 | 1073 | struct hrtimer, cb_entry); |
1052 | 1074 | |
1075 | + timer_stats_account_hrtimer(timer); | |
1076 | + | |
1053 | 1077 | fn = timer->function; |
1054 | 1078 | __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); |
1055 | 1079 | spin_unlock_irq(&cpu_base->lock); |
... | ... | @@ -1105,6 +1129,8 @@ |
1105 | 1129 | timer = rb_entry(node, struct hrtimer, node); |
1106 | 1130 | if (base->softirq_time.tv64 <= timer->expires.tv64) |
1107 | 1131 | break; |
1132 | + | |
1133 | + timer_stats_account_hrtimer(timer); | |
1108 | 1134 | |
1109 | 1135 | fn = timer->function; |
1110 | 1136 | __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); |
kernel/time/Makefile
kernel/time/timer_stats.c
1 | +/* | |
2 | + * kernel/time/timer_stats.c | |
3 | + * | |
4 | + * Collect timer usage statistics. | |
5 | + * | |
6 | + * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar | |
7 | + * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
8 | + * | |
9 | + * timer_stats is based on timer_top, a similar functionality which was part of | |
10 | + * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the | |
11 | + * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based | |
12 | + * on dynamic allocation of the statistics entries and linear search based | |
13 | + * lookup combined with a global lock, rather than the static array, hash | |
14 | + * and per-CPU locking which is used by timer_stats. It was written for the | |
15 | + * pre hrtimer kernel code and therefore did not take hrtimers into account. | |
16 | + * Nevertheless it provided the base for the timer_stats implementation and | |
17 | + * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks | |
18 | + * for this effort. | |
19 | + * | |
20 | + * timer_top.c is | |
21 | + * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus | |
22 | + * Written by Daniel Petrini <d.pensator@gmail.com> | |
23 | + * timer_top.c was released under the GNU General Public License version 2 | |
24 | + * | |
25 | + * We export the addresses and counting of timer functions being called, | |
26 | + * the pid and cmdline from the owner process if applicable. | |
27 | + * | |
28 | + * Start/stop data collection: | |
29 | + * # echo 1[0] >/proc/timer_stats | |
30 | + * | |
31 | + * Display the information collected so far: | |
32 | + * # cat /proc/timer_stats | |
33 | + * | |
34 | + * This program is free software; you can redistribute it and/or modify | |
35 | + * it under the terms of the GNU General Public License version 2 as | |
36 | + * published by the Free Software Foundation. | |
37 | + */ | |
38 | + | |
39 | +#include <linux/proc_fs.h> | |
40 | +#include <linux/module.h> | |
41 | +#include <linux/spinlock.h> | |
42 | +#include <linux/sched.h> | |
43 | +#include <linux/seq_file.h> | |
44 | +#include <linux/kallsyms.h> | |
45 | + | |
46 | +#include <asm/uaccess.h> | |
47 | + | |
48 | +/* | |
49 | + * This is our basic unit of interest: a timer expiry event identified | |
50 | + * by the timer, its start/expire functions and the PID of the task that | |
51 | + * started the timer. We count the number of times an event happens: | |
52 | + */ | |
53 | +struct entry { | |
54 | + /* | |
55 | + * Hash list: | |
56 | + */ | |
57 | + struct entry *next; | |
58 | + | |
59 | + /* | |
60 | + * Hash keys: | |
61 | + */ | |
62 | + void *timer; | |
63 | + void *start_func; | |
64 | + void *expire_func; | |
65 | + pid_t pid; | |
66 | + | |
67 | + /* | |
68 | + * Number of timeout events: | |
69 | + */ | |
70 | + unsigned long count; | |
71 | + | |
72 | + /* | |
73 | + * We save the command-line string to preserve | |
74 | + * this information past task exit: | |
75 | + */ | |
76 | + char comm[TASK_COMM_LEN + 1]; | |
77 | + | |
78 | +} ____cacheline_aligned_in_smp; | |
79 | + | |
80 | +/* | |
81 | + * Spinlock protecting the tables - not taken during lookup: | |
82 | + */ | |
83 | +static DEFINE_SPINLOCK(table_lock); | |
84 | + | |
85 | +/* | |
86 | + * Per-CPU lookup locks for fast hash lookup: | |
87 | + */ | |
88 | +static DEFINE_PER_CPU(spinlock_t, lookup_lock); | |
89 | + | |
90 | +/* | |
91 | + * Mutex to serialize state changes with show-stats activities: | |
92 | + */ | |
93 | +static DEFINE_MUTEX(show_mutex); | |
94 | + | |
95 | +/* | |
96 | + * Collection status, active/inactive: | |
97 | + */ | |
98 | +static int __read_mostly active; | |
99 | + | |
100 | +/* | |
101 | + * Beginning/end timestamps of measurement: | |
102 | + */ | |
103 | +static ktime_t time_start, time_stop; | |
104 | + | |
105 | +/* | |
106 | + * tstat entry structs only get allocated while collection is | |
107 | + * active and never freed during that time - this simplifies | |
108 | + * things quite a bit. | |
109 | + * | |
110 | + * They get freed when a new collection period is started. | |
111 | + */ | |
112 | +#define MAX_ENTRIES_BITS 10 | |
113 | +#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS) | |
114 | + | |
115 | +static unsigned long nr_entries; | |
116 | +static struct entry entries[MAX_ENTRIES]; | |
117 | + | |
118 | +static atomic_t overflow_count; | |
119 | + | |
120 | +static void reset_entries(void) | |
121 | +{ | |
122 | + nr_entries = 0; | |
123 | + memset(entries, 0, sizeof(entries)); | |
124 | + atomic_set(&overflow_count, 0); | |
125 | +} | |
126 | + | |
127 | +static struct entry *alloc_entry(void) | |
128 | +{ | |
129 | + if (nr_entries >= MAX_ENTRIES) | |
130 | + return NULL; | |
131 | + | |
132 | + return entries + nr_entries++; | |
133 | +} | |
134 | + | |
135 | +/* | |
136 | + * The entries are in a hash-table, for fast lookup: | |
137 | + */ | |
138 | +#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1) | |
139 | +#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS) | |
140 | +#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1) | |
141 | + | |
142 | +#define __tstat_hashfn(entry) \ | |
143 | + (((unsigned long)(entry)->timer ^ \ | |
144 | + (unsigned long)(entry)->start_func ^ \ | |
145 | + (unsigned long)(entry)->expire_func ^ \ | |
146 | + (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK) | |
147 | + | |
148 | +#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry)) | |
149 | + | |
150 | +static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly; | |
151 | + | |
152 | +static int match_entries(struct entry *entry1, struct entry *entry2) | |
153 | +{ | |
154 | + return entry1->timer == entry2->timer && | |
155 | + entry1->start_func == entry2->start_func && | |
156 | + entry1->expire_func == entry2->expire_func && | |
157 | + entry1->pid == entry2->pid; | |
158 | +} | |
159 | + | |
160 | +/* | |
161 | + * Look up whether an entry matching this item is present | |
162 | + * in the hash already. Must be called with irqs off and the | |
163 | + * lookup lock held: | |
164 | + */ | |
165 | +static struct entry *tstat_lookup(struct entry *entry, char *comm) | |
166 | +{ | |
167 | + struct entry **head, *curr, *prev; | |
168 | + | |
169 | + head = tstat_hashentry(entry); | |
170 | + curr = *head; | |
171 | + | |
172 | + /* | |
173 | + * The fastpath is when the entry is already hashed, | |
174 | + * we do this with the lookup lock held, but with the | |
175 | + * table lock not held: | |
176 | + */ | |
177 | + while (curr) { | |
178 | + if (match_entries(curr, entry)) | |
179 | + return curr; | |
180 | + | |
181 | + curr = curr->next; | |
182 | + } | |
183 | + /* | |
184 | + * Slowpath: allocate, set up and link a new hash entry: | |
185 | + */ | |
186 | + prev = NULL; | |
187 | + curr = *head; | |
188 | + | |
189 | + spin_lock(&table_lock); | |
190 | + /* | |
191 | + * Make sure we have not raced with another CPU: | |
192 | + */ | |
193 | + while (curr) { | |
194 | + if (match_entries(curr, entry)) | |
195 | + goto out_unlock; | |
196 | + | |
197 | + prev = curr; | |
198 | + curr = curr->next; | |
199 | + } | |
200 | + | |
201 | + curr = alloc_entry(); | |
202 | + if (curr) { | |
203 | + *curr = *entry; | |
204 | + curr->count = 0; | |
205 | + memcpy(curr->comm, comm, TASK_COMM_LEN); | |
206 | + if (prev) | |
207 | + prev->next = curr; | |
208 | + else | |
209 | + *head = curr; | |
210 | + curr->next = NULL; | |
211 | + } | |
212 | + out_unlock: | |
213 | + spin_unlock(&table_lock); | |
214 | + | |
215 | + return curr; | |
216 | +} | |
217 | + | |
218 | +/** | |
219 | + * timer_stats_update_stats - Update the statistics for a timer. | |
220 | + * @timer: pointer to either a timer_list or a hrtimer | |
221 | + * @pid: the pid of the task which set up the timer | |
222 | + * @startf: pointer to the function which did the timer setup | |
223 | + * @timerf: pointer to the timer callback function of the timer | |
224 | + * @comm: name of the process which set up the timer | |
225 | + * | |
226 | + * When the timer is already registered, then the event counter is | |
227 | + * incremented. Otherwise the timer is registered in a free slot. | |
228 | + */ | |
229 | +void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |
230 | + void *timerf, char * comm) | |
231 | +{ | |
232 | + /* | |
233 | + * It doesnt matter which lock we take: | |
234 | + */ | |
235 | + spinlock_t *lock = &per_cpu(lookup_lock, raw_smp_processor_id()); | |
236 | + struct entry *entry, input; | |
237 | + unsigned long flags; | |
238 | + | |
239 | + input.timer = timer; | |
240 | + input.start_func = startf; | |
241 | + input.expire_func = timerf; | |
242 | + input.pid = pid; | |
243 | + | |
244 | + spin_lock_irqsave(lock, flags); | |
245 | + if (!active) | |
246 | + goto out_unlock; | |
247 | + | |
248 | + entry = tstat_lookup(&input, comm); | |
249 | + if (likely(entry)) | |
250 | + entry->count++; | |
251 | + else | |
252 | + atomic_inc(&overflow_count); | |
253 | + | |
254 | + out_unlock: | |
255 | + spin_unlock_irqrestore(lock, flags); | |
256 | +} | |
257 | + | |
258 | +static void print_name_offset(struct seq_file *m, unsigned long addr) | |
259 | +{ | |
260 | + char namebuf[KSYM_NAME_LEN+1]; | |
261 | + unsigned long size, offset; | |
262 | + const char *sym_name; | |
263 | + char *modname; | |
264 | + | |
265 | + sym_name = kallsyms_lookup(addr, &size, &offset, &modname, namebuf); | |
266 | + if (sym_name) | |
267 | + seq_printf(m, "%s", sym_name); | |
268 | + else | |
269 | + seq_printf(m, "<%p>", (void *)addr); | |
270 | +} | |
271 | + | |
272 | +static int tstats_show(struct seq_file *m, void *v) | |
273 | +{ | |
274 | + struct timespec period; | |
275 | + struct entry *entry; | |
276 | + unsigned long ms; | |
277 | + long events = 0; | |
278 | + ktime_t time; | |
279 | + int i; | |
280 | + | |
281 | + mutex_lock(&show_mutex); | |
282 | + /* | |
283 | + * If still active then calculate up to now: | |
284 | + */ | |
285 | + if (active) | |
286 | + time_stop = ktime_get(); | |
287 | + | |
288 | + time = ktime_sub(time_stop, time_start); | |
289 | + | |
290 | + period = ktime_to_timespec(time); | |
291 | + ms = period.tv_nsec / 1000000; | |
292 | + | |
293 | + seq_puts(m, "Timer Stats Version: v0.1\n"); | |
294 | + seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); | |
295 | + if (atomic_read(&overflow_count)) | |
296 | + seq_printf(m, "Overflow: %d entries\n", | |
297 | + atomic_read(&overflow_count)); | |
298 | + | |
299 | + for (i = 0; i < nr_entries; i++) { | |
300 | + entry = entries + i; | |
301 | + seq_printf(m, "%4lu, %5d %-16s ", | |
302 | + entry->count, entry->pid, entry->comm); | |
303 | + | |
304 | + print_name_offset(m, (unsigned long)entry->start_func); | |
305 | + seq_puts(m, " ("); | |
306 | + print_name_offset(m, (unsigned long)entry->expire_func); | |
307 | + seq_puts(m, ")\n"); | |
308 | + | |
309 | + events += entry->count; | |
310 | + } | |
311 | + | |
312 | + ms += period.tv_sec * 1000; | |
313 | + if (!ms) | |
314 | + ms = 1; | |
315 | + | |
316 | + if (events && period.tv_sec) | |
317 | + seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events, | |
318 | + events / period.tv_sec, events * 1000 / ms); | |
319 | + else | |
320 | + seq_printf(m, "%ld total events\n", events); | |
321 | + | |
322 | + mutex_unlock(&show_mutex); | |
323 | + | |
324 | + return 0; | |
325 | +} | |
326 | + | |
327 | +/* | |
328 | + * After a state change, make sure all concurrent lookup/update | |
329 | + * activities have stopped: | |
330 | + */ | |
331 | +static void sync_access(void) | |
332 | +{ | |
333 | + unsigned long flags; | |
334 | + int cpu; | |
335 | + | |
336 | + for_each_online_cpu(cpu) { | |
337 | + spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags); | |
338 | + /* nothing */ | |
339 | + spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags); | |
340 | + } | |
341 | +} | |
342 | + | |
343 | +static ssize_t tstats_write(struct file *file, const char __user *buf, | |
344 | + size_t count, loff_t *offs) | |
345 | +{ | |
346 | + char ctl[2]; | |
347 | + | |
348 | + if (count != 2 || *offs) | |
349 | + return -EINVAL; | |
350 | + | |
351 | + if (copy_from_user(ctl, buf, count)) | |
352 | + return -EFAULT; | |
353 | + | |
354 | + mutex_lock(&show_mutex); | |
355 | + switch (ctl[0]) { | |
356 | + case '0': | |
357 | + if (active) { | |
358 | + active = 0; | |
359 | + time_stop = ktime_get(); | |
360 | + sync_access(); | |
361 | + } | |
362 | + break; | |
363 | + case '1': | |
364 | + if (!active) { | |
365 | + reset_entries(); | |
366 | + time_start = ktime_get(); | |
367 | + active = 1; | |
368 | + } | |
369 | + break; | |
370 | + default: | |
371 | + count = -EINVAL; | |
372 | + } | |
373 | + mutex_unlock(&show_mutex); | |
374 | + | |
375 | + return count; | |
376 | +} | |
377 | + | |
378 | +static int tstats_open(struct inode *inode, struct file *filp) | |
379 | +{ | |
380 | + return single_open(filp, tstats_show, NULL); | |
381 | +} | |
382 | + | |
383 | +static struct file_operations tstats_fops = { | |
384 | + .open = tstats_open, | |
385 | + .read = seq_read, | |
386 | + .write = tstats_write, | |
387 | + .llseek = seq_lseek, | |
388 | + .release = seq_release, | |
389 | +}; | |
390 | + | |
391 | +void __init init_timer_stats(void) | |
392 | +{ | |
393 | + int cpu; | |
394 | + | |
395 | + for_each_possible_cpu(cpu) | |
396 | + spin_lock_init(&per_cpu(lookup_lock, cpu)); | |
397 | +} | |
398 | + | |
399 | +static int __init init_tstats_procfs(void) | |
400 | +{ | |
401 | + struct proc_dir_entry *pe; | |
402 | + | |
403 | + pe = create_proc_entry("timer_stats", 0644, NULL); | |
404 | + if (!pe) | |
405 | + return -ENOMEM; | |
406 | + | |
407 | + pe->proc_fops = &tstats_fops; | |
408 | + | |
409 | + return 0; | |
410 | +} | |
411 | +__initcall(init_tstats_procfs); |
kernel/timer.c
... | ... | @@ -35,6 +35,7 @@ |
35 | 35 | #include <linux/syscalls.h> |
36 | 36 | #include <linux/delay.h> |
37 | 37 | #include <linux/tick.h> |
38 | +#include <linux/kallsyms.h> | |
38 | 39 | |
39 | 40 | #include <asm/uaccess.h> |
40 | 41 | #include <asm/unistd.h> |
... | ... | @@ -263,6 +264,18 @@ |
263 | 264 | list_add_tail(&timer->entry, vec); |
264 | 265 | } |
265 | 266 | |
267 | +#ifdef CONFIG_TIMER_STATS | |
268 | +void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) | |
269 | +{ | |
270 | + if (timer->start_site) | |
271 | + return; | |
272 | + | |
273 | + timer->start_site = addr; | |
274 | + memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); | |
275 | + timer->start_pid = current->pid; | |
276 | +} | |
277 | +#endif | |
278 | + | |
266 | 279 | /** |
267 | 280 | * init_timer - initialize a timer. |
268 | 281 | * @timer: the timer to be initialized |
269 | 282 | |
... | ... | @@ -274,11 +287,16 @@ |
274 | 287 | { |
275 | 288 | timer->entry.next = NULL; |
276 | 289 | timer->base = __raw_get_cpu_var(tvec_bases); |
290 | +#ifdef CONFIG_TIMER_STATS | |
291 | + timer->start_site = NULL; | |
292 | + timer->start_pid = -1; | |
293 | + memset(timer->start_comm, 0, TASK_COMM_LEN); | |
294 | +#endif | |
277 | 295 | } |
278 | 296 | EXPORT_SYMBOL(init_timer); |
279 | 297 | |
280 | 298 | static inline void detach_timer(struct timer_list *timer, |
281 | - int clear_pending) | |
299 | + int clear_pending) | |
282 | 300 | { |
283 | 301 | struct list_head *entry = &timer->entry; |
284 | 302 | |
... | ... | @@ -325,6 +343,7 @@ |
325 | 343 | unsigned long flags; |
326 | 344 | int ret = 0; |
327 | 345 | |
346 | + timer_stats_timer_set_start_info(timer); | |
328 | 347 | BUG_ON(!timer->function); |
329 | 348 | |
330 | 349 | base = lock_timer_base(timer, &flags); |
... | ... | @@ -375,6 +394,7 @@ |
375 | 394 | tvec_base_t *base = per_cpu(tvec_bases, cpu); |
376 | 395 | unsigned long flags; |
377 | 396 | |
397 | + timer_stats_timer_set_start_info(timer); | |
378 | 398 | BUG_ON(timer_pending(timer) || !timer->function); |
379 | 399 | spin_lock_irqsave(&base->lock, flags); |
380 | 400 | timer->base = base; |
... | ... | @@ -407,6 +427,7 @@ |
407 | 427 | { |
408 | 428 | BUG_ON(!timer->function); |
409 | 429 | |
430 | + timer_stats_timer_set_start_info(timer); | |
410 | 431 | /* |
411 | 432 | * This is a common optimization triggered by the |
412 | 433 | * networking code - if the timer is re-modified |
... | ... | @@ -437,6 +458,7 @@ |
437 | 458 | unsigned long flags; |
438 | 459 | int ret = 0; |
439 | 460 | |
461 | + timer_stats_timer_clear_start_info(timer); | |
440 | 462 | if (timer_pending(timer)) { |
441 | 463 | base = lock_timer_base(timer, &flags); |
442 | 464 | if (timer_pending(timer)) { |
... | ... | @@ -570,6 +592,8 @@ |
570 | 592 | fn = timer->function; |
571 | 593 | data = timer->data; |
572 | 594 | |
595 | + timer_stats_account_timer(timer); | |
596 | + | |
573 | 597 | set_running_timer(base, timer); |
574 | 598 | detach_timer(timer, 1); |
575 | 599 | spin_unlock_irq(&base->lock); |
... | ... | @@ -1229,7 +1253,8 @@ |
1229 | 1253 | { |
1230 | 1254 | tvec_base_t *base = __get_cpu_var(tvec_bases); |
1231 | 1255 | |
1232 | - hrtimer_run_queues(); | |
1256 | + hrtimer_run_queues(); | |
1257 | + | |
1233 | 1258 | if (time_after_eq(jiffies, base->timer_jiffies)) |
1234 | 1259 | __run_timers(base); |
1235 | 1260 | } |
... | ... | @@ -1674,6 +1699,8 @@ |
1674 | 1699 | { |
1675 | 1700 | int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, |
1676 | 1701 | (void *)(long)smp_processor_id()); |
1702 | + | |
1703 | + init_timer_stats(); | |
1677 | 1704 | |
1678 | 1705 | BUG_ON(err == NOTIFY_BAD); |
1679 | 1706 | register_cpu_notifier(&timers_nb); |
kernel/workqueue.c
... | ... | @@ -218,7 +218,7 @@ |
218 | 218 | } |
219 | 219 | EXPORT_SYMBOL_GPL(queue_work); |
220 | 220 | |
221 | -static void delayed_work_timer_fn(unsigned long __data) | |
221 | +void delayed_work_timer_fn(unsigned long __data) | |
222 | 222 | { |
223 | 223 | struct delayed_work *dwork = (struct delayed_work *)__data; |
224 | 224 | struct workqueue_struct *wq = get_wq_data(&dwork->work); |
... | ... | @@ -245,6 +245,7 @@ |
245 | 245 | struct timer_list *timer = &dwork->timer; |
246 | 246 | struct work_struct *work = &dwork->work; |
247 | 247 | |
248 | + timer_stats_timer_set_start_info(timer); | |
248 | 249 | if (delay == 0) |
249 | 250 | return queue_work(wq, work); |
250 | 251 | |
251 | 252 | |
... | ... | @@ -593,8 +594,10 @@ |
593 | 594 | * After waiting for a given time this puts a job in the kernel-global |
594 | 595 | * workqueue. |
595 | 596 | */ |
596 | -int fastcall schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) | |
597 | +int fastcall schedule_delayed_work(struct delayed_work *dwork, | |
598 | + unsigned long delay) | |
597 | 599 | { |
600 | + timer_stats_timer_set_start_info(&dwork->timer); | |
598 | 601 | return queue_delayed_work(keventd_wq, dwork, delay); |
599 | 602 | } |
600 | 603 | EXPORT_SYMBOL(schedule_delayed_work); |
lib/Kconfig.debug
... | ... | @@ -134,6 +134,17 @@ |
134 | 134 | application, you can say N to avoid the very slight overhead |
135 | 135 | this adds. |
136 | 136 | |
137 | +config TIMER_STATS | |
138 | + bool "Collect kernel timers statistics" | |
139 | + depends on DEBUG_KERNEL && PROC_FS | |
140 | + help | |
141 | + If you say Y here, additional code will be inserted into the | |
142 | + timer routines to collect statistics about kernel timers being | |
143 | + reprogrammed. The statistics can be read from /proc/timer_stats. | |
144 | + The statistics collection is started by writing 1 to /proc/timer_stats, | |
145 | + writing 0 stops it. This feature is useful to collect information | |
146 | + about timer usage patterns in kernel and userspace. | |
147 | + | |
137 | 148 | config DEBUG_SLAB |
138 | 149 | bool "Debug slab memory allocations" |
139 | 150 | depends on DEBUG_KERNEL && SLAB |