Commit 14131f2f98ac350ee9e73faed916d2238a8b6a0d
Parent: 6409c4da28
tracing: implement trace_clock_*() APIs
Impact: implement new tracing timestamp APIs

Add three trace clock variants, with differing scalability/precision
tradeoffs:

 - local: CPU-local trace clock
 - medium: scalable global clock with some jitter
 - global: globally monotonic, serialized clock

Make the ring-buffer use the local trace clock internally.

Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Showing 4 changed files with 123 additions and 3 deletions
include/linux/trace_clock.h
+#ifndef _LINUX_TRACE_CLOCK_H
+#define _LINUX_TRACE_CLOCK_H
+
+/*
+ * 3 trace clock variants, with differing scalability/precision
+ * tradeoffs:
+ *
+ *  - local: CPU-local trace clock
+ *  - medium: scalable global clock with some jitter
+ *  - global: globally monotonic, serialized clock
+ */
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+extern u64 notrace trace_clock_local(void);
+extern u64 notrace trace_clock(void);
+extern u64 notrace trace_clock_global(void);
+
+#endif /* _LINUX_TRACE_CLOCK_H */
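
The header gives tracer code a menu of three timestamp sources. For illustration, a minimal sketch of how a caller might pick one per recorded event; the demo_event struct and demo_stamp_*() helpers are hypothetical, only the three trace_clock_*() declarations above come from this commit:

#include <linux/trace_clock.h>

/* hypothetical event record, for illustration only */
struct demo_event {
	u64	ts;
};

static void demo_stamp_local(struct demo_event *ev)
{
	/* fastest path: per-CPU clock, may be incoherent across CPUs */
	ev->ts = trace_clock_local();
}

static void demo_stamp_medium(struct demo_event *ev)
{
	/* scalable, with bounded (~1 jiffy) cross-CPU jitter */
	ev->ts = trace_clock();
}

static void demo_stamp_global(struct demo_event *ev)
{
	/* fully ordered across CPUs, at the cost of a global lock */
	ev->ts = trace_clock_global();
}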
kernel/trace/Makefile
@@ -19,6 +19,7 @@
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_TRACING) += trace_clock.o
 obj-$(CONFIG_TRACING) += trace_output.o
 obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
kernel/trace/ring_buffer.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  */
 #include <linux/ring_buffer.h>
+#include <linux/trace_clock.h>
 #include <linux/ftrace_irq.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
@@ -12,7 +13,6 @@
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
-#include <linux/sched.h>	/* used for sched_clock() (for now) */
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/list.h>
 
@@ -112,14 +112,13 @@
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
-/* FIXME!!! */
 u64 ring_buffer_time_stamp(int cpu)
 {
 	u64 time;
 
 	preempt_disable_notrace();
 	/* shift to debug/test normalization and TIME_EXTENTS */
-	time = sched_clock() << DEBUG_SHIFT;
+	time = trace_clock_local() << DEBUG_SHIFT;
 	preempt_enable_no_resched_notrace();
 
 	return time;
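
The DEBUG_SHIFT define explains why the timestamp is shifted at all: with a nonzero shift, recorded deltas are inflated so the ring buffer's TIME_EXTENTS handling and timestamp normalization paths get exercised, with the read side expected to shift right by the same amount to recover the real value. A standalone sketch of that round trip (plain userspace C; the matching right shift on the normalize path is assumed, this hunk does not show it):

#include <stdint.h>
#include <stdio.h>

#define DEBUG_SHIFT 4			/* 0 in production; nonzero to stress-test */

static uint64_t fake_clock(void)	/* stand-in for trace_clock_local() */
{
	return 1000000ULL;
}

int main(void)
{
	uint64_t raw        = fake_clock();
	uint64_t stamped    = raw << DEBUG_SHIFT;	/* inflated on record */
	uint64_t normalized = stamped >> DEBUG_SHIFT;	/* recovered on read */

	printf("raw=%llu stamped=%llu normalized=%llu\n",
	       (unsigned long long)raw,
	       (unsigned long long)stamped,
	       (unsigned long long)normalized);
	return 0;
}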
kernel/trace/trace_clock.c
+/*
+ * tracing clocks
+ *
+ *  Copyright (C) 2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Implements 3 trace clock variants, with differing scalability/precision
+ * tradeoffs:
+ *
+ *  - local: CPU-local trace clock
+ *  - medium: scalable global clock with some jitter
+ *  - global: globally monotonic, serialized clock
+ *
+ * Tracer plugins will choose a default from these clocks.
+ */
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/ktime.h>
+
+/*
+ * trace_clock_local(): the simplest and least coherent tracing clock.
+ *
+ * Useful for tracing that does not cross to other CPUs nor
+ * does it go through idle events.
+ */
+u64 notrace trace_clock_local(void)
+{
+	/*
+	 * sched_clock() is an architecture implemented, fast, scalable,
+	 * lockless clock. It is not guaranteed to be coherent across
+	 * CPUs, nor across CPU idle events.
+	 */
+	return sched_clock();
+}
+
+/*
+ * trace_clock(): 'inbetween' trace clock. Not completely serialized,
+ * but not completely incorrect when crossing CPUs either.
+ *
+ * This is based on cpu_clock(), which will allow at most ~1 jiffy of
+ * jitter between CPUs. So it's a pretty scalable clock, but there
+ * can be offsets in the trace data.
+ */
+u64 notrace trace_clock(void)
+{
+	return cpu_clock(raw_smp_processor_id());
+}
+
+
+/*
+ * trace_clock_global(): special globally coherent trace clock
+ *
+ * It has higher overhead than the other trace clocks but is still
+ * an order of magnitude faster than GTOD derived hardware clocks.
+ *
+ * Used by plugins that need globally coherent timestamps.
+ */
+
+static u64 prev_trace_clock_time;
+
+static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp =
+	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+u64 notrace trace_clock_global(void)
+{
+	unsigned long flags;
+	int this_cpu;
+	u64 now;
+
+	raw_local_irq_save(flags);
+
+	this_cpu = raw_smp_processor_id();
+	now = cpu_clock(this_cpu);
+	/*
+	 * If in an NMI context then don't risk lockups and return the
+	 * cpu_clock() time:
+	 */
+	if (unlikely(in_nmi()))
+		goto out;
+
+	__raw_spin_lock(&trace_clock_lock);
+
+	/*
+	 * TODO: if this happens often then maybe we should reset
+	 * my_scd->clock to prev_trace_clock_time+1, to make sure
+	 * we start ticking with the local clock from now on?
+	 */
+	if ((s64)(now - prev_trace_clock_time) < 0)
+		now = prev_trace_clock_time + 1;
+
+	prev_trace_clock_time = now;
+
+	__raw_spin_unlock(&trace_clock_lock);
+
+ out:
+	raw_local_irq_restore(flags);
+
+	return now;
+}
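
The heart of trace_clock_global() is the signed-difference test: casting now - prev_trace_clock_time to s64 detects a reading that fell behind the last stamp handed out (cross-CPU skew) even if the u64 counter has wrapped, and nudges it one tick past the previous value so callers always observe a monotonic sequence. A self-contained userspace sketch of the same technique, substituting a pthread mutex for the raw spinlock and a deliberately skewed stub for cpu_clock() (all names here are illustrative):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t clock_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t prev_time;		/* last timestamp handed out */

static uint64_t cpu_clock_stub(void);	/* skewed per-CPU clock stand-in */

static uint64_t global_clock(void)
{
	uint64_t now = cpu_clock_stub();

	pthread_mutex_lock(&clock_lock);
	/*
	 * Signed difference: if 'now' is behind the last value we
	 * returned, push it one tick ahead so the stream of returned
	 * timestamps never goes backwards.
	 */
	if ((int64_t)(now - prev_time) < 0)
		now = prev_time + 1;
	prev_time = now;
	pthread_mutex_unlock(&clock_lock);

	return now;
}

/* toy clock that occasionally steps backwards, like skewed CPUs */
static uint64_t cpu_clock_stub(void)
{
	static uint64_t t;

	t += 10;
	return (t % 30 == 0) ? t - 15 : t;
}

int main(void)
{
	for (int i = 0; i < 6; i++)
		printf("%llu\n", (unsigned long long)global_clock());
	return 0;	/* prints 10 20 21 40 50 51: monotonic despite skew */
}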