Commit 14131f2f98ac350ee9e73faed916d2238a8b6a0d

Authored by Ingo Molnar
1 parent 6409c4da28

tracing: implement trace_clock_*() APIs

Impact: implement new tracing timestamp APIs

Add three trace clock variants, with differing scalability/precision
tradeoffs:

 -   local: CPU-local trace clock
 -  medium: scalable global clock with some jitter
 -  global: globally monotonic, serialized clock

Make the ring-buffer use the local trace clock internally.

Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 4 changed files with 123 additions and 3 deletions

include/linux/trace_clock.h
  1 +#ifndef _LINUX_TRACE_CLOCK_H
  2 +#define _LINUX_TRACE_CLOCK_H
  3 +
  4 +/*
  5 + * 3 trace clock variants, with differing scalability/precision
  6 + * tradeoffs:
  7 + *
  8 + * - local: CPU-local trace clock
  9 + * - medium: scalable global clock with some jitter
  10 + * - global: globally monotonic, serialized clock
  11 + */
  12 +#include <linux/compiler.h>
  13 +#include <linux/types.h>
  14 +
  15 +extern u64 notrace trace_clock_local(void);
  16 +extern u64 notrace trace_clock(void);
  17 +extern u64 notrace trace_clock_global(void);
  18 +
  19 +#endif /* _LINUX_TRACE_CLOCK_H */
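
As context for the new header, here is a minimal, hypothetical sketch of how a tracer might pick between the declared clocks; the helper name and the need_global_order flag are illustrative only and are not part of this commit:

#include <linux/trace_clock.h>

/* Hypothetical caller: cheap per-CPU timestamps by default, the
 * serialized clock only when cross-CPU ordering is required.
 */
static u64 my_tracer_timestamp(bool need_global_order)
{
	if (need_global_order)
		return trace_clock_global();	/* globally monotonic, takes a lock */

	return trace_clock_local();		/* fast, per-CPU, may drift across CPUs */
}
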
kernel/trace/Makefile
... ... @@ -19,6 +19,7 @@
19 19 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
20 20  
21 21 obj-$(CONFIG_TRACING) += trace.o
  22 +obj-$(CONFIG_TRACING) += trace_clock.o
22 23 obj-$(CONFIG_TRACING) += trace_output.o
23 24 obj-$(CONFIG_TRACING) += trace_stat.o
24 25 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
kernel/trace/ring_buffer.c
... ... @@ -4,6 +4,7 @@
4 4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 5 */
6 6 #include <linux/ring_buffer.h>
  7 +#include <linux/trace_clock.h>
7 8 #include <linux/ftrace_irq.h>
8 9 #include <linux/spinlock.h>
9 10 #include <linux/debugfs.h>
... ... @@ -12,7 +13,6 @@
12 13 #include <linux/module.h>
13 14 #include <linux/percpu.h>
14 15 #include <linux/mutex.h>
15   -#include <linux/sched.h> /* used for sched_clock() (for now) */
16 16 #include <linux/init.h>
17 17 #include <linux/hash.h>
18 18 #include <linux/list.h>
19 19  
... ... @@ -112,14 +112,13 @@
112 112 /* Up this if you want to test the TIME_EXTENTS and normalization */
113 113 #define DEBUG_SHIFT 0
114 114  
115   -/* FIXME!!! */
116 115 u64 ring_buffer_time_stamp(int cpu)
117 116 {
118 117 u64 time;
119 118  
120 119 preempt_disable_notrace();
121 120 /* shift to debug/test normalization and TIME_EXTENTS */
122   - time = sched_clock() << DEBUG_SHIFT;
  121 + time = trace_clock_local() << DEBUG_SHIFT;
123 122 preempt_enable_no_resched_notrace();
124 123  
125 124 return time;
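
The DEBUG_SHIFT left-shift survives the clock switch: it inflates timestamps so that deltas between events grow large and exercise the ring buffer's TIME_EXTENTS handling. A standalone illustration of the idea, with hypothetical helper names and assuming the shift is undone again when timestamps are reported (the commit itself keeps DEBUG_SHIFT at 0):

#define DEBUG_SHIFT 4	/* illustrative non-zero value for testing */

/* Inflate a raw clock value to stress time-delta handling. */
static u64 debug_scale_ts(u64 ts)
{
	return ts << DEBUG_SHIFT;
}

/* Undo the inflation when presenting timestamps. */
static u64 debug_normalize_ts(u64 ts)
{
	return ts >> DEBUG_SHIFT;
}
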
kernel/trace/trace_clock.c
  1 +/*
  2 + * tracing clocks
  3 + *
  4 + * Copyright (C) 2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  5 + *
  6 + * Implements 3 trace clock variants, with differing scalability/precision
  7 + * tradeoffs:
  8 + *
  9 + * - local: CPU-local trace clock
  10 + * - medium: scalable global clock with some jitter
  11 + * - global: globally monotonic, serialized clock
  12 + *
  13 + * Tracer plugins will choose a default from these clocks.
  14 + */
  15 +#include <linux/spinlock.h>
  16 +#include <linux/hardirq.h>
  17 +#include <linux/module.h>
  18 +#include <linux/percpu.h>
  19 +#include <linux/sched.h>
  20 +#include <linux/ktime.h>
  21 +
  22 +/*
  23 + * trace_clock_local(): the simplest and least coherent tracing clock.
  24 + *
  25 + * Useful for tracing that neither crosses to other CPUs nor
  26 + * spans idle events.
  27 + */
  28 +u64 notrace trace_clock_local(void)
  29 +{
  30 + /*
  31 + * sched_clock() is an architecture-implemented, fast, scalable,
  32 + * lockless clock. It is not guaranteed to be coherent across
  33 + * CPUs, nor across CPU idle events.
  34 + */
  35 + return sched_clock();
  36 +}
  37 +
  38 +/*
  39 + * trace_clock(): 'in-between' trace clock. Not completely serialized,
  40 + * but not completely incorrect when crossing CPUs either.
  41 + *
  42 + * This is based on cpu_clock(), which will allow at most ~1 jiffy of
  43 + * jitter between CPUs. So it's a pretty scalable clock, but there
  44 + * can be offsets in the trace data.
  45 + */
  46 +u64 notrace trace_clock(void)
  47 +{
  48 + return cpu_clock(raw_smp_processor_id());
  49 +}
  50 +
  51 +
  52 +/*
  53 + * trace_clock_global(): special globally coherent trace clock
  54 + *
  55 + * It has higher overhead than the other trace clocks but is still
  56 + * an order of magnitude faster than GTOD-derived hardware clocks.
  57 + *
  58 + * Used by plugins that need globally coherent timestamps.
  59 + */
  60 +
  61 +static u64 prev_trace_clock_time;
  62 +
  63 +static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp =
  64 + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
  65 +
  66 +u64 notrace trace_clock_global(void)
  67 +{
  68 + unsigned long flags;
  69 + int this_cpu;
  70 + u64 now;
  71 +
  72 + raw_local_irq_save(flags);
  73 +
  74 + this_cpu = raw_smp_processor_id();
  75 + now = cpu_clock(this_cpu);
  76 + /*
  77 + * If in an NMI context then don't risk lockups and return the
  78 + * cpu_clock() time:
  79 + */
  80 + if (unlikely(in_nmi()))
  81 + goto out;
  82 +
  83 + __raw_spin_lock(&trace_clock_lock);
  84 +
  85 + /*
  86 + * TODO: if this happens often then maybe we should reset
  87 + * my_scd->clock to prev_trace_clock_time+1, to make sure
  88 + * we start ticking with the local clock from now on?
  89 + */
  90 + if ((s64)(now - prev_trace_clock_time) < 0)
  91 + now = prev_trace_clock_time + 1;
  92 +
  93 + prev_trace_clock_time = now;
  94 +
  95 + __raw_spin_unlock(&trace_clock_lock);
  96 +
  97 + out:
  98 + raw_local_irq_restore(flags);
  99 +
  100 + return now;
  101 +}
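
The global clock's monotonicity comes from the signed comparison against the last timestamp handed out: if this CPU's cpu_clock() lags the globally observed time, the result is clamped to one unit past it rather than going backwards. A minimal standalone sketch of that clamp, with a hypothetical helper name (illustrative only, not kernel code):

#include <stdint.h>

/* Hypothetical helper mirroring the clamp in trace_clock_global():
 * never let the returned time move backwards relative to *prev.
 */
static uint64_t clamp_monotonic(uint64_t now, uint64_t *prev)
{
	/* the signed difference is robust against counter wrap-around */
	if ((int64_t)(now - *prev) < 0)
		now = *prev + 1;

	*prev = now;
	return now;
}
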