Commit ec1bb60bbff0386c3ec25360e7a8c72f467a6ff1
Exists in master and in 7 other branches
Merge branch 'tracing/sysprof' into auto-ftrace-next
Showing 6 changed files
kernel/trace/Kconfig
... | ... | @@ -75,6 +75,14 @@ |
75 | 75 | enabled. This option and the irqs-off timing option can be |
76 | 76 | used together or separately.) |
77 | 77 | |
78 | +config SYSPROF_TRACER | |
79 | + bool "Sysprof Tracer" | |
80 | + depends on X86 | |
81 | + select TRACING | |
82 | + help | |
83 | + This tracer provides the trace needed by the 'Sysprof' userspace | |
84 | + tool. | |
85 | + | |
78 | 86 | config SCHED_TRACER |
79 | 87 | bool "Scheduling Latency Tracer" |
80 | 88 | depends on HAVE_FTRACE |
kernel/trace/Makefile
... | ... | @@ -14,6 +14,7 @@ |
14 | 14 | |
15 | 15 | obj-$(CONFIG_TRACING) += trace.o |
16 | 16 | obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o |
17 | +obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o | |
17 | 18 | obj-$(CONFIG_FTRACE) += trace_functions.o |
18 | 19 | obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o |
19 | 20 | obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o |
kernel/trace/trace.c
... | ... | @@ -2913,6 +2913,9 @@ |
2913 | 2913 | pr_warning("Could not create debugfs " |
2914 | 2914 | "'dyn_ftrace_total_info' entry\n"); |
2915 | 2915 | #endif |
2916 | +#ifdef CONFIG_SYSPROF_TRACER | |
2917 | + init_tracer_sysprof_debugfs(d_tracer); | |
2918 | +#endif | |
2916 | 2919 | } |
2917 | 2920 | |
2918 | 2921 | static int trace_alloc_page(void) |
kernel/trace/trace.h
... | ... | @@ -193,6 +193,8 @@ |
193 | 193 | void tracing_reset(struct trace_array_cpu *data); |
194 | 194 | int tracing_open_generic(struct inode *inode, struct file *filp); |
195 | 195 | struct dentry *tracing_init_dentry(void); |
196 | +void init_tracer_sysprof_debugfs(struct dentry *d_tracer); | |
197 | + | |
196 | 198 | void ftrace(struct trace_array *tr, |
197 | 199 | struct trace_array_cpu *data, |
198 | 200 | unsigned long ip, |
... | ... | @@ -292,6 +294,10 @@ |
292 | 294 | #endif |
293 | 295 | #ifdef CONFIG_CONTEXT_SWITCH_TRACER |
294 | 296 | extern int trace_selftest_startup_sched_switch(struct tracer *trace, |
297 | + struct trace_array *tr); | |
298 | +#endif | |
299 | +#ifdef CONFIG_SYSPROF_TRACER | |
300 | +extern int trace_selftest_startup_sysprof(struct tracer *trace, | |
295 | 301 | struct trace_array *tr); |
296 | 302 | #endif |
297 | 303 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ |
kernel/trace/trace_selftest.c
... | ... | @@ -538,4 +538,27 @@ |
538 | 538 | return ret; |
539 | 539 | } |
540 | 540 | #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ |
541 | + | |
542 | +#ifdef CONFIG_SYSPROF_TRACER | |
543 | +int | |
544 | +trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) | |
545 | +{ | |
546 | + unsigned long count; | |
547 | + int ret; | |
548 | + | |
549 | + /* start the tracing */ | |
550 | + tr->ctrl = 1; | |
551 | + trace->init(tr); | |
552 | + /* Sleep for 1/10 of a second */ | 
553 | + msleep(100); | |
554 | + /* stop the tracing. */ | |
555 | + tr->ctrl = 0; | |
556 | + trace->ctrl_update(tr); | |
557 | + /* check the trace buffer */ | |
558 | + ret = trace_test_buffer(tr, &count); | |
559 | + trace->reset(tr); | |
560 | + | |
561 | + return ret; | |
562 | +} | |
563 | +#endif /* CONFIG_SYSPROF_TRACER */ |
kernel/trace/trace_sysprof.c
1 | +/* | |
2 | + * trace stack traces | |
3 | + * | |
4 | + * Copyright (C) 2004-2008, Soeren Sandmann | |
5 | + * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com> | |
6 | + * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> | |
7 | + */ | |
8 | +#include <linux/kallsyms.h> | |
9 | +#include <linux/debugfs.h> | |
10 | +#include <linux/hrtimer.h> | |
11 | +#include <linux/uaccess.h> | |
12 | +#include <linux/ftrace.h> | |
13 | +#include <linux/module.h> | |
14 | +#include <linux/irq.h> | |
15 | +#include <linux/fs.h> | |
16 | + | |
17 | +#include <asm/stacktrace.h> | |
18 | + | |
19 | +#include "trace.h" | |
20 | + | |
21 | +static struct trace_array *sysprof_trace; | |
22 | +static int __read_mostly tracer_enabled; | |
23 | + | |
24 | +/* | |
25 | + * 1 msec sample interval by default: | |
26 | + */ | |
27 | +static unsigned long sample_period = 1000000; | |
28 | +static const unsigned int sample_max_depth = 512; | |
29 | + | |
30 | +static DEFINE_MUTEX(sample_timer_lock); | |
31 | +/* | |
32 | + * Per CPU hrtimers that do the profiling: | |
33 | + */ | |
34 | +static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer); | |
35 | + | |
36 | +struct stack_frame { | |
37 | + const void __user *next_fp; | |
38 | + unsigned long return_address; | |
39 | +}; | |
40 | + | |
41 | +static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | |
42 | +{ | |
43 | + int ret; | |
44 | + | |
45 | + if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) | |
46 | + return 0; | |
47 | + | |
48 | + ret = 1; | |
49 | + pagefault_disable(); | |
50 | + if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) | |
51 | + ret = 0; | |
52 | + pagefault_enable(); | |
53 | + | |
54 | + return ret; | |
55 | +} | |
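
copy_stack_frame() above leans on the classic x86 frame-pointer layout: each frame starts with the saved frame pointer, immediately followed by the return address, which is exactly the shape of struct stack_frame. A minimal user-space sketch of the same chain walk, assuming x86 and a build with -fno-omit-frame-pointer so the chain actually exists (the __user annotation is dropped outside the kernel):

/* frame_walk.c -- illustrative only; gcc -O0 -fno-omit-frame-pointer frame_walk.c */
#include <stdio.h>

struct stack_frame {
	const void *next_fp;		/* saved frame pointer of the caller */
	unsigned long return_address;	/* saved return address */
};

static void walk(void)
{
	/* the kernel starts from regs->bp; here we start from our own frame */
	const struct stack_frame *fp = __builtin_frame_address(0);
	int depth = 0;

	/* same loop shape as the walk in this file: bounded depth, stop on a bad fp */
	while (fp && depth < 16) {
		printf("%2d: fp=%p ret=%#lx\n", depth, (const void *)fp,
		       fp->return_address);
		fp = fp->next_fp;
		depth++;
	}
}

int main(void)
{
	walk();
	return 0;
}
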
56 | + | |
57 | +struct backtrace_info { | |
58 | + struct trace_array_cpu *data; | |
59 | + struct trace_array *tr; | |
60 | + int pos; | |
61 | +}; | |
62 | + | |
63 | +static void | |
64 | +backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) | |
65 | +{ | |
66 | + /* Ignore warnings */ | |
67 | +} | |
68 | + | |
69 | +static void backtrace_warning(void *data, char *msg) | |
70 | +{ | |
71 | + /* Ignore warnings */ | |
72 | +} | |
73 | + | |
74 | +static int backtrace_stack(void *data, char *name) | |
75 | +{ | |
76 | + /* Don't bother with IRQ stacks for now */ | |
77 | + return -1; | |
78 | +} | |
79 | + | |
80 | +static void backtrace_address(void *data, unsigned long addr, int reliable) | |
81 | +{ | |
82 | + struct backtrace_info *info = data; | |
83 | + | |
84 | + if (info->pos < sample_max_depth && reliable) { | |
85 | + __trace_special(info->tr, info->data, 1, addr, 0); | |
86 | + | |
87 | + info->pos++; | |
88 | + } | |
89 | +} | |
90 | + | |
91 | +static const struct stacktrace_ops backtrace_ops = { | 
92 | + .warning = backtrace_warning, | |
93 | + .warning_symbol = backtrace_warning_symbol, | |
94 | + .stack = backtrace_stack, | |
95 | + .address = backtrace_address, | |
96 | +}; | |
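
These stacktrace_ops hand control to dump_trace(), which scans the kernel stack and reports back through the callbacks: .stack is asked before the walker switches to another stack (returning -1 declines), and .address is invoked once per candidate return address. A hypothetical stand-alone illustration of that callback protocol follows; every name in it is invented, not taken from the kernel API:

#include <stdio.h>

struct walk_ops {
	int  (*stack)(void *data, char *name);	/* <0: don't enter that stack */
	void (*address)(void *data, unsigned long addr, int reliable);
};

/* stand-in for dump_trace(): "finds" a fixed list of return addresses */
static void fake_dump_trace(const struct walk_ops *ops, void *data)
{
	static const unsigned long addrs[] = { 0xc0100000UL, 0xc0123456UL };
	unsigned int i;

	(void)ops->stack(data, "IRQ");	/* backtrace_stack() above always declines */
	for (i = 0; i < sizeof(addrs) / sizeof(addrs[0]); i++)
		ops->address(data, addrs[i], 1);
}

static int decline_stack(void *data, char *name) { return -1; }

static void record_address(void *data, unsigned long addr, int reliable)
{
	int *pos = data;	/* plays the role of backtrace_info::pos */

	if (reliable)
		printf("%2d: %#lx\n", (*pos)++, addr);
}

int main(void)
{
	static const struct walk_ops ops = {
		.stack   = decline_stack,
		.address = record_address,
	};
	int pos = 1;	/* slot 0 is regs->ip, as in trace_kernel() */

	fake_dump_trace(&ops, &pos);
	return 0;
}
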
97 | + | |
98 | +static int | |
99 | +trace_kernel(struct pt_regs *regs, struct trace_array *tr, | |
100 | + struct trace_array_cpu *data) | |
101 | +{ | |
102 | + struct backtrace_info info; | |
103 | + unsigned long bp; | |
104 | + char *stack; | |
105 | + | |
106 | + info.tr = tr; | |
107 | + info.data = data; | |
108 | + info.pos = 1; | |
109 | + | |
110 | + __trace_special(info.tr, info.data, 1, regs->ip, 0); | |
111 | + | |
112 | + stack = ((char *)regs + sizeof(struct pt_regs)); | |
113 | +#ifdef CONFIG_FRAME_POINTER | |
114 | + bp = regs->bp; | |
115 | +#else | |
116 | + bp = 0; | |
117 | +#endif | |
118 | + | |
119 | + dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info); | |
120 | + | |
121 | + return info.pos; | |
122 | +} | |
123 | + | |
124 | +static void timer_notify(struct pt_regs *regs, int cpu) | |
125 | +{ | |
126 | + struct trace_array_cpu *data; | |
127 | + struct stack_frame frame; | |
128 | + struct trace_array *tr; | |
129 | + const void __user *fp; | |
130 | + int is_user; | |
131 | + int i; | |
132 | + | |
133 | + if (!regs) | |
134 | + return; | |
135 | + | |
136 | + tr = sysprof_trace; | |
137 | + data = tr->data[cpu]; | |
138 | + is_user = user_mode(regs); | |
139 | + | |
140 | + if (!current || current->pid == 0) | |
141 | + return; | |
142 | + | |
143 | + if (is_user && current->state != TASK_RUNNING) | |
144 | + return; | |
145 | + | |
146 | + __trace_special(tr, data, 0, 0, current->pid); | |
147 | + | |
148 | + if (!is_user) | |
149 | + i = trace_kernel(regs, tr, data); | |
150 | + else | |
151 | + i = 0; | |
152 | + | |
153 | + /* | |
154 | + * Trace user stack if we are not a kernel thread | |
155 | + */ | |
156 | + if (current->mm && i < sample_max_depth) { | |
157 | + regs = (struct pt_regs *)current->thread.sp0 - 1; | |
158 | + | |
159 | + fp = (void __user *)regs->bp; | |
160 | + | |
161 | + __trace_special(tr, data, 2, regs->ip, 0); | |
162 | + | |
163 | + while (i < sample_max_depth) { | |
164 | + frame.next_fp = NULL; | 
165 | + frame.return_address = 0; | |
166 | + if (!copy_stack_frame(fp, &frame)) | |
167 | + break; | |
168 | + if ((unsigned long)fp < regs->sp) | |
169 | + break; | |
170 | + | |
171 | + __trace_special(tr, data, 2, frame.return_address, | |
172 | + (unsigned long)fp); | |
173 | + fp = frame.next_fp; | |
174 | + | |
175 | + i++; | |
176 | + } | |
177 | + | |
178 | + } | |
179 | + | |
180 | + /* | |
181 | + * Special trace entry if we overflow the max depth: | |
182 | + */ | |
183 | + if (i == sample_max_depth) | |
184 | + __trace_special(tr, data, -1, -1, -1); | |
185 | + | |
186 | + __trace_special(tr, data, 3, current->pid, i); | |
187 | +} | |
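
For readers decoding the buffer: timer_notify() emits each sample as a run of __trace_special() entries distinguished by their first argument. The enum below is an inferred summary of that on-buffer convention, written out purely for illustration; it is not a structure the kernel defines:

enum sysprof_entry_type {		/* value of __trace_special() arg1 */
	SYSPROF_SAMPLE_START = 0,	/* arg3 = pid of the interrupted task */
	SYSPROF_KERNEL_ADDR  = 1,	/* arg2 = kernel text address (regs->ip or a frame) */
	SYSPROF_USER_ADDR    = 2,	/* arg2 = user address, arg3 = fp (0 for the leading regs->ip entry) */
	SYSPROF_SAMPLE_END   = 3,	/* arg2 = pid, arg3 = number of entries recorded */
	/* a (-1, -1, -1) entry marks a sample that hit sample_max_depth */
};
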
188 | + | |
189 | +static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer) | |
190 | +{ | |
191 | + /* trace here */ | |
192 | + timer_notify(get_irq_regs(), smp_processor_id()); | |
193 | + | |
194 | + hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); | |
195 | + | |
196 | + return HRTIMER_RESTART; | |
197 | +} | |
198 | + | |
199 | +static void start_stack_timer(int cpu) | |
200 | +{ | |
201 | + struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); | |
202 | + | |
203 | + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
204 | + hrtimer->function = stack_trace_timer_fn; | |
205 | + hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | |
206 | + | |
207 | + hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); | |
208 | +} | |
209 | + | |
210 | +static void start_stack_timers(void) | |
211 | +{ | |
212 | + cpumask_t saved_mask = current->cpus_allowed; | |
213 | + int cpu; | |
214 | + | |
215 | + for_each_online_cpu(cpu) { | |
216 | + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | |
217 | + start_stack_timer(cpu); | |
218 | + } | |
219 | + set_cpus_allowed_ptr(current, &saved_mask); | |
220 | +} | |
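
start_stack_timers() arms each per-CPU hrtimer by temporarily pinning the current task to every online CPU in turn, since hrtimer_start() queues the timer on the CPU it runs on. A hypothetical user-space analogue of this hop-to-each-CPU pattern, using the Linux affinity syscalls rather than the in-kernel API:

/* cpu_hop.c -- illustrative only; gcc cpu_hop.c */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

static void on_cpu(int cpu)
{
	printf("armed on cpu %d\n", cpu);	/* stand-in for start_stack_timer() */
}

int main(void)
{
	cpu_set_t saved, one;
	long ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	int cpu;

	sched_getaffinity(0, sizeof(saved), &saved);	/* like saving cpus_allowed */
	for (cpu = 0; cpu < ncpus; cpu++) {
		CPU_ZERO(&one);
		CPU_SET(cpu, &one);
		if (sched_setaffinity(0, sizeof(one), &one) == 0)
			on_cpu(cpu);	/* now guaranteed to run on 'cpu' */
	}
	sched_setaffinity(0, sizeof(saved), &saved);	/* restore the mask */
	return 0;
}
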
221 | + | |
222 | +static void stop_stack_timer(int cpu) | |
223 | +{ | |
224 | + struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); | |
225 | + | |
226 | + hrtimer_cancel(hrtimer); | |
227 | +} | |
228 | + | |
229 | +static void stop_stack_timers(void) | |
230 | +{ | |
231 | + int cpu; | |
232 | + | |
233 | + for_each_online_cpu(cpu) | |
234 | + stop_stack_timer(cpu); | |
235 | +} | |
236 | + | |
237 | +static void stack_reset(struct trace_array *tr) | |
238 | +{ | |
239 | + int cpu; | |
240 | + | |
241 | + tr->time_start = ftrace_now(tr->cpu); | |
242 | + | |
243 | + for_each_online_cpu(cpu) | |
244 | + tracing_reset(tr->data[cpu]); | |
245 | +} | |
246 | + | |
247 | +static void start_stack_trace(struct trace_array *tr) | |
248 | +{ | |
249 | + mutex_lock(&sample_timer_lock); | |
250 | + stack_reset(tr); | |
251 | + start_stack_timers(); | |
252 | + tracer_enabled = 1; | |
253 | + mutex_unlock(&sample_timer_lock); | |
254 | +} | |
255 | + | |
256 | +static void stop_stack_trace(struct trace_array *tr) | |
257 | +{ | |
258 | + mutex_lock(&sample_timer_lock); | |
259 | + stop_stack_timers(); | |
260 | + tracer_enabled = 0; | |
261 | + mutex_unlock(&sample_timer_lock); | |
262 | +} | |
263 | + | |
264 | +static void stack_trace_init(struct trace_array *tr) | |
265 | +{ | |
266 | + sysprof_trace = tr; | |
267 | + | |
268 | + if (tr->ctrl) | |
269 | + start_stack_trace(tr); | |
270 | +} | |
271 | + | |
272 | +static void stack_trace_reset(struct trace_array *tr) | |
273 | +{ | |
274 | + if (tr->ctrl) | |
275 | + stop_stack_trace(tr); | |
276 | +} | |
277 | + | |
278 | +static void stack_trace_ctrl_update(struct trace_array *tr) | |
279 | +{ | |
280 | + /* When starting a new trace, reset the buffers */ | |
281 | + if (tr->ctrl) | |
282 | + start_stack_trace(tr); | |
283 | + else | |
284 | + stop_stack_trace(tr); | |
285 | +} | |
286 | + | |
287 | +static struct tracer stack_trace __read_mostly = | |
288 | +{ | |
289 | + .name = "sysprof", | |
290 | + .init = stack_trace_init, | |
291 | + .reset = stack_trace_reset, | |
292 | + .ctrl_update = stack_trace_ctrl_update, | |
293 | +#ifdef CONFIG_FTRACE_SELFTEST | |
294 | + .selftest = trace_selftest_startup_sysprof, | |
295 | +#endif | |
296 | +}; | |
297 | + | |
298 | +__init static int init_stack_trace(void) | |
299 | +{ | |
300 | + return register_tracer(&stack_trace); | |
301 | +} | |
302 | +device_initcall(init_stack_trace); | |
303 | + | |
304 | +#define MAX_LONG_DIGITS 22 | |
305 | + | |
306 | +static ssize_t | |
307 | +sysprof_sample_read(struct file *filp, char __user *ubuf, | |
308 | + size_t cnt, loff_t *ppos) | |
309 | +{ | |
310 | + char buf[MAX_LONG_DIGITS]; | |
311 | + int r; | |
312 | + | |
313 | + r = sprintf(buf, "%lu\n", nsecs_to_usecs(sample_period)); | 
314 | + | |
315 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | |
316 | +} | |
317 | + | |
318 | +static ssize_t | |
319 | +sysprof_sample_write(struct file *filp, const char __user *ubuf, | |
320 | + size_t cnt, loff_t *ppos) | |
321 | +{ | |
322 | + char buf[MAX_LONG_DIGITS]; | |
323 | + unsigned long val; | |
324 | + | |
325 | + if (cnt > MAX_LONG_DIGITS-1) | |
326 | + cnt = MAX_LONG_DIGITS-1; | |
327 | + | |
328 | + if (copy_from_user(buf, ubuf, cnt)) | 
329 | + return -EFAULT; | |
330 | + | |
331 | + buf[cnt] = 0; | |
332 | + | |
333 | + val = simple_strtoul(buf, NULL, 10); | |
334 | + /* | |
335 | + * Enforce a minimum sample period of 100 usecs: | |
336 | + */ | |
337 | + if (val < 100) | |
338 | + val = 100; | |
339 | + | |
340 | + mutex_lock(&sample_timer_lock); | |
341 | + stop_stack_timers(); | |
342 | + sample_period = val * 1000; | |
343 | + /* only re-arm the sampling timers if the tracer is running */ | 
344 | + if (tracer_enabled) | 
345 | + start_stack_timers(); | 
346 | + mutex_unlock(&sample_timer_lock); | 
347 | + | 
348 | + return cnt; | 
349 | +} | 
350 | + | 
351 | +static struct file_operations sysprof_sample_fops = { | 
352 | + .read = sysprof_sample_read, | 
353 | + .write = sysprof_sample_write, | 
354 | +}; | 
355 | + | 
356 | +void init_tracer_sysprof_debugfs(struct dentry *d_tracer) | 
357 | +{ | 
358 | + struct dentry *entry; | 
359 | + | 
360 | + entry = debugfs_create_file("sysprof_sample_period", 0644, | 
361 | + d_tracer, NULL, &sysprof_sample_fops); | 
362 | + if (entry) | 
363 | + return; | 
364 | + pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n"); | 
365 | +}
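
Once built in, the tracer is driven entirely through debugfs: select it via current_tracer, toggle it via tracing_enabled, and tune the period via the sysprof_sample_period file created above. A hypothetical user-space sketch (run as root; the path assumes debugfs mounted at /sys/kernel/debug, and the file names match this era's ftrace layout):

/* sysprof_on.c -- illustrative only */
#include <stdio.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	const char *base = "/sys/kernel/debug/tracing";
	char path[128];

	snprintf(path, sizeof(path), "%s/sysprof_sample_period", base);
	write_str(path, "5000");	/* usecs; the kernel clamps to >= 100 */

	snprintf(path, sizeof(path), "%s/current_tracer", base);
	write_str(path, "sysprof");	/* matches stack_trace.name */

	snprintf(path, sizeof(path), "%s/tracing_enabled", base);
	write_str(path, "1");		/* tr->ctrl = 1 -> start_stack_trace() */
	return 0;
}
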