Commit 02b67518e2b1c490787dac7f35e1204e74fe21ba

Authored by Török Edwin
Committed by Ingo Molnar
1 parent a0a70c735e

tracing: add support for userspace stacktraces in tracing/iter_ctrl

Impact: add new (default-off) tracing visualization feature

Usage example:

 mount -t debugfs nodev /sys/kernel/debug
 cd /sys/kernel/debug/tracing
 echo userstacktrace >iter_ctrl
 echo sched_switch >current_tracer
 echo 1 >tracing_enabled
 .... run application ...
 echo 0 >tracing_enabled

Then read one of 'trace', 'latency_trace', 'trace_pipe'.

For the best output, compile your userspace programs with frame
pointers (at least glibc and the application you are tracing).
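
As a rough illustration of what frame pointers buy you (not part of
this patch), a user-space program can walk its own frame chain much
like the kernel-side unwinder added below. This sketch assumes the
conventional x86 frame layout (saved frame pointer followed by the
return address) and a build with gcc -fno-omit-frame-pointer:

 /* walk.c - build with: gcc -fno-omit-frame-pointer -o walk walk.c */
 #include <stdio.h>

 struct stack_frame {
 	const struct stack_frame *next_fp; /* saved frame pointer */
 	unsigned long return_address;      /* saved return address */
 };

 static void __attribute__((noinline)) show_backtrace(void)
 {
 	const struct stack_frame *fp = __builtin_frame_address(0);

 	/* A real unwinder must validate every pointer before reading
 	 * through it, as copy_stack_frame() in this patch does. */
 	while (fp && fp->return_address) {
 		printf(" <%016lx>\n", fp->return_address);
 		if (fp->next_fp <= fp)	/* frames must move up the stack */
 			break;
 		fp = fp->next_fp;
 	}
 }

 static void __attribute__((noinline)) leaf(void)
 {
 	show_backtrace();
 }

 int main(void)
 {
 	leaf();
 	return 0;
 }

Without frame pointers the compiler reuses that register, the saved
frame-pointer chain is broken, and walks like the one above (and the
one this patch adds) terminate early or produce garbage.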

Signed-off-by: Török Edwin <edwintorok@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 5 changed files with 171 additions and 1 deletion

Documentation/ftrace.txt
... ... @@ -324,7 +324,7 @@
324 324  
325 325 cat /debug/tracing/trace_options
326 326 print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
327   - noblock nostacktrace nosched-tree
  327 + noblock nostacktrace nosched-tree nouserstacktrace
328 328  
329 329 To disable one of the options, echo in the option prepended with "no".
330 330  
... ... @@ -377,6 +377,9 @@
377 377 stacktrace - This is one of the options that changes the trace itself.
378 378 When a trace is recorded, so is the stack of functions.
379 379 This allows for back traces of trace sites.
  380 +
  381 + userstacktrace - This option changes the trace.
  382 + It records a stacktrace of the current userspace thread.
380 383  
381 384 sched-tree - TBD (any users??)
382 385  
arch/x86/kernel/stacktrace.c
... ... @@ -6,6 +6,7 @@
6 6 #include <linux/sched.h>
7 7 #include <linux/stacktrace.h>
8 8 #include <linux/module.h>
  9 +#include <linux/uaccess.h>
9 10 #include <asm/stacktrace.h>
10 11  
11 12 static void save_stack_warning(void *data, char *msg)
... ... @@ -83,4 +84,59 @@
83 84 trace->entries[trace->nr_entries++] = ULONG_MAX;
84 85 }
85 86 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
  87 +
  88 +/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
  89 +
  90 +struct stack_frame {
  91 + const void __user *next_fp;
  92 + unsigned long return_address;
  93 +};
  94 +
  95 +static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
  96 +{
  97 + int ret;
  98 +
  99 + if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
  100 + return 0;
  101 +
  102 + ret = 1;
  103 + pagefault_disable();
  104 + if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
  105 + ret = 0;
  106 + pagefault_enable();
  107 +
  108 + return ret;
  109 +}
  110 +
  111 +void save_stack_trace_user(struct stack_trace *trace)
  112 +{
  113 + /*
  114 + * Trace user stack if we are not a kernel thread
  115 + */
  116 + if (current->mm) {
  117 + const struct pt_regs *regs = task_pt_regs(current);
  118 + const void __user *fp = (const void __user *)regs->bp;
  119 +
  120 + if (trace->nr_entries < trace->max_entries)
  121 + trace->entries[trace->nr_entries++] = regs->ip;
  122 +
  123 + while (trace->nr_entries < trace->max_entries) {
  124 + struct stack_frame frame;
  125 + frame.next_fp = NULL;
  126 + frame.return_address = 0;
  127 + if (!copy_stack_frame(fp, &frame))
  128 + break;
  129 + if ((unsigned long)fp < regs->sp)
  130 + break;
  131 + if (frame.return_address)
  132 + trace->entries[trace->nr_entries++] =
  133 + frame.return_address;
  134 + if (fp == frame.next_fp)
  135 + break;
  136 + fp = frame.next_fp;
  137 + }
  138 + }
  139 + if (trace->nr_entries < trace->max_entries)
  140 + trace->entries[trace->nr_entries++] = ULONG_MAX;
  141 +}
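
For reference, callers collect entries through the generic struct
stack_trace interface; ftrace_trace_userstack() in kernel/trace/trace.c
below does essentially this (a sketch, using the same field values the
patch uses):

 unsigned long entries[FTRACE_STACK_ENTRIES];
 struct stack_trace trace = {
 	.nr_entries	= 0,
 	.max_entries	= FTRACE_STACK_ENTRIES,
 	.entries	= entries,
 	.skip		= 0,
 };

 save_stack_trace_user(&trace);
 /* entries[] now holds user return addresses, terminated by ULONG_MAX
  * when there was room left for the sentinel. */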
include/linux/stacktrace.h
... ... @@ -15,9 +15,17 @@
15 15 struct stack_trace *trace);
16 16  
17 17 extern void print_stack_trace(struct stack_trace *trace, int spaces);
  18 +
  19 +#ifdef CONFIG_X86
  20 +extern void save_stack_trace_user(struct stack_trace *trace);
18 21 #else
  22 +# define save_stack_trace_user(trace) do { } while (0)
  23 +#endif
  24 +
  25 +#else
19 26 # define save_stack_trace(trace) do { } while (0)
20 27 # define save_stack_trace_tsk(tsk, trace) do { } while (0)
  28 +# define save_stack_trace_user(trace) do { } while (0)
21 29 # define print_stack_trace(trace, spaces) do { } while (0)
22 30 #endif
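
Reconstructed from the hunk above, the resulting #ifdef nesting in this
header looks roughly like this (the outer conditional in this header is
CONFIG_STACKTRACE):

 #ifdef CONFIG_STACKTRACE
 extern void save_stack_trace(struct stack_trace *trace);
 extern void save_stack_trace_tsk(struct task_struct *tsk,
 				struct stack_trace *trace);

 extern void print_stack_trace(struct stack_trace *trace, int spaces);

 #ifdef CONFIG_X86
 extern void save_stack_trace_user(struct stack_trace *trace);
 #else
 # define save_stack_trace_user(trace)		do { } while (0)
 #endif

 #else
 # define save_stack_trace(trace)		do { } while (0)
 # define save_stack_trace_tsk(tsk, trace)	do { } while (0)
 # define save_stack_trace_user(trace)		do { } while (0)
 # define print_stack_trace(trace, spaces)	do { } while (0)
 #endif

Note that on non-x86 architectures save_stack_trace_user() compiles to
a no-op even with CONFIG_STACKTRACE enabled, since only the x86
implementation exists so far.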
23 31  
kernel/trace/trace.c
... ... @@ -275,6 +275,7 @@
275 275 "ftrace_preempt",
276 276 "branch",
277 277 "annotate",
  278 + "userstacktrace",
278 279 NULL
279 280 };
280 281  
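
The position of "userstacktrace" in this trace_options[] array is
significant: the iter_ctrl write handler maps option index i to flag
bit (1 << i), so this entry at index 14 pairs with
TRACE_ITER_USERSTACKTRACE = 0x4000 (1 << 14) added in trace.h below,
just as "annotate" at index 13 pairs with TRACE_ITER_ANNOTATE = 0x2000.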
... ... @@ -918,6 +919,44 @@
918 919 ftrace_trace_stack(tr, data, flags, skip, preempt_count());
919 920 }
920 921  
  922 +static void ftrace_trace_userstack(struct trace_array *tr,
  923 + struct trace_array_cpu *data,
  924 + unsigned long flags, int pc)
  925 +{
  926 + struct userstack_entry *entry;
  927 + struct stack_trace trace;
  928 + struct ring_buffer_event *event;
  929 + unsigned long irq_flags;
  930 +
  931 + if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
  932 + return;
  933 +
  934 + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
  935 + &irq_flags);
  936 + if (!event)
  937 + return;
  938 + entry = ring_buffer_event_data(event);
  939 + tracing_generic_entry_update(&entry->ent, flags, pc);
  940 + entry->ent.type = TRACE_USER_STACK;
  941 +
  942 + memset(&entry->caller, 0, sizeof(entry->caller));
  943 +
  944 + trace.nr_entries = 0;
  945 + trace.max_entries = FTRACE_STACK_ENTRIES;
  946 + trace.skip = 0;
  947 + trace.entries = entry->caller;
  948 +
  949 + save_stack_trace_user(&trace);
  950 + ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
  951 +}
  952 +
  953 +void __trace_userstack(struct trace_array *tr,
  954 + struct trace_array_cpu *data,
  955 + unsigned long flags)
  956 +{
  957 + ftrace_trace_userstack(tr, data, flags, preempt_count());
  958 +}
  959 +
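
Note the memset() of entry->caller above: slots the unwinder never
fills stay zero, so the printer below can rely on the ULONG_MAX
sentinel and render any leftover zero entry as "??" rather than stale
data from the ring buffer.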
921 960 static void
922 961 ftrace_trace_special(void *__tr, void *__data,
923 962 unsigned long arg1, unsigned long arg2, unsigned long arg3,
... ... @@ -941,6 +980,7 @@
941 980 entry->arg3 = arg3;
942 981 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
943 982 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
  983 + ftrace_trace_userstack(tr, data, irq_flags, pc);
944 984  
945 985 trace_wake_up();
946 986 }
... ... @@ -979,6 +1019,7 @@
979 1019 entry->next_cpu = task_cpu(next);
980 1020 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
981 1021 ftrace_trace_stack(tr, data, flags, 5, pc);
  1022 + ftrace_trace_userstack(tr, data, flags, pc);
982 1023 }
983 1024  
984 1025 void
... ... @@ -1008,6 +1049,7 @@
1008 1049 entry->next_cpu = task_cpu(wakee);
1009 1050 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
1010 1051 ftrace_trace_stack(tr, data, flags, 6, pc);
  1052 + ftrace_trace_userstack(tr, data, flags, pc);
1011 1053  
1012 1054 trace_wake_up();
1013 1055 }
... ... @@ -1387,6 +1429,31 @@
1387 1429 return ret;
1388 1430 }
1389 1431  
  1432 +static int
  1433 +seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
  1434 + unsigned long sym_flags)
  1435 +{
  1436 + int ret = 1;
  1437 + unsigned i;
  1438 +
  1439 + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
  1440 + unsigned long ip = entry->caller[i];
  1441 +
  1442 + if (ip == ULONG_MAX || !ret)
  1443 + break;
  1444 + if (i)
  1445 + ret = trace_seq_puts(s, " <- ");
  1446 + if (!ip) {
  1447 + ret = trace_seq_puts(s, "??");
  1448 + continue;
  1449 + }
  1450 + if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/)
  1451 + ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
  1452 + }
  1453 +
  1454 + return ret;
  1455 +}
  1456 +
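
With this helper, a user stack renders as a chain of addresses joined
by " <- ", e.g. roughly " <00000000004004d2> <-  <00000000004003f8>"
on a 64-bit build, with "??" standing in for a zero entry. The
TRACE_ITER_SYM_ADDR check is left commented out for now, so addresses
are always printed numerically; symbol resolution for user addresses
is not attempted here.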
1390 1457 static void print_lat_help_header(struct seq_file *m)
1391 1458 {
1392 1459 seq_puts(m, "# _------=> CPU# \n");
... ... @@ -1702,6 +1769,16 @@
1702 1769 field->line);
1703 1770 break;
1704 1771 }
  1772 + case TRACE_USER_STACK: {
  1773 + struct userstack_entry *field;
  1774 +
  1775 + trace_assign_type(field, entry);
  1776 +
  1777 + seq_print_userip_objs(field, s, sym_flags);
  1778 + if (entry->flags & TRACE_FLAG_CONT)
  1779 + trace_seq_print_cont(s, iter);
  1780 + break;
  1781 + }
1705 1782 default:
1706 1783 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1707 1784 }
1708 1785  
... ... @@ -1853,7 +1930,20 @@
1853 1930 field->line);
1854 1931 break;
1855 1932 }
  1933 + case TRACE_USER_STACK: {
  1934 + struct userstack_entry *field;
  1935 +
  1936 + trace_assign_type(field, entry);
  1937 +
  1938 + ret = seq_print_userip_objs(field, s, sym_flags);
  1939 + if (!ret)
  1940 + return TRACE_TYPE_PARTIAL_LINE;
  1941 + ret = trace_seq_putc(s, '\n');
  1942 + if (!ret)
  1943 + return TRACE_TYPE_PARTIAL_LINE;
  1944 + break;
1856 1945 }
  1946 + }
1857 1947 return TRACE_TYPE_HANDLED;
1858 1948 }
1859 1949  
... ... @@ -1912,6 +2002,7 @@
1912 2002 break;
1913 2003 }
1914 2004 case TRACE_SPECIAL:
  2005 + case TRACE_USER_STACK:
1915 2006 case TRACE_STACK: {
1916 2007 struct special_entry *field;
1917 2008  
... ... @@ -2000,6 +2091,7 @@
2000 2091 break;
2001 2092 }
2002 2093 case TRACE_SPECIAL:
  2094 + case TRACE_USER_STACK:
2003 2095 case TRACE_STACK: {
2004 2096 struct special_entry *field;
2005 2097  
... ... @@ -2054,6 +2146,7 @@
2054 2146 break;
2055 2147 }
2056 2148 case TRACE_SPECIAL:
  2149 + case TRACE_USER_STACK:
2057 2150 case TRACE_STACK: {
2058 2151 struct special_entry *field;
2059 2152  
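
In the raw, hex, and binary printers above, TRACE_USER_STACK is folded
into the existing TRACE_SPECIAL/TRACE_STACK case and the entry is
decoded as a struct special_entry, so those formats carry only the
first few words of the record; complete user stacks are rendered only
by the text printers via seq_print_userip_objs().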
kernel/trace/trace.h
... ... @@ -26,6 +26,7 @@
26 26 TRACE_BOOT_CALL,
27 27 TRACE_BOOT_RET,
28 28 TRACE_FN_RET,
  29 + TRACE_USER_STACK,
29 30  
30 31 __TRACE_LAST_TYPE
31 32 };
... ... @@ -42,6 +43,7 @@
42 43 unsigned char flags;
43 44 unsigned char preempt_count;
44 45 int pid;
  46 + int tgid;
45 47 };
46 48  
47 49 /*
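
The new tgid field is not visibly consumed by the hunks shown here; it
records the thread group ID with each event, presumably so that a
follow-up change can map the captured user addresses back to the right
process's executable mappings when resolving symbols.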
... ... @@ -99,6 +101,11 @@
99 101 unsigned long caller[FTRACE_STACK_ENTRIES];
100 102 };
101 103  
  104 +struct userstack_entry {
  105 + struct trace_entry ent;
  106 + unsigned long caller[FTRACE_STACK_ENTRIES];
  107 +};
  108 +
102 109 /*
103 110 * ftrace_printk entry:
104 111 */
... ... @@ -240,6 +247,7 @@
240 247 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
241 248 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
242 249 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
  250 + IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
243 251 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
244 252 IF_ASSIGN(var, ent, struct special_entry, 0); \
245 253 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
... ... @@ -500,6 +508,7 @@
500 508 TRACE_ITER_PREEMPTONLY = 0x800,
501 509 TRACE_ITER_BRANCH = 0x1000,
502 510 TRACE_ITER_ANNOTATE = 0x2000,
  511 + TRACE_ITER_USERSTACKTRACE = 0x4000
503 512 };
504 513  
505 514 /*