Commit 86c269fea37334687b1c0789e6444be0d750e8a6

Authored by Andy Lutomirski
Committed by Ingo Molnar
1 parent 88a7c26af8

perf/x86_64: Improve user regs sampling

Perf reports user regs for kernel-mode samples so that samples can
be backtraced through user code.  The old code was very broken in
syscall context, resulting in useless backtraces.

The new code, in contrast, is still dangerously racy, but it should
at least work most of the time.

Tested-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: chenggang.qcg@taobao.com
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/243560c26ff0f739978e2459e203f6515367634d.1420396372.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Showing 1 changed file with 76 additions and 2 deletions (side-by-side diff)

arch/x86/kernel/perf_regs.c
... ... @@ -115,8 +115,82 @@
115 115 struct pt_regs *regs,
116 116 struct pt_regs *regs_user_copy)
117 117 {
118   - regs_user->regs = task_pt_regs(current);
119   - regs_user->abi = perf_reg_abi(current);
  118 + struct pt_regs *user_regs = task_pt_regs(current);
  119 +
  120 + /*
  121 + * If we're in an NMI that interrupted task_pt_regs setup, then
  122 + * we can't sample user regs at all. This check isn't really
  123 + * sufficient, though, as we could be in an NMI inside an interrupt
  124 + * that happened during task_pt_regs setup.
  125 + */
  126 + if (regs->sp > (unsigned long)&user_regs->r11 &&
  127 + regs->sp <= (unsigned long)(user_regs + 1)) {
  128 + regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
  129 + regs_user->regs = NULL;
  130 + return;
  131 + }
  132 +
  133 + /*
  134 + * RIP, flags, and the argument registers are usually saved.
  135 + * orig_ax is probably okay, too.
  136 + */
  137 + regs_user_copy->ip = user_regs->ip;
  138 + regs_user_copy->cx = user_regs->cx;
  139 + regs_user_copy->dx = user_regs->dx;
  140 + regs_user_copy->si = user_regs->si;
  141 + regs_user_copy->di = user_regs->di;
  142 + regs_user_copy->r8 = user_regs->r8;
  143 + regs_user_copy->r9 = user_regs->r9;
  144 + regs_user_copy->r10 = user_regs->r10;
  145 + regs_user_copy->r11 = user_regs->r11;
  146 + regs_user_copy->orig_ax = user_regs->orig_ax;
  147 + regs_user_copy->flags = user_regs->flags;
  148 +
  149 + /*
  150 + * Don't even try to report the "rest" regs.
  151 + */
  152 + regs_user_copy->bx = -1;
  153 + regs_user_copy->bp = -1;
  154 + regs_user_copy->r12 = -1;
  155 + regs_user_copy->r13 = -1;
  156 + regs_user_copy->r14 = -1;
  157 + regs_user_copy->r15 = -1;
  158 +
  159 + /*
  160 + * For this to be at all useful, we need a reasonable guess for
  161 + * sp and the ABI. Be careful: we're in NMI context, and we're
  162 + * considering current to be the current task, so we should
  163 + * be careful not to look at any other percpu variables that might
  164 + * change during context switches.
  165 + */
  166 + if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
  167 + task_thread_info(current)->status & TS_COMPAT) {
  168 + /* Easy case: we're in a compat syscall. */
  169 + regs_user->abi = PERF_SAMPLE_REGS_ABI_32;
  170 + regs_user_copy->sp = user_regs->sp;
  171 + regs_user_copy->cs = user_regs->cs;
  172 + regs_user_copy->ss = user_regs->ss;
  173 + } else if (user_regs->orig_ax != -1) {
  174 + /*
  175 + * We're probably in a 64-bit syscall.
  176 + * Warning: this code is severely racy. At least it's better
  177 + * than just blindly copying user_regs.
  178 + */
  179 + regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
  180 + regs_user_copy->sp = this_cpu_read(old_rsp);
  181 + regs_user_copy->cs = __USER_CS;
  182 + regs_user_copy->ss = __USER_DS;
  183 + regs_user_copy->cx = -1; /* usually contains garbage */
  184 + } else {
  185 + /* We're probably in an interrupt or exception. */
  186 + regs_user->abi = user_64bit_mode(user_regs) ?
  187 + PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
  188 + regs_user_copy->sp = user_regs->sp;
  189 + regs_user_copy->cs = user_regs->cs;
  190 + regs_user_copy->ss = user_regs->ss;
  191 + }
  192 +
  193 + regs_user->regs = regs_user_copy;
120 194 }
121 195 #endif /* CONFIG_X86_32 */