Commit 56962b4449af34070bb1994621ef4f0265eed4d8
1 parent
70791ce9ba
Exists in
master
and in
4 other branches
perf: Generalize some arch callchain code
- Most archs use one callchain buffer per cpu, except x86 that needs to deal with NMIs. Provide a default perf_callchain_buffer() implementation that x86 overrides. - Centralize all the kernel/user regs handling and invoke new arch handlers from there: perf_callchain_user() / perf_callchain_kernel() This avoids duplicating the user_mode() and current->mm checks in every arch, and so on. - Invert some parameters in perf_callchain_*() helpers: entry to the left, regs to the right, following the traditional (dst, src) ordering. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Acked-by: Paul Mackerras <paulus@samba.org> Tested-by: Will Deacon <will.deacon@arm.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Stephane Eranian <eranian@google.com> Cc: David Miller <davem@davemloft.net> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Borislav Petkov <bp@amd64.org>
Showing 7 changed files with 90 additions and 180 deletions Side-by-side Diff
arch/arm/kernel/perf_event.c
... | ... | @@ -3044,17 +3044,13 @@ |
3044 | 3044 | return buftail.fp - 1; |
3045 | 3045 | } |
3046 | 3046 | |
3047 | -static void | |
3048 | -perf_callchain_user(struct pt_regs *regs, | |
3049 | - struct perf_callchain_entry *entry) | |
3047 | +void | |
3048 | +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | |
3050 | 3049 | { |
3051 | 3050 | struct frame_tail *tail; |
3052 | 3051 | |
3053 | 3052 | perf_callchain_store(entry, PERF_CONTEXT_USER); |
3054 | 3053 | |
3055 | - if (!user_mode(regs)) | |
3056 | - regs = task_pt_regs(current); | |
3057 | - | |
3058 | 3054 | tail = (struct frame_tail *)regs->ARM_fp - 1; |
3059 | 3055 | |
3060 | 3056 | while (tail && !((unsigned long)tail & 0x3)) |
... | ... | @@ -3075,9 +3071,8 @@ |
3075 | 3071 | return 0; |
3076 | 3072 | } |
3077 | 3073 | |
3078 | -static void | |
3079 | -perf_callchain_kernel(struct pt_regs *regs, | |
3080 | - struct perf_callchain_entry *entry) | |
3074 | +void | |
3075 | +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | |
3081 | 3076 | { |
3082 | 3077 | struct stackframe fr; |
3083 | 3078 | |
... | ... | @@ -3087,35 +3082,5 @@ |
3087 | 3082 | fr.lr = regs->ARM_lr; |
3088 | 3083 | fr.pc = regs->ARM_pc; |
3089 | 3084 | walk_stackframe(&fr, callchain_trace, entry); |
3090 | -} | |
3091 | - | |
3092 | -static void | |
3093 | -perf_do_callchain(struct pt_regs *regs, | |
3094 | - struct perf_callchain_entry *entry) | |
3095 | -{ | |
3096 | - int is_user; | |
3097 | - | |
3098 | - if (!regs) | |
3099 | - return; | |
3100 | - | |
3101 | - is_user = user_mode(regs); | |
3102 | - | |
3103 | - if (!is_user) | |
3104 | - perf_callchain_kernel(regs, entry); | |
3105 | - | |
3106 | - if (current->mm) | |
3107 | - perf_callchain_user(regs, entry); | |
3108 | -} | |
3109 | - | |
3110 | -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); | |
3111 | - | |
3112 | -struct perf_callchain_entry * | |
3113 | -perf_callchain(struct pt_regs *regs) | |
3114 | -{ | |
3115 | - struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry); | |
3116 | - | |
3117 | - entry->nr = 0; | |
3118 | - perf_do_callchain(regs, entry); | |
3119 | - return entry; | |
3120 | 3085 | } |
arch/powerpc/kernel/perf_callchain.c
... | ... | @@ -46,8 +46,8 @@ |
46 | 46 | return 0; |
47 | 47 | } |
48 | 48 | |
49 | -static void perf_callchain_kernel(struct pt_regs *regs, | |
50 | - struct perf_callchain_entry *entry) | |
49 | +void | |
50 | +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | |
51 | 51 | { |
52 | 52 | unsigned long sp, next_sp; |
53 | 53 | unsigned long next_ip; |
... | ... | @@ -221,8 +221,8 @@ |
221 | 221 | puc == (unsigned long) &sf->uc; |
222 | 222 | } |
223 | 223 | |
224 | -static void perf_callchain_user_64(struct pt_regs *regs, | |
225 | - struct perf_callchain_entry *entry) | |
224 | +static void perf_callchain_user_64(struct perf_callchain_entry *entry, | |
225 | + struct pt_regs *regs) | |
226 | 226 | { |
227 | 227 | unsigned long sp, next_sp; |
228 | 228 | unsigned long next_ip; |
... | ... | @@ -303,8 +303,8 @@ |
303 | 303 | return __get_user_inatomic(*ret, ptr); |
304 | 304 | } |
305 | 305 | |
306 | -static inline void perf_callchain_user_64(struct pt_regs *regs, | |
307 | - struct perf_callchain_entry *entry) | |
306 | +static inline void perf_callchain_user_64(struct perf_callchain_entry *entry, | |
307 | + struct pt_regs *regs) | |
308 | 308 | { |
309 | 309 | } |
310 | 310 | |
... | ... | @@ -423,8 +423,8 @@ |
423 | 423 | return mctx->mc_gregs; |
424 | 424 | } |
425 | 425 | |
426 | -static void perf_callchain_user_32(struct pt_regs *regs, | |
427 | - struct perf_callchain_entry *entry) | |
426 | +static void perf_callchain_user_32(struct perf_callchain_entry *entry, | |
427 | + struct pt_regs *regs) | |
428 | 428 | { |
429 | 429 | unsigned int sp, next_sp; |
430 | 430 | unsigned int next_ip; |
431 | 431 | |
... | ... | @@ -471,33 +471,12 @@ |
471 | 471 | } |
472 | 472 | } |
473 | 473 | |
474 | -/* | |
475 | - * Since we can't get PMU interrupts inside a PMU interrupt handler, | |
476 | - * we don't need separate irq and nmi entries here. | |
477 | - */ | |
478 | -static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain); | |
479 | - | |
480 | -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |
474 | +void | |
475 | +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | |
481 | 476 | { |
482 | - struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); | |
483 | - | |
484 | - entry->nr = 0; | |
485 | - | |
486 | - if (!user_mode(regs)) { | |
487 | - perf_callchain_kernel(regs, entry); | |
488 | - if (current->mm) | |
489 | - regs = task_pt_regs(current); | |
490 | - else | |
491 | - regs = NULL; | |
492 | - } | |
493 | - | |
494 | - if (regs) { | |
495 | - if (current_is_64bit()) | |
496 | - perf_callchain_user_64(regs, entry); | |
497 | - else | |
498 | - perf_callchain_user_32(regs, entry); | |
499 | - } | |
500 | - | |
501 | - return entry; | |
477 | + if (current_is_64bit()) | |
478 | + perf_callchain_user_64(entry, regs); | |
479 | + else | |
480 | + perf_callchain_user_32(entry, regs); | |
502 | 481 | } |
arch/sh/kernel/perf_callchain.c
... | ... | @@ -44,45 +44,12 @@ |
44 | 44 | .address = callchain_address, |
45 | 45 | }; |
46 | 46 | |
47 | -static void | |
48 | -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | |
47 | +void | |
48 | +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | |
49 | 49 | { |
50 | 50 | perf_callchain_store(entry, PERF_CONTEXT_KERNEL); |
51 | 51 | perf_callchain_store(entry, regs->pc); |
52 | 52 | |
53 | 53 | unwind_stack(NULL, regs, NULL, &callchain_ops, entry); |
54 | -} | |
55 | - | |
56 | -static void | |
57 | -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) | |
58 | -{ | |
59 | - int is_user; | |
60 | - | |
61 | - if (!regs) | |
62 | - return; | |
63 | - | |
64 | - is_user = user_mode(regs); | |
65 | - | |
66 | - /* | |
67 | - * Only the kernel side is implemented for now. | |
68 | - */ | |
69 | - if (!is_user) | |
70 | - perf_callchain_kernel(regs, entry); | |
71 | -} | |
72 | - | |
73 | -/* | |
74 | - * No need for separate IRQ and NMI entries. | |
75 | - */ | |
76 | -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); | |
77 | - | |
78 | -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |
79 | -{ | |
80 | - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); | |
81 | - | |
82 | - entry->nr = 0; | |
83 | - | |
84 | - perf_do_callchain(regs, entry); | |
85 | - | |
86 | - return entry; | |
87 | 54 | } |
arch/sparc/kernel/perf_event.c
... | ... | @@ -1283,14 +1283,16 @@ |
1283 | 1283 | register_die_notifier(&perf_event_nmi_notifier); |
1284 | 1284 | } |
1285 | 1285 | |
1286 | -static void perf_callchain_kernel(struct pt_regs *regs, | |
1287 | - struct perf_callchain_entry *entry) | |
1286 | +void perf_callchain_kernel(struct perf_callchain_entry *entry, | |
1287 | + struct pt_regs *regs) | |
1288 | 1288 | { |
1289 | 1289 | unsigned long ksp, fp; |
1290 | 1290 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1291 | 1291 | int graph = 0; |
1292 | 1292 | #endif |
1293 | 1293 | |
1294 | + stack_trace_flush(); | |
1295 | + | |
1294 | 1296 | perf_callchain_store(entry, PERF_CONTEXT_KERNEL); |
1295 | 1297 | perf_callchain_store(entry, regs->tpc); |
1296 | 1298 | |
... | ... | @@ -1330,8 +1332,8 @@ |
1330 | 1332 | } while (entry->nr < PERF_MAX_STACK_DEPTH); |
1331 | 1333 | } |
1332 | 1334 | |
1333 | -static void perf_callchain_user_64(struct pt_regs *regs, | |
1334 | - struct perf_callchain_entry *entry) | |
1335 | +static void perf_callchain_user_64(struct perf_callchain_entry *entry, | |
1336 | + struct pt_regs *regs) | |
1335 | 1337 | { |
1336 | 1338 | unsigned long ufp; |
1337 | 1339 | |
... | ... | @@ -1353,8 +1355,8 @@ |
1353 | 1355 | } while (entry->nr < PERF_MAX_STACK_DEPTH); |
1354 | 1356 | } |
1355 | 1357 | |
1356 | -static void perf_callchain_user_32(struct pt_regs *regs, | |
1357 | - struct perf_callchain_entry *entry) | |
1358 | +static void perf_callchain_user_32(struct perf_callchain_entry *entry, | |
1359 | + struct pt_regs *regs) | |
1358 | 1360 | { |
1359 | 1361 | unsigned long ufp; |
1360 | 1362 | |
1361 | 1363 | |
... | ... | @@ -1376,31 +1378,13 @@ |
1376 | 1378 | } while (entry->nr < PERF_MAX_STACK_DEPTH); |
1377 | 1379 | } |
1378 | 1380 | |
1379 | -/* Like powerpc we can't get PMU interrupts within the PMU handler, | |
1380 | - * so no need for separate NMI and IRQ chains as on x86. | |
1381 | - */ | |
1382 | -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); | |
1383 | - | |
1384 | -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |
1381 | +void | |
1382 | +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | |
1385 | 1383 | { |
1386 | - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); | |
1387 | - | |
1388 | - entry->nr = 0; | |
1389 | - if (!user_mode(regs)) { | |
1390 | - stack_trace_flush(); | |
1391 | - perf_callchain_kernel(regs, entry); | |
1392 | - if (current->mm) | |
1393 | - regs = task_pt_regs(current); | |
1394 | - else | |
1395 | - regs = NULL; | |
1396 | - } | |
1397 | - if (regs) { | |
1398 | - flushw_user(); | |
1399 | - if (test_thread_flag(TIF_32BIT)) | |
1400 | - perf_callchain_user_32(regs, entry); | |
1401 | - else | |
1402 | - perf_callchain_user_64(regs, entry); | |
1403 | - } | |
1404 | - return entry; | |
1384 | + flushw_user(); | |
1385 | + if (test_thread_flag(TIF_32BIT)) | |
1386 | + perf_callchain_user_32(entry, regs); | |
1387 | + else | |
1388 | + perf_callchain_user_64(entry, regs); | |
1405 | 1389 | } |
arch/x86/kernel/cpu/perf_event.c
... | ... | @@ -1571,11 +1571,9 @@ |
1571 | 1571 | * callchain support |
1572 | 1572 | */ |
1573 | 1573 | |
1574 | +static DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry_nmi); | |
1574 | 1575 | |
1575 | -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); | |
1576 | -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); | |
1577 | 1576 | |
1578 | - | |
1579 | 1577 | static void |
1580 | 1578 | backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) |
1581 | 1579 | { |
... | ... | @@ -1607,8 +1605,8 @@ |
1607 | 1605 | .walk_stack = print_context_stack_bp, |
1608 | 1606 | }; |
1609 | 1607 | |
1610 | -static void | |
1611 | -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | |
1608 | +void | |
1609 | +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | |
1612 | 1610 | { |
1613 | 1611 | perf_callchain_store(entry, PERF_CONTEXT_KERNEL); |
1614 | 1612 | perf_callchain_store(entry, regs->ip); |
1615 | 1613 | |
... | ... | @@ -1653,14 +1651,12 @@ |
1653 | 1651 | } |
1654 | 1652 | #endif |
1655 | 1653 | |
1656 | -static void | |
1657 | -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | |
1654 | +void | |
1655 | +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | |
1658 | 1656 | { |
1659 | 1657 | struct stack_frame frame; |
1660 | 1658 | const void __user *fp; |
1661 | 1659 | |
1662 | - if (!user_mode(regs)) | |
1663 | - regs = task_pt_regs(current); | |
1664 | 1660 | |
1665 | 1661 | fp = (void __user *)regs->bp; |
1666 | 1662 | |
1667 | 1663 | |
1668 | 1664 | |
1669 | 1665 | |
... | ... | @@ -1687,42 +1683,17 @@ |
1687 | 1683 | } |
1688 | 1684 | } |
1689 | 1685 | |
1690 | -static void | |
1691 | -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) | |
1686 | +struct perf_callchain_entry *perf_callchain_buffer(void) | |
1692 | 1687 | { |
1693 | - int is_user; | |
1694 | - | |
1695 | - if (!regs) | |
1696 | - return; | |
1697 | - | |
1698 | - is_user = user_mode(regs); | |
1699 | - | |
1700 | - if (!is_user) | |
1701 | - perf_callchain_kernel(regs, entry); | |
1702 | - | |
1703 | - if (current->mm) | |
1704 | - perf_callchain_user(regs, entry); | |
1705 | -} | |
1706 | - | |
1707 | -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |
1708 | -{ | |
1709 | - struct perf_callchain_entry *entry; | |
1710 | - | |
1711 | 1688 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { |
1712 | 1689 | /* TODO: We don't support guest os callchain now */ |
1713 | 1690 | return NULL; |
1714 | 1691 | } |
1715 | 1692 | |
1716 | 1693 | if (in_nmi()) |
1717 | - entry = &__get_cpu_var(pmc_nmi_entry); | |
1718 | - else | |
1719 | - entry = &__get_cpu_var(pmc_irq_entry); | |
1694 | + return &__get_cpu_var(perf_callchain_entry_nmi); | |
1720 | 1695 | |
1721 | - entry->nr = 0; | |
1722 | - | |
1723 | - perf_do_callchain(regs, entry); | |
1724 | - | |
1725 | - return entry; | |
1696 | + return &__get_cpu_var(perf_callchain_entry); | |
1726 | 1697 | } |
1727 | 1698 | |
1728 | 1699 | unsigned long perf_instruction_pointer(struct pt_regs *regs) |
include/linux/perf_event.h
... | ... | @@ -976,7 +976,15 @@ |
976 | 976 | extern void perf_event_comm(struct task_struct *tsk); |
977 | 977 | extern void perf_event_fork(struct task_struct *tsk); |
978 | 978 | |
979 | -extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); | |
979 | +/* Callchains */ | |
980 | +DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); | |
981 | + | |
982 | +extern void perf_callchain_user(struct perf_callchain_entry *entry, | |
983 | + struct pt_regs *regs); | |
984 | +extern void perf_callchain_kernel(struct perf_callchain_entry *entry, | |
985 | + struct pt_regs *regs); | |
986 | +extern struct perf_callchain_entry *perf_callchain_buffer(void); | |
987 | + | |
980 | 988 | |
981 | 989 | static inline void |
982 | 990 | perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) |
kernel/perf_event.c
... | ... | @@ -2937,13 +2937,49 @@ |
2937 | 2937 | __perf_pending_run(); |
2938 | 2938 | } |
2939 | 2939 | |
2940 | +DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); | |
2941 | + | |
2940 | 2942 | /* |
2941 | 2943 | * Callchain support -- arch specific |
2942 | 2944 | */ |
2943 | 2945 | |
2944 | -__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |
2946 | +__weak struct perf_callchain_entry *perf_callchain_buffer(void) | |
2945 | 2947 | { |
2946 | - return NULL; | |
2948 | + return &__get_cpu_var(perf_callchain_entry); | |
2949 | +} | |
2950 | + | |
2951 | +__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, | |
2952 | + struct pt_regs *regs) | |
2953 | +{ | |
2954 | +} | |
2955 | + | |
2956 | +__weak void perf_callchain_user(struct perf_callchain_entry *entry, | |
2957 | + struct pt_regs *regs) | |
2958 | +{ | |
2959 | +} | |
2960 | + | |
2961 | +static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |
2962 | +{ | |
2963 | + struct perf_callchain_entry *entry; | |
2964 | + | |
2965 | + entry = perf_callchain_buffer(); | |
2966 | + if (!entry) | |
2967 | + return NULL; | |
2968 | + | |
2969 | + entry->nr = 0; | |
2970 | + | |
2971 | + if (!user_mode(regs)) { | |
2972 | + perf_callchain_kernel(entry, regs); | |
2973 | + if (current->mm) | |
2974 | + regs = task_pt_regs(current); | |
2975 | + else | |
2976 | + regs = NULL; | |
2977 | + } | |
2978 | + | |
2979 | + if (regs) | |
2980 | + perf_callchain_user(entry, regs); | |
2981 | + | |
2982 | + return entry; | |
2947 | 2983 | } |
2948 | 2984 | |
2949 | 2985 |