Commit 56962b4449af34070bb1994621ef4f0265eed4d8

Authored by Frederic Weisbecker
1 parent 70791ce9ba

perf: Generalize some arch callchain code

- Most archs use one callchain buffer per cpu, except x86, which needs
  to deal with NMIs. Provide a default perf_callchain_buffer()
  implementation that x86 overrides.

- Centralize all the kernel/user regs handling and invoke new arch
  handlers from there: perf_callchain_user() / perf_callchain_kernel()
  This avoids duplicating the user_mode() and current->mm checks, etc.

- Invert some parameters in perf_callchain_*() helpers: entry to the
  left, regs to the right, following the traditional (dst, src).

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Tested-by: Will Deacon <will.deacon@arm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: David Miller <davem@davemloft.net>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Borislav Petkov <bp@amd64.org>

Showing 7 changed files with 90 additions and 180 deletions Side-by-side Diff

arch/arm/kernel/perf_event.c
... ... @@ -3044,17 +3044,13 @@
3044 3044 return buftail.fp - 1;
3045 3045 }
3046 3046  
3047   -static void
3048   -perf_callchain_user(struct pt_regs *regs,
3049   - struct perf_callchain_entry *entry)
  3047 +void
  3048 +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
3050 3049 {
3051 3050 struct frame_tail *tail;
3052 3051  
3053 3052 perf_callchain_store(entry, PERF_CONTEXT_USER);
3054 3053  
3055   - if (!user_mode(regs))
3056   - regs = task_pt_regs(current);
3057   -
3058 3054 tail = (struct frame_tail *)regs->ARM_fp - 1;
3059 3055  
3060 3056 while (tail && !((unsigned long)tail & 0x3))
... ... @@ -3075,9 +3071,8 @@
3075 3071 return 0;
3076 3072 }
3077 3073  
3078   -static void
3079   -perf_callchain_kernel(struct pt_regs *regs,
3080   - struct perf_callchain_entry *entry)
  3074 +void
  3075 +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
3081 3076 {
3082 3077 struct stackframe fr;
3083 3078  
... ... @@ -3087,35 +3082,5 @@
3087 3082 fr.lr = regs->ARM_lr;
3088 3083 fr.pc = regs->ARM_pc;
3089 3084 walk_stackframe(&fr, callchain_trace, entry);
3090   -}
3091   -
3092   -static void
3093   -perf_do_callchain(struct pt_regs *regs,
3094   - struct perf_callchain_entry *entry)
3095   -{
3096   - int is_user;
3097   -
3098   - if (!regs)
3099   - return;
3100   -
3101   - is_user = user_mode(regs);
3102   -
3103   - if (!is_user)
3104   - perf_callchain_kernel(regs, entry);
3105   -
3106   - if (current->mm)
3107   - perf_callchain_user(regs, entry);
3108   -}
3109   -
3110   -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
3111   -
3112   -struct perf_callchain_entry *
3113   -perf_callchain(struct pt_regs *regs)
3114   -{
3115   - struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
3116   -
3117   - entry->nr = 0;
3118   - perf_do_callchain(regs, entry);
3119   - return entry;
3120 3085 }
arch/powerpc/kernel/perf_callchain.c
... ... @@ -46,8 +46,8 @@
46 46 return 0;
47 47 }
48 48  
49   -static void perf_callchain_kernel(struct pt_regs *regs,
50   - struct perf_callchain_entry *entry)
  49 +void
  50 +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
51 51 {
52 52 unsigned long sp, next_sp;
53 53 unsigned long next_ip;
... ... @@ -221,8 +221,8 @@
221 221 puc == (unsigned long) &sf->uc;
222 222 }
223 223  
224   -static void perf_callchain_user_64(struct pt_regs *regs,
225   - struct perf_callchain_entry *entry)
  224 +static void perf_callchain_user_64(struct perf_callchain_entry *entry,
  225 + struct pt_regs *regs)
226 226 {
227 227 unsigned long sp, next_sp;
228 228 unsigned long next_ip;
... ... @@ -303,8 +303,8 @@
303 303 return __get_user_inatomic(*ret, ptr);
304 304 }
305 305  
306   -static inline void perf_callchain_user_64(struct pt_regs *regs,
307   - struct perf_callchain_entry *entry)
  306 +static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
  307 + struct pt_regs *regs)
308 308 {
309 309 }
310 310  
... ... @@ -423,8 +423,8 @@
423 423 return mctx->mc_gregs;
424 424 }
425 425  
426   -static void perf_callchain_user_32(struct pt_regs *regs,
427   - struct perf_callchain_entry *entry)
  426 +static void perf_callchain_user_32(struct perf_callchain_entry *entry,
  427 + struct pt_regs *regs)
428 428 {
429 429 unsigned int sp, next_sp;
430 430 unsigned int next_ip;
431 431  
... ... @@ -471,33 +471,12 @@
471 471 }
472 472 }
473 473  
474   -/*
475   - * Since we can't get PMU interrupts inside a PMU interrupt handler,
476   - * we don't need separate irq and nmi entries here.
477   - */
478   -static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);
479   -
480   -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
  474 +void
  475 +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
481 476 {
482   - struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain);
483   -
484   - entry->nr = 0;
485   -
486   - if (!user_mode(regs)) {
487   - perf_callchain_kernel(regs, entry);
488   - if (current->mm)
489   - regs = task_pt_regs(current);
490   - else
491   - regs = NULL;
492   - }
493   -
494   - if (regs) {
495   - if (current_is_64bit())
496   - perf_callchain_user_64(regs, entry);
497   - else
498   - perf_callchain_user_32(regs, entry);
499   - }
500   -
501   - return entry;
  477 + if (current_is_64bit())
  478 + perf_callchain_user_64(entry, regs);
  479 + else
  480 + perf_callchain_user_32(entry, regs);
502 481 }
arch/sh/kernel/perf_callchain.c
... ... @@ -44,45 +44,12 @@
44 44 .address = callchain_address,
45 45 };
46 46  
47   -static void
48   -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
  47 +void
  48 +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
49 49 {
50 50 perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
51 51 perf_callchain_store(entry, regs->pc);
52 52  
53 53 unwind_stack(NULL, regs, NULL, &callchain_ops, entry);
54   -}
55   -
56   -static void
57   -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
58   -{
59   - int is_user;
60   -
61   - if (!regs)
62   - return;
63   -
64   - is_user = user_mode(regs);
65   -
66   - /*
67   - * Only the kernel side is implemented for now.
68   - */
69   - if (!is_user)
70   - perf_callchain_kernel(regs, entry);
71   -}
72   -
73   -/*
74   - * No need for separate IRQ and NMI entries.
75   - */
76   -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
77   -
78   -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
79   -{
80   - struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
81   -
82   - entry->nr = 0;
83   -
84   - perf_do_callchain(regs, entry);
85   -
86   - return entry;
87 54 }
arch/sparc/kernel/perf_event.c
... ... @@ -1283,14 +1283,16 @@
1283 1283 register_die_notifier(&perf_event_nmi_notifier);
1284 1284 }
1285 1285  
1286   -static void perf_callchain_kernel(struct pt_regs *regs,
1287   - struct perf_callchain_entry *entry)
  1286 +void perf_callchain_kernel(struct perf_callchain_entry *entry,
  1287 + struct pt_regs *regs)
1288 1288 {
1289 1289 unsigned long ksp, fp;
1290 1290 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
1291 1291 int graph = 0;
1292 1292 #endif
1293 1293  
  1294 + stack_trace_flush();
  1295 +
1294 1296 perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
1295 1297 perf_callchain_store(entry, regs->tpc);
1296 1298  
... ... @@ -1330,8 +1332,8 @@
1330 1332 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1331 1333 }
1332 1334  
1333   -static void perf_callchain_user_64(struct pt_regs *regs,
1334   - struct perf_callchain_entry *entry)
  1335 +static void perf_callchain_user_64(struct perf_callchain_entry *entry,
  1336 + struct pt_regs *regs)
1335 1337 {
1336 1338 unsigned long ufp;
1337 1339  
... ... @@ -1353,8 +1355,8 @@
1353 1355 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1354 1356 }
1355 1357  
1356   -static void perf_callchain_user_32(struct pt_regs *regs,
1357   - struct perf_callchain_entry *entry)
  1358 +static void perf_callchain_user_32(struct perf_callchain_entry *entry,
  1359 + struct pt_regs *regs)
1358 1360 {
1359 1361 unsigned long ufp;
1360 1362  
1361 1363  
... ... @@ -1376,31 +1378,13 @@
1376 1378 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1377 1379 }
1378 1380  
1379   -/* Like powerpc we can't get PMU interrupts within the PMU handler,
1380   - * so no need for separate NMI and IRQ chains as on x86.
1381   - */
1382   -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
1383   -
1384   -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
  1381 +void
  1382 +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1385 1383 {
1386   - struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
1387   -
1388   - entry->nr = 0;
1389   - if (!user_mode(regs)) {
1390   - stack_trace_flush();
1391   - perf_callchain_kernel(regs, entry);
1392   - if (current->mm)
1393   - regs = task_pt_regs(current);
1394   - else
1395   - regs = NULL;
1396   - }
1397   - if (regs) {
1398   - flushw_user();
1399   - if (test_thread_flag(TIF_32BIT))
1400   - perf_callchain_user_32(regs, entry);
1401   - else
1402   - perf_callchain_user_64(regs, entry);
1403   - }
1404   - return entry;
  1384 + flushw_user();
  1385 + if (test_thread_flag(TIF_32BIT))
  1386 + perf_callchain_user_32(entry, regs);
  1387 + else
  1388 + perf_callchain_user_64(entry, regs);
1405 1389 }
arch/x86/kernel/cpu/perf_event.c
... ... @@ -1571,11 +1571,9 @@
1571 1571 * callchain support
1572 1572 */
1573 1573  
  1574 +static DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry_nmi);
1574 1575  
1575   -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
1576   -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
1577 1576  
1578   -
1579 1577 static void
1580 1578 backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
1581 1579 {
... ... @@ -1607,8 +1605,8 @@
1607 1605 .walk_stack = print_context_stack_bp,
1608 1606 };
1609 1607  
1610   -static void
1611   -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
  1608 +void
  1609 +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1612 1610 {
1613 1611 perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
1614 1612 perf_callchain_store(entry, regs->ip);
1615 1613  
... ... @@ -1653,14 +1651,12 @@
1653 1651 }
1654 1652 #endif
1655 1653  
1656   -static void
1657   -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
  1654 +void
  1655 +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1658 1656 {
1659 1657 struct stack_frame frame;
1660 1658 const void __user *fp;
1661 1659  
1662   - if (!user_mode(regs))
1663   - regs = task_pt_regs(current);
1664 1660  
1665 1661 fp = (void __user *)regs->bp;
1666 1662  
1667 1663  
1668 1664  
1669 1665  
... ... @@ -1687,42 +1683,17 @@
1687 1683 }
1688 1684 }
1689 1685  
1690   -static void
1691   -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
  1686 +struct perf_callchain_entry *perf_callchain_buffer(void)
1692 1687 {
1693   - int is_user;
1694   -
1695   - if (!regs)
1696   - return;
1697   -
1698   - is_user = user_mode(regs);
1699   -
1700   - if (!is_user)
1701   - perf_callchain_kernel(regs, entry);
1702   -
1703   - if (current->mm)
1704   - perf_callchain_user(regs, entry);
1705   -}
1706   -
1707   -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1708   -{
1709   - struct perf_callchain_entry *entry;
1710   -
1711 1688 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1712 1689 /* TODO: We don't support guest os callchain now */
1713 1690 return NULL;
1714 1691 }
1715 1692  
1716 1693 if (in_nmi())
1717   - entry = &__get_cpu_var(pmc_nmi_entry);
1718   - else
1719   - entry = &__get_cpu_var(pmc_irq_entry);
  1694 + return &__get_cpu_var(perf_callchain_entry_nmi);
1720 1695  
1721   - entry->nr = 0;
1722   -
1723   - perf_do_callchain(regs, entry);
1724   -
1725   - return entry;
  1696 + return &__get_cpu_var(perf_callchain_entry);
1726 1697 }
1727 1698  
1728 1699 unsigned long perf_instruction_pointer(struct pt_regs *regs)
include/linux/perf_event.h
... ... @@ -976,7 +976,15 @@
976 976 extern void perf_event_comm(struct task_struct *tsk);
977 977 extern void perf_event_fork(struct task_struct *tsk);
978 978  
979   -extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
  979 +/* Callchains */
  980 +DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
  981 +
  982 +extern void perf_callchain_user(struct perf_callchain_entry *entry,
  983 + struct pt_regs *regs);
  984 +extern void perf_callchain_kernel(struct perf_callchain_entry *entry,
  985 + struct pt_regs *regs);
  986 +extern struct perf_callchain_entry *perf_callchain_buffer(void);
  987 +
980 988  
981 989 static inline void
982 990 perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
... ... @@ -2937,13 +2937,49 @@
2937 2937 __perf_pending_run();
2938 2938 }
2939 2939  
  2940 +DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
  2941 +
2940 2942 /*
2941 2943 * Callchain support -- arch specific
2942 2944 */
2943 2945  
2944   -__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
  2946 +__weak struct perf_callchain_entry *perf_callchain_buffer(void)
2945 2947 {
2946   - return NULL;
  2948 + return &__get_cpu_var(perf_callchain_entry);
  2949 +}
  2950 +
  2951 +__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
  2952 + struct pt_regs *regs)
  2953 +{
  2954 +}
  2955 +
  2956 +__weak void perf_callchain_user(struct perf_callchain_entry *entry,
  2957 + struct pt_regs *regs)
  2958 +{
  2959 +}
  2960 +
  2961 +static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
  2962 +{
  2963 + struct perf_callchain_entry *entry;
  2964 +
  2965 + entry = perf_callchain_buffer();
  2966 + if (!entry)
  2967 + return NULL;
  2968 +
  2969 + entry->nr = 0;
  2970 +
  2971 + if (!user_mode(regs)) {
  2972 + perf_callchain_kernel(entry, regs);
  2973 + if (current->mm)
  2974 + regs = task_pt_regs(current);
  2975 + else
  2976 + regs = NULL;
  2977 + }
  2978 +
  2979 + if (regs)
  2980 + perf_callchain_user(entry, regs);
  2981 +
  2982 + return entry;
2947 2983 }
2948 2984  
2949 2985