Commit ddb321a8dd158520d97ed1cbade1d4ac36b6af31

Authored by Linus Torvalds

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "Mostly tooling fixes, but also some kernel side fixes: uncore PMU
  driver fix, user regs sampling fix and an instruction decoder fix that
  unbreaks PEBS precise sampling"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/uncore/hsw-ep: Handle systems with only two SBOXes
  perf/x86_64: Improve user regs sampling
  perf: Move task_pt_regs sampling into arch code
  x86: Fix off-by-one in instruction decoder
  perf hists browser: Fix segfault when showing callchain
  perf callchain: Free callchains when hist entries are deleted
  perf hists: Fix children sort key behavior
  perf diff: Fix to sort by baseline field by default
  perf list: Fix --raw-dump option
  perf probe: Fix crash in dwarf_getcfi_elf
  perf probe: Fix to fall back to find probe point in symbols
  perf callchain: Append callchains only when requested
  perf ui/tui: Print backtrace symbols when segfault occurs
  perf report: Show progress bar for output resorting

26 changed files:

arch/arm/kernel/perf_regs.c
... ... @@ -28,4 +28,12 @@
28 28 {
29 29 return PERF_SAMPLE_REGS_ABI_32;
30 30 }
  31 +
  32 +void perf_get_regs_user(struct perf_regs *regs_user,
  33 + struct pt_regs *regs,
  34 + struct pt_regs *regs_user_copy)
  35 +{
  36 + regs_user->regs = task_pt_regs(current);
  37 + regs_user->abi = perf_reg_abi(current);
  38 +}
arch/arm64/kernel/perf_regs.c
... ... @@ -50,4 +50,12 @@
50 50 else
51 51 return PERF_SAMPLE_REGS_ABI_64;
52 52 }
  53 +
  54 +void perf_get_regs_user(struct perf_regs *regs_user,
  55 + struct pt_regs *regs,
  56 + struct pt_regs *regs_user_copy)
  57 +{
  58 + regs_user->regs = task_pt_regs(current);
  59 + regs_user->abi = perf_reg_abi(current);
  60 +}
arch/x86/kernel/cpu/perf_event_intel_uncore.h
... ... @@ -17,7 +17,7 @@
17 17 #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
18 18 #define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
19 19 #define UNCORE_EXTRA_PCI_DEV 0xff
20   -#define UNCORE_EXTRA_PCI_DEV_MAX 2
  20 +#define UNCORE_EXTRA_PCI_DEV_MAX 3
21 21  
22 22 /* support up to 8 sockets */
23 23 #define UNCORE_SOCKET_MAX 8
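The bump from 2 to 3 makes room for a third "extra" PCI device slot, HSWEP_PCI_PCU_3, which the snbep driver below registers so that it can read the PCU capability registers.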
arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
... ... @@ -891,6 +891,7 @@
891 891 enum {
892 892 SNBEP_PCI_QPI_PORT0_FILTER,
893 893 SNBEP_PCI_QPI_PORT1_FILTER,
  894 + HSWEP_PCI_PCU_3,
894 895 };
895 896  
896 897 static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
... ... @@ -2026,6 +2027,17 @@
2026 2027 {
2027 2028 if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
2028 2029 hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
  2030 +
  2031 + /* Detect 6-8 core systems with only two SBOXes */
  2032 + if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) {
  2033 + u32 capid4;
  2034 +
  2035 + pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3],
  2036 + 0x94, &capid4);
  2037 + if (((capid4 >> 6) & 0x3) == 0)
  2038 + hswep_uncore_sbox.num_boxes = 2;
  2039 + }
  2040 +
2029 2041 uncore_msr_uncores = hswep_msr_uncores;
2030 2042 }
2031 2043  
... ... @@ -2286,6 +2298,11 @@
2286 2298 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f96),
2287 2299 .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
2288 2300 SNBEP_PCI_QPI_PORT1_FILTER),
  2301 + },
  2302 + { /* PCU.3 (for Capability registers) */
  2303 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0),
  2304 + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
  2305 + HSWEP_PCI_PCU_3),
2289 2306 },
2290 2307 { /* end: all zeroes */ }
2291 2308 };
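A rough sketch of the detection above, with the field layout inferred from this diff rather than from a datasheet: CAPID4 is read from config offset 0x94 of the PCU.3 device (id 0x2fc0), and a zero in bits 7:6 identifies the 6-8 core parts that expose only two SBOXes.

    /* Hypothetical helper, not in the tree: extract the two-bit field
     * at bits 7:6 of CAPID4. The fix only interprets the value 0
     * ("6-8 core system, two SBOXes"); other encodings leave
     * hswep_uncore_sbox.num_boxes at its default. */
    static inline unsigned int hswep_capid4_sbox_field(u32 capid4)
    {
            return (capid4 >> 6) & 0x3;
    }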
arch/x86/kernel/perf_regs.c
... ... @@ -78,6 +78,14 @@
78 78 {
79 79 return PERF_SAMPLE_REGS_ABI_32;
80 80 }
  81 +
  82 +void perf_get_regs_user(struct perf_regs *regs_user,
  83 + struct pt_regs *regs,
  84 + struct pt_regs *regs_user_copy)
  85 +{
  86 + regs_user->regs = task_pt_regs(current);
  87 + regs_user->abi = perf_reg_abi(current);
  88 +}
81 89 #else /* CONFIG_X86_64 */
82 90 #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
83 91 (1ULL << PERF_REG_X86_ES) | \
... ... @@ -101,6 +109,88 @@
101 109 return PERF_SAMPLE_REGS_ABI_32;
102 110 else
103 111 return PERF_SAMPLE_REGS_ABI_64;
  112 +}
  113 +
  114 +void perf_get_regs_user(struct perf_regs *regs_user,
  115 + struct pt_regs *regs,
  116 + struct pt_regs *regs_user_copy)
  117 +{
  118 + struct pt_regs *user_regs = task_pt_regs(current);
  119 +
  120 + /*
  121 + * If we're in an NMI that interrupted task_pt_regs setup, then
  122 + * we can't sample user regs at all. This check isn't really
  123 + * sufficient, though, as we could be in an NMI inside an interrupt
  124 + * that happened during task_pt_regs setup.
  125 + */
  126 + if (regs->sp > (unsigned long)&user_regs->r11 &&
  127 + regs->sp <= (unsigned long)(user_regs + 1)) {
  128 + regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
  129 + regs_user->regs = NULL;
  130 + return;
  131 + }
  132 +
  133 + /*
  134 + * RIP, flags, and the argument registers are usually saved.
  135 + * orig_ax is probably okay, too.
  136 + */
  137 + regs_user_copy->ip = user_regs->ip;
  138 + regs_user_copy->cx = user_regs->cx;
  139 + regs_user_copy->dx = user_regs->dx;
  140 + regs_user_copy->si = user_regs->si;
  141 + regs_user_copy->di = user_regs->di;
  142 + regs_user_copy->r8 = user_regs->r8;
  143 + regs_user_copy->r9 = user_regs->r9;
  144 + regs_user_copy->r10 = user_regs->r10;
  145 + regs_user_copy->r11 = user_regs->r11;
  146 + regs_user_copy->orig_ax = user_regs->orig_ax;
  147 + regs_user_copy->flags = user_regs->flags;
  148 +
  149 + /*
  150 + * Don't even try to report the "rest" regs.
  151 + */
  152 + regs_user_copy->bx = -1;
  153 + regs_user_copy->bp = -1;
  154 + regs_user_copy->r12 = -1;
  155 + regs_user_copy->r13 = -1;
  156 + regs_user_copy->r14 = -1;
  157 + regs_user_copy->r15 = -1;
  158 +
  159 + /*
  160 + * For this to be at all useful, we need a reasonable guess for
  161 + * sp and the ABI. Be careful: we're in NMI context, and we're
  162 + * considering current to be the current task, so we should
  163 + * be careful not to look at any other percpu variables that might
  164 + * change during context switches.
  165 + */
  166 + if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
  167 + task_thread_info(current)->status & TS_COMPAT) {
  168 + /* Easy case: we're in a compat syscall. */
  169 + regs_user->abi = PERF_SAMPLE_REGS_ABI_32;
  170 + regs_user_copy->sp = user_regs->sp;
  171 + regs_user_copy->cs = user_regs->cs;
  172 + regs_user_copy->ss = user_regs->ss;
  173 + } else if (user_regs->orig_ax != -1) {
  174 + /*
  175 + * We're probably in a 64-bit syscall.
  176 + * Warning: this code is severely racy. At least it's better
  177 + * than just blindly copying user_regs.
  178 + */
  179 + regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
  180 + regs_user_copy->sp = this_cpu_read(old_rsp);
  181 + regs_user_copy->cs = __USER_CS;
  182 + regs_user_copy->ss = __USER_DS;
  183 + regs_user_copy->cx = -1; /* usually contains garbage */
  184 + } else {
  185 + /* We're probably in an interrupt or exception. */
  186 + regs_user->abi = user_64bit_mode(user_regs) ?
  187 + PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
  188 + regs_user_copy->sp = user_regs->sp;
  189 + regs_user_copy->cs = user_regs->cs;
  190 + regs_user_copy->ss = user_regs->ss;
  191 + }
  192 +
  193 + regs_user->regs = regs_user_copy;
104 194 }
105 195 #endif /* CONFIG_X86_32 */
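For context, here is a minimal (hypothetical, not part of this commit) consumer of the path above: a profiler requests user registers with PERF_SAMPLE_REGS_USER plus a register mask, and perf_get_regs_user() decides what it actually gets, reporting unrecoverable registers as -1 and impossible samples as PERF_SAMPLE_REGS_ABI_NONE.

    #include <linux/perf_event.h>
    #include <asm/perf_regs.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Sketch: open a cycles counter that samples IP/SP/BP from user
     * space. Error handling omitted; a real profiler would mmap a
     * ring buffer on the returned fd and parse the samples. */
    static int open_user_regs_counter(pid_t pid)
    {
            struct perf_event_attr attr = {
                    .type             = PERF_TYPE_HARDWARE,
                    .size             = sizeof(attr),
                    .config           = PERF_COUNT_HW_CPU_CYCLES,
                    .sample_period    = 100000,
                    .sample_type      = PERF_SAMPLE_IP |
                                        PERF_SAMPLE_REGS_USER,
                    .sample_regs_user = (1ULL << PERF_REG_X86_IP) |
                                        (1ULL << PERF_REG_X86_SP) |
                                        (1ULL << PERF_REG_X86_BP),
            };

            return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
    }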
arch/x86/lib/insn.c
... ... @@ -28,7 +28,7 @@
28 28  
29 29 /* Verify next sizeof(t) bytes can be on the same instruction */
30 30 #define validate_next(t, insn, n) \
31   - ((insn)->next_byte + sizeof(t) + n < (insn)->end_kaddr)
  31 + ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
32 32  
33 33 #define __get_next(t, insn) \
34 34 ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
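The decoder change is a classic boundary fix: end_kaddr points one past the last valid byte, so a read that ends exactly at end_kaddr is still in bounds. With the strict '<', the last byte of the instruction buffer was unreachable, which is what broke PEBS precise sampling. A standalone illustration in plain C (not kernel code):

    #include <assert.h>

    int main(void)
    {
            unsigned char buf[16];
            unsigned char *end_kaddr = buf + sizeof(buf); /* one past end */
            unsigned char *next_byte = buf + 15;          /* last byte */

            /* A one-byte read of the final byte is in bounds, but the
             * old strict comparison rejected it. */
            assert(!(next_byte + 1 <  end_kaddr)); /* old check: fails */
            assert( (next_byte + 1 <= end_kaddr)); /* new check: passes */
            return 0;
    }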
include/linux/perf_event.h
... ... @@ -79,11 +79,6 @@
79 79 struct perf_branch_entry entries[0];
80 80 };
81 81  
82   -struct perf_regs {
83   - __u64 abi;
84   - struct pt_regs *regs;
85   -};
86   -
87 82 struct task_struct;
88 83  
89 84 /*
... ... @@ -610,7 +605,14 @@
610 605 u32 reserved;
611 606 } cpu_entry;
612 607 struct perf_callchain_entry *callchain;
  608 +
  609 + /*
  610 + * regs_user may point to task_pt_regs or to regs_user_copy, depending
  611 + * on arch details.
  612 + */
613 613 struct perf_regs regs_user;
  614 + struct pt_regs regs_user_copy;
  615 +
614 616 struct perf_regs regs_intr;
615 617 u64 stack_user_size;
616 618 } ____cacheline_aligned;
include/linux/perf_regs.h
1 1 #ifndef _LINUX_PERF_REGS_H
2 2 #define _LINUX_PERF_REGS_H
3 3  
  4 +struct perf_regs {
  5 + __u64 abi;
  6 + struct pt_regs *regs;
  7 +};
  8 +
4 9 #ifdef CONFIG_HAVE_PERF_REGS
5 10 #include <asm/perf_regs.h>
6 11 u64 perf_reg_value(struct pt_regs *regs, int idx);
7 12 int perf_reg_validate(u64 mask);
8 13 u64 perf_reg_abi(struct task_struct *task);
  14 +void perf_get_regs_user(struct perf_regs *regs_user,
  15 + struct pt_regs *regs,
  16 + struct pt_regs *regs_user_copy);
9 17 #else
10 18 static inline u64 perf_reg_value(struct pt_regs *regs, int idx)
11 19 {
... ... @@ -20,6 +28,14 @@
20 28 static inline u64 perf_reg_abi(struct task_struct *task)
21 29 {
22 30 return PERF_SAMPLE_REGS_ABI_NONE;
  31 +}
  32 +
  33 +static inline void perf_get_regs_user(struct perf_regs *regs_user,
  34 + struct pt_regs *regs,
  35 + struct pt_regs *regs_user_copy)
  36 +{
  37 + regs_user->regs = task_pt_regs(current);
  38 + regs_user->abi = perf_reg_abi(current);
23 39 }
24 40 #endif /* CONFIG_HAVE_PERF_REGS */
25 41 #endif /* _LINUX_PERF_REGS_H */
kernel/events/core.c
... ... @@ -4461,18 +4461,14 @@
4461 4461 }
4462 4462  
4463 4463 static void perf_sample_regs_user(struct perf_regs *regs_user,
4464   - struct pt_regs *regs)
  4464 + struct pt_regs *regs,
  4465 + struct pt_regs *regs_user_copy)
4465 4466 {
4466   - if (!user_mode(regs)) {
4467   - if (current->mm)
4468   - regs = task_pt_regs(current);
4469   - else
4470   - regs = NULL;
4471   - }
4472   -
4473   - if (regs) {
4474   - regs_user->abi = perf_reg_abi(current);
  4467 + if (user_mode(regs)) {
  4468 + regs_user->abi = perf_reg_abi(current);
4475 4469 regs_user->regs = regs;
  4470 + } else if (current->mm) {
  4471 + perf_get_regs_user(regs_user, regs, regs_user_copy);
4476 4472 } else {
4477 4473 regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
4478 4474 regs_user->regs = NULL;
... ... @@ -4951,7 +4947,8 @@
4951 4947 }
4952 4948  
4953 4949 if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
4954   - perf_sample_regs_user(&data->regs_user, regs);
  4950 + perf_sample_regs_user(&data->regs_user, regs,
  4951 + &data->regs_user_copy);
4955 4952  
4956 4953 if (sample_type & PERF_SAMPLE_REGS_USER) {
4957 4954 /* regs dump ABI info */
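The rewritten perf_sample_regs_user() now distinguishes three cases: samples that interrupt user mode use the interrupt regs directly; kernel-mode samples of a task that has an mm defer to the new arch hook perf_get_regs_user(); and kernel threads (no mm) report PERF_SAMPLE_REGS_ABI_NONE. The old code blindly used task_pt_regs() for the middle case, which is exactly what the x86_64 hook above is careful about.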
tools/perf/builtin-annotate.c
... ... @@ -232,7 +232,7 @@
232 232 if (nr_samples > 0) {
233 233 total_nr_samples += nr_samples;
234 234 hists__collapse_resort(hists, NULL);
235   - hists__output_resort(hists);
  235 + hists__output_resort(hists, NULL);
236 236  
237 237 if (symbol_conf.event_group &&
238 238 !perf_evsel__is_group_leader(pos))
tools/perf/builtin-diff.c
... ... @@ -545,6 +545,42 @@
545 545 return __hist_entry__cmp_compute(p_left, p_right, c);
546 546 }
547 547  
  548 +static int64_t
  549 +hist_entry__cmp_nop(struct hist_entry *left __maybe_unused,
  550 + struct hist_entry *right __maybe_unused)
  551 +{
  552 + return 0;
  553 +}
  554 +
  555 +static int64_t
  556 +hist_entry__cmp_baseline(struct hist_entry *left, struct hist_entry *right)
  557 +{
  558 + if (sort_compute)
  559 + return 0;
  560 +
  561 + if (left->stat.period == right->stat.period)
  562 + return 0;
  563 + return left->stat.period > right->stat.period ? 1 : -1;
  564 +}
  565 +
  566 +static int64_t
  567 +hist_entry__cmp_delta(struct hist_entry *left, struct hist_entry *right)
  568 +{
  569 + return hist_entry__cmp_compute(right, left, COMPUTE_DELTA);
  570 +}
  571 +
  572 +static int64_t
  573 +hist_entry__cmp_ratio(struct hist_entry *left, struct hist_entry *right)
  574 +{
  575 + return hist_entry__cmp_compute(right, left, COMPUTE_RATIO);
  576 +}
  577 +
  578 +static int64_t
  579 +hist_entry__cmp_wdiff(struct hist_entry *left, struct hist_entry *right)
  580 +{
  581 + return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF);
  582 +}
  583 +
548 584 static void insert_hist_entry_by_compute(struct rb_root *root,
549 585 struct hist_entry *he,
550 586 int c)
... ... @@ -605,7 +641,7 @@
605 641 hists__precompute(hists);
606 642 hists__compute_resort(hists);
607 643 } else {
608   - hists__output_resort(hists);
  644 + hists__output_resort(hists, NULL);
609 645 }
610 646  
611 647 hists__fprintf(hists, true, 0, 0, 0, stdout);
... ... @@ -1038,27 +1074,35 @@
1038 1074 fmt->header = hpp__header;
1039 1075 fmt->width = hpp__width;
1040 1076 fmt->entry = hpp__entry_global;
  1077 + fmt->cmp = hist_entry__cmp_nop;
  1078 + fmt->collapse = hist_entry__cmp_nop;
1041 1079  
1042 1080 /* TODO more colors */
1043 1081 switch (idx) {
1044 1082 case PERF_HPP_DIFF__BASELINE:
1045 1083 fmt->color = hpp__color_baseline;
  1084 + fmt->sort = hist_entry__cmp_baseline;
1046 1085 break;
1047 1086 case PERF_HPP_DIFF__DELTA:
1048 1087 fmt->color = hpp__color_delta;
  1088 + fmt->sort = hist_entry__cmp_delta;
1049 1089 break;
1050 1090 case PERF_HPP_DIFF__RATIO:
1051 1091 fmt->color = hpp__color_ratio;
  1092 + fmt->sort = hist_entry__cmp_ratio;
1052 1093 break;
1053 1094 case PERF_HPP_DIFF__WEIGHTED_DIFF:
1054 1095 fmt->color = hpp__color_wdiff;
  1096 + fmt->sort = hist_entry__cmp_wdiff;
1055 1097 break;
1056 1098 default:
  1099 + fmt->sort = hist_entry__cmp_nop;
1057 1100 break;
1058 1101 }
1059 1102  
1060 1103 init_header(d, dfmt);
1061 1104 perf_hpp__column_register(fmt);
  1105 + perf_hpp__register_sort_field(fmt);
1062 1106 }
1063 1107  
1064 1108 static void ui_init(void)
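With these callbacks in place every diff column has an explicit sort routine, and baseline, registered first, becomes the default sort key. cmp and collapse are nops so that the computed columns never influence how entries from the two sessions are matched, only how the result is ordered.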
tools/perf/builtin-list.c
... ... @@ -19,7 +19,9 @@
19 19 int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
20 20 {
21 21 int i;
22   - const struct option list_options[] = {
  22 + bool raw_dump = false;
  23 + struct option list_options[] = {
  24 + OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"),
23 25 OPT_END()
24 26 };
25 27 const char * const list_usage[] = {
... ... @@ -27,11 +29,18 @@
27 29 NULL
28 30 };
29 31  
  32 + set_option_flag(list_options, 0, "raw-dump", PARSE_OPT_HIDDEN);
  33 +
30 34 argc = parse_options(argc, argv, list_options, list_usage,
31 35 PARSE_OPT_STOP_AT_NON_OPTION);
32 36  
33 37 setup_pager();
34 38  
  39 + if (raw_dump) {
  40 + print_events(NULL, true);
  41 + return 0;
  42 + }
  43 +
35 44 if (argc == 0) {
36 45 print_events(NULL, false);
37 46 return 0;
... ... @@ -53,8 +62,6 @@
53 62 print_hwcache_events(NULL, false);
54 63 else if (strcmp(argv[i], "pmu") == 0)
55 64 print_pmu_events(NULL, false);
56   - else if (strcmp(argv[i], "--raw-dump") == 0)
57   - print_events(NULL, true);
58 65 else {
59 66 char *sep = strchr(argv[i], ':'), *s;
60 67 int sep_idx;
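The option table loses its const so set_option_flag() can mark --raw-dump as PARSE_OPT_HIDDEN: the option parses normally but stays out of the help text. Declaring it as a real option is the actual fix, since parse_options() rejected the unknown --raw-dump before the old strcmp() branch in the positional loop could ever run.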
tools/perf/builtin-report.c
... ... @@ -457,6 +457,19 @@
457 457 ui_progress__finish();
458 458 }
459 459  
  460 +static void report__output_resort(struct report *rep)
  461 +{
  462 + struct ui_progress prog;
  463 + struct perf_evsel *pos;
  464 +
  465 + ui_progress__init(&prog, rep->nr_entries, "Sorting events for output...");
  466 +
  467 + evlist__for_each(rep->session->evlist, pos)
  468 + hists__output_resort(evsel__hists(pos), &prog);
  469 +
  470 + ui_progress__finish();
  471 +}
  472 +
460 473 static int __cmd_report(struct report *rep)
461 474 {
462 475 int ret;
... ... @@ -505,13 +518,20 @@
505 518 if (session_done())
506 519 return 0;
507 520  
  521 + /*
  522 + * recalculate number of entries after collapsing since it
  523 + * might be changed during the collapse phase.
  524 + */
  525 + rep->nr_entries = 0;
  526 + evlist__for_each(session->evlist, pos)
  527 + rep->nr_entries += evsel__hists(pos)->nr_entries;
  528 +
508 529 if (rep->nr_entries == 0) {
509 530 ui__error("The %s file has no samples!\n", file->path);
510 531 return 0;
511 532 }
512 533  
513   - evlist__for_each(session->evlist, pos)
514   - hists__output_resort(evsel__hists(pos));
  534 + report__output_resort(rep);
515 535  
516 536 return report__browse_hists(rep);
517 537 }
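The progress bar follows the same init/update/finish shape already used for the collapse phase. Schematically (perf-internal API, with the signatures as used in this diff; the work callbacks are placeholders):

    struct ui_progress prog;

    ui_progress__init(&prog, nr_total, "Sorting events for output...");
    while (have_more_entries()) {      /* placeholder loop condition */
            resort_one_entry();        /* placeholder unit of work */
            ui_progress__update(&prog, 1);
    }
    ui_progress__finish();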
tools/perf/builtin-top.c
... ... @@ -285,7 +285,7 @@
285 285 }
286 286  
287 287 hists__collapse_resort(hists, NULL);
288   - hists__output_resort(hists);
  288 + hists__output_resort(hists, NULL);
289 289  
290 290 hists__output_recalc_col_len(hists, top->print_entries - printed);
291 291 putchar('\n');
... ... @@ -554,7 +554,7 @@
554 554 }
555 555  
556 556 hists__collapse_resort(hists, NULL);
557   - hists__output_resort(hists);
  557 + hists__output_resort(hists, NULL);
558 558 }
559 559  
560 560 static void *display_thread_tui(void *arg)
tools/perf/tests/hists_cumulate.c
... ... @@ -187,7 +187,7 @@
187 187 * function since TEST_ASSERT_VAL() returns in case of failure.
188 188 */
189 189 hists__collapse_resort(hists, NULL);
190   - hists__output_resort(hists);
  190 + hists__output_resort(hists, NULL);
191 191  
192 192 if (verbose > 2) {
193 193 pr_info("use callchain: %d, cumulate callchain: %d\n",
... ... @@ -454,12 +454,12 @@
454 454 * 30.00% 10.00% perf perf [.] cmd_record
455 455 * 20.00% 0.00% bash libc [.] malloc
456 456 * 10.00% 10.00% bash [kernel] [k] page_fault
457   - * 10.00% 10.00% perf [kernel] [k] schedule
458   - * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open
  457 + * 10.00% 10.00% bash bash [.] xmalloc
459 458 * 10.00% 10.00% perf [kernel] [k] page_fault
460   - * 10.00% 10.00% perf libc [.] free
461 459 * 10.00% 10.00% perf libc [.] malloc
462   - * 10.00% 10.00% bash bash [.] xmalloc
  460 + * 10.00% 10.00% perf [kernel] [k] schedule
  461 + * 10.00% 10.00% perf libc [.] free
  462 + * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open
463 463 */
464 464 struct result expected[] = {
465 465 { 7000, 2000, "perf", "perf", "main" },
466 466  
467 467  
... ... @@ -468,12 +468,12 @@
468 468 { 3000, 1000, "perf", "perf", "cmd_record" },
469 469 { 2000, 0, "bash", "libc", "malloc" },
470 470 { 1000, 1000, "bash", "[kernel]", "page_fault" },
471   - { 1000, 1000, "perf", "[kernel]", "schedule" },
472   - { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" },
  471 + { 1000, 1000, "bash", "bash", "xmalloc" },
473 472 { 1000, 1000, "perf", "[kernel]", "page_fault" },
  473 + { 1000, 1000, "perf", "[kernel]", "schedule" },
474 474 { 1000, 1000, "perf", "libc", "free" },
475 475 { 1000, 1000, "perf", "libc", "malloc" },
476   - { 1000, 1000, "bash", "bash", "xmalloc" },
  476 + { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" },
477 477 };
478 478  
479 479 symbol_conf.use_callchain = false;
... ... @@ -537,10 +537,13 @@
537 537 * malloc
538 538 * main
539 539 *
540   - * 10.00% 10.00% perf [kernel] [k] schedule
  540 + * 10.00% 10.00% bash bash [.] xmalloc
541 541 * |
542   - * --- schedule
543   - * run_command
  542 + * --- xmalloc
  543 + * malloc
  544 + * xmalloc <--- NOTE: there's a cycle
  545 + * malloc
  546 + * xmalloc
544 547 * main
545 548 *
546 549 * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open
... ... @@ -556,6 +559,12 @@
556 559 * run_command
557 560 * main
558 561 *
  562 + * 10.00% 10.00% perf [kernel] [k] schedule
  563 + * |
  564 + * --- schedule
  565 + * run_command
  566 + * main
  567 + *
559 568 * 10.00% 10.00% perf libc [.] free
560 569 * |
561 570 * --- free
... ... @@ -570,15 +579,6 @@
570 579 * run_command
571 580 * main
572 581 *
573   - * 10.00% 10.00% bash bash [.] xmalloc
574   - * |
575   - * --- xmalloc
576   - * malloc
577   - * xmalloc <--- NOTE: there's a cycle
578   - * malloc
579   - * xmalloc
580   - * main
581   - *
582 582 */
583 583 struct result expected[] = {
584 584 { 7000, 2000, "perf", "perf", "main" },
... ... @@ -587,12 +587,12 @@
587 587 { 3000, 1000, "perf", "perf", "cmd_record" },
588 588 { 2000, 0, "bash", "libc", "malloc" },
589 589 { 1000, 1000, "bash", "[kernel]", "page_fault" },
590   - { 1000, 1000, "perf", "[kernel]", "schedule" },
  590 + { 1000, 1000, "bash", "bash", "xmalloc" },
591 591 { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" },
592 592 { 1000, 1000, "perf", "[kernel]", "page_fault" },
  593 + { 1000, 1000, "perf", "[kernel]", "schedule" },
593 594 { 1000, 1000, "perf", "libc", "free" },
594 595 { 1000, 1000, "perf", "libc", "malloc" },
595   - { 1000, 1000, "bash", "bash", "xmalloc" },
596 596 };
597 597 struct callchain_result expected_callchain[] = {
598 598 {
... ... @@ -622,9 +622,12 @@
622 622 { "bash", "main" }, },
623 623 },
624 624 {
625   - 3, { { "[kernel]", "schedule" },
626   - { "perf", "run_command" },
627   - { "perf", "main" }, },
  625 + 6, { { "bash", "xmalloc" },
  626 + { "libc", "malloc" },
  627 + { "bash", "xmalloc" },
  628 + { "libc", "malloc" },
  629 + { "bash", "xmalloc" },
  630 + { "bash", "main" }, },
628 631 },
629 632 {
630 633 3, { { "[kernel]", "sys_perf_event_open" },
... ... @@ -638,6 +641,11 @@
638 641 { "perf", "main" }, },
639 642 },
640 643 {
  644 + 3, { { "[kernel]", "schedule" },
  645 + { "perf", "run_command" },
  646 + { "perf", "main" }, },
  647 + },
  648 + {
641 649 4, { { "libc", "free" },
642 650 { "perf", "cmd_record" },
643 651 { "perf", "run_command" },
... ... @@ -648,14 +656,6 @@
648 656 { "perf", "cmd_record" },
649 657 { "perf", "run_command" },
650 658 { "perf", "main" }, },
651   - },
652   - {
653   - 6, { { "bash", "xmalloc" },
654   - { "libc", "malloc" },
655   - { "bash", "xmalloc" },
656   - { "libc", "malloc" },
657   - { "bash", "xmalloc" },
658   - { "bash", "main" }, },
659 659 },
660 660 };
661 661  
tools/perf/tests/hists_filter.c
... ... @@ -138,7 +138,7 @@
138 138 struct hists *hists = evsel__hists(evsel);
139 139  
140 140 hists__collapse_resort(hists, NULL);
141   - hists__output_resort(hists);
  141 + hists__output_resort(hists, NULL);
142 142  
143 143 if (verbose > 2) {
144 144 pr_info("Normal histogram\n");
tools/perf/tests/hists_output.c
... ... @@ -152,7 +152,7 @@
152 152 goto out;
153 153  
154 154 hists__collapse_resort(hists, NULL);
155   - hists__output_resort(hists);
  155 + hists__output_resort(hists, NULL);
156 156  
157 157 if (verbose > 2) {
158 158 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
... ... @@ -252,7 +252,7 @@
252 252 goto out;
253 253  
254 254 hists__collapse_resort(hists, NULL);
255   - hists__output_resort(hists);
  255 + hists__output_resort(hists, NULL);
256 256  
257 257 if (verbose > 2) {
258 258 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
... ... @@ -306,7 +306,7 @@
306 306 goto out;
307 307  
308 308 hists__collapse_resort(hists, NULL);
309   - hists__output_resort(hists);
  309 + hists__output_resort(hists, NULL);
310 310  
311 311 if (verbose > 2) {
312 312 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
... ... @@ -384,7 +384,7 @@
384 384 goto out;
385 385  
386 386 hists__collapse_resort(hists, NULL);
387   - hists__output_resort(hists);
  387 + hists__output_resort(hists, NULL);
388 388  
389 389 if (verbose > 2) {
390 390 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
... ... @@ -487,7 +487,7 @@
487 487 goto out;
488 488  
489 489 hists__collapse_resort(hists, NULL);
490   - hists__output_resort(hists);
  490 + hists__output_resort(hists, NULL);
491 491  
492 492 if (verbose > 2) {
493 493 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
tools/perf/ui/browsers/hists.c
... ... @@ -550,7 +550,7 @@
550 550 bool need_percent;
551 551  
552 552 node = rb_first(root);
553   - need_percent = !!rb_next(node);
  553 + need_percent = node && rb_next(node);
554 554  
555 555 while (node) {
556 556 struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
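rb_first() returns NULL for an empty callchain tree and rb_next() dereferences its argument, so the old !!rb_next(node) crashed before the while loop could notice that node was NULL; the short-circuited form lets an empty tree fall straight through.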
tools/perf/ui/hist.c
... ... @@ -204,6 +204,9 @@
204 204 if (ret)
205 205 return ret;
206 206  
  207 + if (a->thread != b->thread || !symbol_conf.use_callchain)
  208 + return 0;
  209 +
207 210 ret = b->callchain->max_depth - a->callchain->max_depth;
208 211 }
209 212 return ret;
tools/perf/ui/tui/setup.c
1 1 #include <signal.h>
2 2 #include <stdbool.h>
  3 +#ifdef HAVE_BACKTRACE_SUPPORT
  4 +#include <execinfo.h>
  5 +#endif
3 6  
4 7 #include "../../util/cache.h"
5 8 #include "../../util/debug.h"
... ... @@ -88,6 +91,25 @@
88 91 return SLkp_getkey();
89 92 }
90 93  
  94 +#ifdef HAVE_BACKTRACE_SUPPORT
  95 +static void ui__signal_backtrace(int sig)
  96 +{
  97 + void *stackdump[32];
  98 + size_t size;
  99 +
  100 + ui__exit(false);
  101 + psignal(sig, "perf");
  102 +
  103 + printf("-------- backtrace --------\n");
  104 + size = backtrace(stackdump, ARRAY_SIZE(stackdump));
  105 + backtrace_symbols_fd(stackdump, size, STDOUT_FILENO);
  106 +
  107 + exit(0);
  108 +}
  109 +#else
  110 +# define ui__signal_backtrace ui__signal
  111 +#endif
  112 +
91 113 static void ui__signal(int sig)
92 114 {
93 115 ui__exit(false);
... ... @@ -122,8 +144,8 @@
122 144 ui_browser__init();
123 145 tui_progress__init();
124 146  
125   - signal(SIGSEGV, ui__signal);
126   - signal(SIGFPE, ui__signal);
  147 + signal(SIGSEGV, ui__signal_backtrace);
  148 + signal(SIGFPE, ui__signal_backtrace);
127 149 signal(SIGINT, ui__signal);
128 150 signal(SIGQUIT, ui__signal);
129 151 signal(SIGTERM, ui__signal);
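The handler is built on glibc's execinfo facility; a standalone sketch of the same calls (ordinary C, independent of perf):

    #include <execinfo.h>
    #include <unistd.h>

    /* Capture up to 32 return addresses and write symbolized frames
     * straight to stdout. backtrace_symbols_fd() writes to the fd
     * without the malloc() that backtrace_symbols() needs, which is
     * why it is the safer choice inside a signal handler. */
    static void dump_backtrace(void)
    {
            void *stackdump[32];
            int depth = backtrace(stackdump, 32);

            backtrace_symbols_fd(stackdump, depth, STDOUT_FILENO);
    }

    int main(void)
    {
            dump_backtrace();
            return 0;
    }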
tools/perf/util/callchain.c
... ... @@ -841,4 +841,34 @@
841 841  
842 842 return bf;
843 843 }
  844 +
  845 +static void free_callchain_node(struct callchain_node *node)
  846 +{
  847 + struct callchain_list *list, *tmp;
  848 + struct callchain_node *child;
  849 + struct rb_node *n;
  850 +
  851 + list_for_each_entry_safe(list, tmp, &node->val, list) {
  852 + list_del(&list->list);
  853 + free(list);
  854 + }
  855 +
  856 + n = rb_first(&node->rb_root_in);
  857 + while (n) {
  858 + child = container_of(n, struct callchain_node, rb_node_in);
  859 + n = rb_next(n);
  860 + rb_erase(&child->rb_node_in, &node->rb_root_in);
  861 +
  862 + free_callchain_node(child);
  863 + free(child);
  864 + }
  865 +}
  866 +
  867 +void free_callchain(struct callchain_root *root)
  868 +{
  869 + if (!symbol_conf.use_callchain)
  870 + return;
  871 +
  872 + free_callchain_node(&root->node);
  873 +}
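free_callchain_node() advances the iterator with rb_next() before rb_erase() unlinks the child, since erasing invalidates the node's tree pointers, and each child's subtree is torn down recursively before the child itself is freed.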
tools/perf/util/callchain.h
... ... @@ -198,5 +198,7 @@
198 198 char *callchain_list__sym_name(struct callchain_list *cl,
199 199 char *bf, size_t bfsize, bool show_dso);
200 200  
  201 +void free_callchain(struct callchain_root *root);
  202 +
201 203 #endif /* __PERF_CALLCHAIN_H */
tools/perf/util/hist.c
... ... @@ -6,6 +6,7 @@
6 6 #include "evlist.h"
7 7 #include "evsel.h"
8 8 #include "annotate.h"
  9 +#include "ui/progress.h"
9 10 #include <math.h>
10 11  
11 12 static bool hists__filter_entry_by_dso(struct hists *hists,
... ... @@ -303,7 +304,7 @@
303 304 size_t callchain_size = 0;
304 305 struct hist_entry *he;
305 306  
306   - if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain)
  307 + if (symbol_conf.use_callchain)
307 308 callchain_size = sizeof(struct callchain_root);
308 309  
309 310 he = zalloc(sizeof(*he) + callchain_size);
... ... @@ -736,7 +737,7 @@
736 737 iter->he = he;
737 738 he_cache[iter->curr++] = he;
738 739  
739   - callchain_append(he->callchain, &callchain_cursor, sample->period);
  740 + hist_entry__append_callchain(he, sample);
740 741  
741 742 /*
742 743 * We need to re-initialize the cursor since callchain_append()
... ... @@ -809,7 +810,8 @@
809 810 iter->he = he;
810 811 he_cache[iter->curr++] = he;
811 812  
812   - callchain_append(he->callchain, &cursor, sample->period);
  813 + if (symbol_conf.use_callchain)
  814 + callchain_append(he->callchain, &cursor, sample->period);
813 815 return 0;
814 816 }
815 817  
... ... @@ -945,6 +947,7 @@
945 947 zfree(&he->mem_info);
946 948 zfree(&he->stat_acc);
947 949 free_srcline(he->srcline);
  950 + free_callchain(he->callchain);
948 951 free(he);
949 952 }
950 953  
... ... @@ -987,6 +990,7 @@
987 990 else
988 991 p = &(*p)->rb_right;
989 992 }
  993 + hists->nr_entries++;
990 994  
991 995 rb_link_node(&he->rb_node_in, parent, p);
992 996 rb_insert_color(&he->rb_node_in, root);
... ... @@ -1024,7 +1028,10 @@
1024 1028 if (!sort__need_collapse)
1025 1029 return;
1026 1030  
  1031 + hists->nr_entries = 0;
  1032 +
1027 1033 root = hists__get_rotate_entries_in(hists);
  1034 +
1028 1035 next = rb_first(root);
1029 1036  
1030 1037 while (next) {
... ... @@ -1119,7 +1126,7 @@
1119 1126 rb_insert_color(&he->rb_node, entries);
1120 1127 }
1121 1128  
1122   -void hists__output_resort(struct hists *hists)
  1129 +void hists__output_resort(struct hists *hists, struct ui_progress *prog)
1123 1130 {
1124 1131 struct rb_root *root;
1125 1132 struct rb_node *next;
... ... @@ -1148,6 +1155,9 @@
1148 1155  
1149 1156 if (!n->filtered)
1150 1157 hists__calc_col_len(hists, n);
  1158 +
  1159 + if (prog)
  1160 + ui_progress__update(prog, 1);
1151 1161 }
1152 1162 }
1153 1163  
tools/perf/util/hist.h
... ... @@ -121,7 +121,7 @@
121 121 struct hists *hists);
122 122 void hist_entry__free(struct hist_entry *);
123 123  
124   -void hists__output_resort(struct hists *hists);
  124 +void hists__output_resort(struct hists *hists, struct ui_progress *prog);
125 125 void hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
126 126  
127 127 void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
tools/perf/util/probe-event.c
... ... @@ -495,9 +495,11 @@
495 495 }
496 496  
497 497 if (ntevs == 0) { /* No error but failed to find probe point. */
498   - pr_warning("Probe point '%s' not found.\n",
  498 + pr_warning("Probe point '%s' not found in debuginfo.\n",
499 499 synthesize_perf_probe_point(&pev->point));
500   - return -ENOENT;
  500 + if (need_dwarf)
  501 + return -ENOENT;
  502 + return 0;
501 503 }
502 504 /* Error path : ntevs < 0 */
503 505 pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs);
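The behavioral change: failing to find the probe point in debuginfo is fatal (-ENOENT) only when DWARF was explicitly required. Otherwise the function now returns 0 with no trace events, letting the caller fall back to a symbol-table search, which is what the reworded warning announces.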
tools/perf/util/probe-finder.c
... ... @@ -989,8 +989,24 @@
989 989 int ret = 0;
990 990  
991 991 #if _ELFUTILS_PREREQ(0, 142)
  992 + Elf *elf;
  993 + GElf_Ehdr ehdr;
  994 + GElf_Shdr shdr;
  995 +
992 996 /* Get the call frame information from this dwarf */
993   - pf->cfi = dwarf_getcfi_elf(dwarf_getelf(dbg->dbg));
  997 + elf = dwarf_getelf(dbg->dbg);
  998 + if (elf == NULL)
  999 + return -EINVAL;
  1000 +
  1001 + if (gelf_getehdr(elf, &ehdr) == NULL)
  1002 + return -EINVAL;
  1003 +
  1004 + if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
  1005 + shdr.sh_type == SHT_PROGBITS) {
  1006 + pf->cfi = dwarf_getcfi_elf(elf);
  1007 + } else {
  1008 + pf->cfi = dwarf_getcfi(dbg->dbg);
  1009 + }
994 1010 #endif
995 1011  
996 1012 off = 0;
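The crash being avoided: in a separated debuginfo file, .eh_frame can be present but typed SHT_NOBITS (its contents stay in the stripped binary), and elfutils' dwarf_getcfi_elf() could not cope with that. The code now hands the ELF to dwarf_getcfi_elf() only when .eh_frame is a real SHT_PROGBITS section, and otherwise takes the CFI from the DWARF .debug_frame data via dwarf_getcfi().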