Commit ddb321a8dd158520d97ed1cbade1d4ac36b6af31
Exists in
ti-lsk-linux-4.1.y
and in
10 other branches
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
 "Mostly tooling fixes, but also some kernel side fixes: uncore PMU
  driver fix, user regs sampling fix and an instruction decoder fix that
  unbreaks PEBS precise sampling"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/uncore/hsw-ep: Handle systems with only two SBOXes
  perf/x86_64: Improve user regs sampling
  perf: Move task_pt_regs sampling into arch code
  x86: Fix off-by-one in instruction decoder
  perf hists browser: Fix segfault when showing callchain
  perf callchain: Free callchains when hist entries are deleted
  perf hists: Fix children sort key behavior
  perf diff: Fix to sort by baseline field by default
  perf list: Fix --raw-dump option
  perf probe: Fix crash in dwarf_getcfi_elf
  perf probe: Fix to fall back to find probe point in symbols
  perf callchain: Append callchains only when requested
  perf ui/tui: Print backtrace symbols when segfault occurs
  perf report: Show progress bar for output resorting
Showing 26 changed files Side-by-side Diff
- arch/arm/kernel/perf_regs.c
- arch/arm64/kernel/perf_regs.c
- arch/x86/kernel/cpu/perf_event_intel_uncore.h
- arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
- arch/x86/kernel/perf_regs.c
- arch/x86/lib/insn.c
- include/linux/perf_event.h
- include/linux/perf_regs.h
- kernel/events/core.c
- tools/perf/builtin-annotate.c
- tools/perf/builtin-diff.c
- tools/perf/builtin-list.c
- tools/perf/builtin-report.c
- tools/perf/builtin-top.c
- tools/perf/tests/hists_cumulate.c
- tools/perf/tests/hists_filter.c
- tools/perf/tests/hists_output.c
- tools/perf/ui/browsers/hists.c
- tools/perf/ui/hist.c
- tools/perf/ui/tui/setup.c
- tools/perf/util/callchain.c
- tools/perf/util/callchain.h
- tools/perf/util/hist.c
- tools/perf/util/hist.h
- tools/perf/util/probe-event.c
- tools/perf/util/probe-finder.c
arch/arm/kernel/perf_regs.c
... | ... | @@ -28,4 +28,12 @@ |
28 | 28 | { |
29 | 29 | return PERF_SAMPLE_REGS_ABI_32; |
30 | 30 | } |
31 | + | |
32 | +void perf_get_regs_user(struct perf_regs *regs_user, | |
33 | + struct pt_regs *regs, | |
34 | + struct pt_regs *regs_user_copy) | |
35 | +{ | |
36 | + regs_user->regs = task_pt_regs(current); | |
37 | + regs_user->abi = perf_reg_abi(current); | |
38 | +} |
arch/arm64/kernel/perf_regs.c
... | ... | @@ -50,4 +50,12 @@ |
50 | 50 | else |
51 | 51 | return PERF_SAMPLE_REGS_ABI_64; |
52 | 52 | } |
53 | + | |
54 | +void perf_get_regs_user(struct perf_regs *regs_user, | |
55 | + struct pt_regs *regs, | |
56 | + struct pt_regs *regs_user_copy) | |
57 | +{ | |
58 | + regs_user->regs = task_pt_regs(current); | |
59 | + regs_user->abi = perf_reg_abi(current); | |
60 | +} |
arch/x86/kernel/cpu/perf_event_intel_uncore.h
... | ... | @@ -17,7 +17,7 @@ |
17 | 17 | #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) |
18 | 18 | #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) |
19 | 19 | #define UNCORE_EXTRA_PCI_DEV 0xff |
20 | -#define UNCORE_EXTRA_PCI_DEV_MAX 2 | |
20 | +#define UNCORE_EXTRA_PCI_DEV_MAX 3 | |
21 | 21 | |
22 | 22 | /* support up to 8 sockets */ |
23 | 23 | #define UNCORE_SOCKET_MAX 8 |
arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
... | ... | @@ -891,6 +891,7 @@ |
891 | 891 | enum { |
892 | 892 | SNBEP_PCI_QPI_PORT0_FILTER, |
893 | 893 | SNBEP_PCI_QPI_PORT1_FILTER, |
894 | + HSWEP_PCI_PCU_3, | |
894 | 895 | }; |
895 | 896 | |
896 | 897 | static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) |
... | ... | @@ -2026,6 +2027,17 @@ |
2026 | 2027 | { |
2027 | 2028 | if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) |
2028 | 2029 | hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; |
2030 | + | |
2031 | + /* Detect 6-8 core systems with only two SBOXes */ | |
2032 | + if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) { | |
2033 | + u32 capid4; | |
2034 | + | |
2035 | + pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3], | |
2036 | + 0x94, &capid4); | |
2037 | + if (((capid4 >> 6) & 0x3) == 0) | |
2038 | + hswep_uncore_sbox.num_boxes = 2; | |
2039 | + } | |
2040 | + | |
2029 | 2041 | uncore_msr_uncores = hswep_msr_uncores; |
2030 | 2042 | } |
2031 | 2043 | |
... | ... | @@ -2286,6 +2298,11 @@ |
2286 | 2298 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f96), |
2287 | 2299 | .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, |
2288 | 2300 | SNBEP_PCI_QPI_PORT1_FILTER), |
2301 | + }, | |
2302 | + { /* PCU.3 (for Capability registers) */ | |
2303 | + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0), | |
2304 | + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, | |
2305 | + HSWEP_PCI_PCU_3), | |
2289 | 2306 | }, |
2290 | 2307 | { /* end: all zeroes */ } |
2291 | 2308 | }; |
arch/x86/kernel/perf_regs.c
... | ... | @@ -78,6 +78,14 @@ |
78 | 78 | { |
79 | 79 | return PERF_SAMPLE_REGS_ABI_32; |
80 | 80 | } |
81 | + | |
82 | +void perf_get_regs_user(struct perf_regs *regs_user, | |
83 | + struct pt_regs *regs, | |
84 | + struct pt_regs *regs_user_copy) | |
85 | +{ | |
86 | + regs_user->regs = task_pt_regs(current); | |
87 | + regs_user->abi = perf_reg_abi(current); | |
88 | +} | |
81 | 89 | #else /* CONFIG_X86_64 */ |
82 | 90 | #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ |
83 | 91 | (1ULL << PERF_REG_X86_ES) | \ |
... | ... | @@ -101,6 +109,88 @@ |
101 | 109 | return PERF_SAMPLE_REGS_ABI_32; |
102 | 110 | else |
103 | 111 | return PERF_SAMPLE_REGS_ABI_64; |
112 | +} | |
113 | + | |
114 | +void perf_get_regs_user(struct perf_regs *regs_user, | |
115 | + struct pt_regs *regs, | |
116 | + struct pt_regs *regs_user_copy) | |
117 | +{ | |
118 | + struct pt_regs *user_regs = task_pt_regs(current); | |
119 | + | |
120 | + /* | |
121 | + * If we're in an NMI that interrupted task_pt_regs setup, then | |
122 | + * we can't sample user regs at all. This check isn't really | |
123 | + * sufficient, though, as we could be in an NMI inside an interrupt | |
124 | + * that happened during task_pt_regs setup. | |
125 | + */ | |
126 | + if (regs->sp > (unsigned long)&user_regs->r11 && | |
127 | + regs->sp <= (unsigned long)(user_regs + 1)) { | |
128 | + regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; | |
129 | + regs_user->regs = NULL; | |
130 | + return; | |
131 | + } | |
132 | + | |
133 | + /* | |
134 | + * RIP, flags, and the argument registers are usually saved. | |
135 | + * orig_ax is probably okay, too. | |
136 | + */ | |
137 | + regs_user_copy->ip = user_regs->ip; | |
138 | + regs_user_copy->cx = user_regs->cx; | |
139 | + regs_user_copy->dx = user_regs->dx; | |
140 | + regs_user_copy->si = user_regs->si; | |
141 | + regs_user_copy->di = user_regs->di; | |
142 | + regs_user_copy->r8 = user_regs->r8; | |
143 | + regs_user_copy->r9 = user_regs->r9; | |
144 | + regs_user_copy->r10 = user_regs->r10; | |
145 | + regs_user_copy->r11 = user_regs->r11; | |
146 | + regs_user_copy->orig_ax = user_regs->orig_ax; | |
147 | + regs_user_copy->flags = user_regs->flags; | |
148 | + | |
149 | + /* | |
150 | + * Don't even try to report the "rest" regs. | |
151 | + */ | |
152 | + regs_user_copy->bx = -1; | |
153 | + regs_user_copy->bp = -1; | |
154 | + regs_user_copy->r12 = -1; | |
155 | + regs_user_copy->r13 = -1; | |
156 | + regs_user_copy->r14 = -1; | |
157 | + regs_user_copy->r15 = -1; | |
158 | + | |
159 | + /* | |
160 | + * For this to be at all useful, we need a reasonable guess for | |
161 | + * sp and the ABI. Be careful: we're in NMI context, and we're | |
162 | + * considering current to be the current task, so we should | |
163 | + * be careful not to look at any other percpu variables that might | |
164 | + * change during context switches. | |
165 | + */ | |
166 | + if (IS_ENABLED(CONFIG_IA32_EMULATION) && | |
167 | + task_thread_info(current)->status & TS_COMPAT) { | |
168 | + /* Easy case: we're in a compat syscall. */ | |
169 | + regs_user->abi = PERF_SAMPLE_REGS_ABI_32; | |
170 | + regs_user_copy->sp = user_regs->sp; | |
171 | + regs_user_copy->cs = user_regs->cs; | |
172 | + regs_user_copy->ss = user_regs->ss; | |
173 | + } else if (user_regs->orig_ax != -1) { | |
174 | + /* | |
175 | + * We're probably in a 64-bit syscall. | |
176 | + * Warning: this code is severely racy. At least it's better | |
177 | + * than just blindly copying user_regs. | |
178 | + */ | |
179 | + regs_user->abi = PERF_SAMPLE_REGS_ABI_64; | |
180 | + regs_user_copy->sp = this_cpu_read(old_rsp); | |
181 | + regs_user_copy->cs = __USER_CS; | |
182 | + regs_user_copy->ss = __USER_DS; | |
183 | + regs_user_copy->cx = -1; /* usually contains garbage */ | |
184 | + } else { | |
185 | + /* We're probably in an interrupt or exception. */ | |
186 | + regs_user->abi = user_64bit_mode(user_regs) ? | |
187 | + PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32; | |
188 | + regs_user_copy->sp = user_regs->sp; | |
189 | + regs_user_copy->cs = user_regs->cs; | |
190 | + regs_user_copy->ss = user_regs->ss; | |
191 | + } | |
192 | + | |
193 | + regs_user->regs = regs_user_copy; | |
104 | 194 | } |
105 | 195 | #endif /* CONFIG_X86_32 */ |
arch/x86/lib/insn.c
... | ... | @@ -28,7 +28,7 @@ |
28 | 28 | |
29 | 29 | /* Verify next sizeof(t) bytes can be on the same instruction */ |
30 | 30 | #define validate_next(t, insn, n) \ |
31 | - ((insn)->next_byte + sizeof(t) + n < (insn)->end_kaddr) | |
31 | + ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) | |
32 | 32 | |
33 | 33 | #define __get_next(t, insn) \ |
34 | 34 | ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) |
include/linux/perf_event.h
... | ... | @@ -79,11 +79,6 @@ |
79 | 79 | struct perf_branch_entry entries[0]; |
80 | 80 | }; |
81 | 81 | |
82 | -struct perf_regs { | |
83 | - __u64 abi; | |
84 | - struct pt_regs *regs; | |
85 | -}; | |
86 | - | |
87 | 82 | struct task_struct; |
88 | 83 | |
89 | 84 | /* |
90 | 85 | |
... | ... | @@ -610,7 +605,14 @@ |
610 | 605 | u32 reserved; |
611 | 606 | } cpu_entry; |
612 | 607 | struct perf_callchain_entry *callchain; |
608 | + | |
609 | + /* | |
610 | + * regs_user may point to task_pt_regs or to regs_user_copy, depending | |
611 | + * on arch details. | |
612 | + */ | |
613 | 613 | struct perf_regs regs_user; |
614 | + struct pt_regs regs_user_copy; | |
615 | + | |
614 | 616 | struct perf_regs regs_intr; |
615 | 617 | u64 stack_user_size; |
616 | 618 | } ____cacheline_aligned; |
include/linux/perf_regs.h
1 | 1 | #ifndef _LINUX_PERF_REGS_H |
2 | 2 | #define _LINUX_PERF_REGS_H |
3 | 3 | |
4 | +struct perf_regs { | |
5 | + __u64 abi; | |
6 | + struct pt_regs *regs; | |
7 | +}; | |
8 | + | |
4 | 9 | #ifdef CONFIG_HAVE_PERF_REGS |
5 | 10 | #include <asm/perf_regs.h> |
6 | 11 | u64 perf_reg_value(struct pt_regs *regs, int idx); |
7 | 12 | int perf_reg_validate(u64 mask); |
8 | 13 | u64 perf_reg_abi(struct task_struct *task); |
14 | +void perf_get_regs_user(struct perf_regs *regs_user, | |
15 | + struct pt_regs *regs, | |
16 | + struct pt_regs *regs_user_copy); | |
9 | 17 | #else |
10 | 18 | static inline u64 perf_reg_value(struct pt_regs *regs, int idx) |
11 | 19 | { |
... | ... | @@ -20,6 +28,14 @@ |
20 | 28 | static inline u64 perf_reg_abi(struct task_struct *task) |
21 | 29 | { |
22 | 30 | return PERF_SAMPLE_REGS_ABI_NONE; |
31 | +} | |
32 | + | |
33 | +static inline void perf_get_regs_user(struct perf_regs *regs_user, | |
34 | + struct pt_regs *regs, | |
35 | + struct pt_regs *regs_user_copy) | |
36 | +{ | |
37 | + regs_user->regs = task_pt_regs(current); | |
38 | + regs_user->abi = perf_reg_abi(current); | |
23 | 39 | } |
24 | 40 | #endif /* CONFIG_HAVE_PERF_REGS */ |
25 | 41 | #endif /* _LINUX_PERF_REGS_H */ |
kernel/events/core.c
... | ... | @@ -4461,18 +4461,14 @@ |
4461 | 4461 | } |
4462 | 4462 | |
4463 | 4463 | static void perf_sample_regs_user(struct perf_regs *regs_user, |
4464 | - struct pt_regs *regs) | |
4464 | + struct pt_regs *regs, | |
4465 | + struct pt_regs *regs_user_copy) | |
4465 | 4466 | { |
4466 | - if (!user_mode(regs)) { | |
4467 | - if (current->mm) | |
4468 | - regs = task_pt_regs(current); | |
4469 | - else | |
4470 | - regs = NULL; | |
4471 | - } | |
4472 | - | |
4473 | - if (regs) { | |
4474 | - regs_user->abi = perf_reg_abi(current); | |
4467 | + if (user_mode(regs)) { | |
4468 | + regs_user->abi = perf_reg_abi(current); | |
4475 | 4469 | regs_user->regs = regs; |
4470 | + } else if (current->mm) { | |
4471 | + perf_get_regs_user(regs_user, regs, regs_user_copy); | |
4476 | 4472 | } else { |
4477 | 4473 | regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; |
4478 | 4474 | regs_user->regs = NULL; |
... | ... | @@ -4951,7 +4947,8 @@ |
4951 | 4947 | } |
4952 | 4948 | |
4953 | 4949 | if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER)) |
4954 | - perf_sample_regs_user(&data->regs_user, regs); | |
4950 | + perf_sample_regs_user(&data->regs_user, regs, | |
4951 | + &data->regs_user_copy); | |
4955 | 4952 | |
4956 | 4953 | if (sample_type & PERF_SAMPLE_REGS_USER) { |
4957 | 4954 | /* regs dump ABI info */ |
tools/perf/builtin-annotate.c
... | ... | @@ -232,7 +232,7 @@ |
232 | 232 | if (nr_samples > 0) { |
233 | 233 | total_nr_samples += nr_samples; |
234 | 234 | hists__collapse_resort(hists, NULL); |
235 | - hists__output_resort(hists); | |
235 | + hists__output_resort(hists, NULL); | |
236 | 236 | |
237 | 237 | if (symbol_conf.event_group && |
238 | 238 | !perf_evsel__is_group_leader(pos)) |
tools/perf/builtin-diff.c
... | ... | @@ -545,6 +545,42 @@ |
545 | 545 | return __hist_entry__cmp_compute(p_left, p_right, c); |
546 | 546 | } |
547 | 547 | |
548 | +static int64_t | |
549 | +hist_entry__cmp_nop(struct hist_entry *left __maybe_unused, | |
550 | + struct hist_entry *right __maybe_unused) | |
551 | +{ | |
552 | + return 0; | |
553 | +} | |
554 | + | |
555 | +static int64_t | |
556 | +hist_entry__cmp_baseline(struct hist_entry *left, struct hist_entry *right) | |
557 | +{ | |
558 | + if (sort_compute) | |
559 | + return 0; | |
560 | + | |
561 | + if (left->stat.period == right->stat.period) | |
562 | + return 0; | |
563 | + return left->stat.period > right->stat.period ? 1 : -1; | |
564 | +} | |
565 | + | |
566 | +static int64_t | |
567 | +hist_entry__cmp_delta(struct hist_entry *left, struct hist_entry *right) | |
568 | +{ | |
569 | + return hist_entry__cmp_compute(right, left, COMPUTE_DELTA); | |
570 | +} | |
571 | + | |
572 | +static int64_t | |
573 | +hist_entry__cmp_ratio(struct hist_entry *left, struct hist_entry *right) | |
574 | +{ | |
575 | + return hist_entry__cmp_compute(right, left, COMPUTE_RATIO); | |
576 | +} | |
577 | + | |
578 | +static int64_t | |
579 | +hist_entry__cmp_wdiff(struct hist_entry *left, struct hist_entry *right) | |
580 | +{ | |
581 | + return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF); | |
582 | +} | |
583 | + | |
548 | 584 | static void insert_hist_entry_by_compute(struct rb_root *root, |
549 | 585 | struct hist_entry *he, |
550 | 586 | int c) |
... | ... | @@ -605,7 +641,7 @@ |
605 | 641 | hists__precompute(hists); |
606 | 642 | hists__compute_resort(hists); |
607 | 643 | } else { |
608 | - hists__output_resort(hists); | |
644 | + hists__output_resort(hists, NULL); | |
609 | 645 | } |
610 | 646 | |
611 | 647 | hists__fprintf(hists, true, 0, 0, 0, stdout); |
612 | 648 | |
613 | 649 | |
614 | 650 | |
615 | 651 | |
616 | 652 | |
617 | 653 | |
... | ... | @@ -1038,27 +1074,35 @@ |
1038 | 1074 | fmt->header = hpp__header; |
1039 | 1075 | fmt->width = hpp__width; |
1040 | 1076 | fmt->entry = hpp__entry_global; |
1077 | + fmt->cmp = hist_entry__cmp_nop; | |
1078 | + fmt->collapse = hist_entry__cmp_nop; | |
1041 | 1079 | |
1042 | 1080 | /* TODO more colors */ |
1043 | 1081 | switch (idx) { |
1044 | 1082 | case PERF_HPP_DIFF__BASELINE: |
1045 | 1083 | fmt->color = hpp__color_baseline; |
1084 | + fmt->sort = hist_entry__cmp_baseline; | |
1046 | 1085 | break; |
1047 | 1086 | case PERF_HPP_DIFF__DELTA: |
1048 | 1087 | fmt->color = hpp__color_delta; |
1088 | + fmt->sort = hist_entry__cmp_delta; | |
1049 | 1089 | break; |
1050 | 1090 | case PERF_HPP_DIFF__RATIO: |
1051 | 1091 | fmt->color = hpp__color_ratio; |
1092 | + fmt->sort = hist_entry__cmp_ratio; | |
1052 | 1093 | break; |
1053 | 1094 | case PERF_HPP_DIFF__WEIGHTED_DIFF: |
1054 | 1095 | fmt->color = hpp__color_wdiff; |
1096 | + fmt->sort = hist_entry__cmp_wdiff; | |
1055 | 1097 | break; |
1056 | 1098 | default: |
1099 | + fmt->sort = hist_entry__cmp_nop; | |
1057 | 1100 | break; |
1058 | 1101 | } |
1059 | 1102 | |
1060 | 1103 | init_header(d, dfmt); |
1061 | 1104 | perf_hpp__column_register(fmt); |
1105 | + perf_hpp__register_sort_field(fmt); | |
1062 | 1106 | } |
1063 | 1107 | |
1064 | 1108 | static void ui_init(void) |
tools/perf/builtin-list.c
... | ... | @@ -19,7 +19,9 @@ |
19 | 19 | int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) |
20 | 20 | { |
21 | 21 | int i; |
22 | - const struct option list_options[] = { | |
22 | + bool raw_dump = false; | |
23 | + struct option list_options[] = { | |
24 | + OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), | |
23 | 25 | OPT_END() |
24 | 26 | }; |
25 | 27 | const char * const list_usage[] = { |
26 | 28 | |
... | ... | @@ -27,11 +29,18 @@ |
27 | 29 | NULL |
28 | 30 | }; |
29 | 31 | |
32 | + set_option_flag(list_options, 0, "raw-dump", PARSE_OPT_HIDDEN); | |
33 | + | |
30 | 34 | argc = parse_options(argc, argv, list_options, list_usage, |
31 | 35 | PARSE_OPT_STOP_AT_NON_OPTION); |
32 | 36 | |
33 | 37 | setup_pager(); |
34 | 38 | |
39 | + if (raw_dump) { | |
40 | + print_events(NULL, true); | |
41 | + return 0; | |
42 | + } | |
43 | + | |
35 | 44 | if (argc == 0) { |
36 | 45 | print_events(NULL, false); |
37 | 46 | return 0; |
... | ... | @@ -53,8 +62,6 @@ |
53 | 62 | print_hwcache_events(NULL, false); |
54 | 63 | else if (strcmp(argv[i], "pmu") == 0) |
55 | 64 | print_pmu_events(NULL, false); |
56 | - else if (strcmp(argv[i], "--raw-dump") == 0) | |
57 | - print_events(NULL, true); | |
58 | 65 | else { |
59 | 66 | char *sep = strchr(argv[i], ':'), *s; |
60 | 67 | int sep_idx; |
tools/perf/builtin-report.c
... | ... | @@ -457,6 +457,19 @@ |
457 | 457 | ui_progress__finish(); |
458 | 458 | } |
459 | 459 | |
460 | +static void report__output_resort(struct report *rep) | |
461 | +{ | |
462 | + struct ui_progress prog; | |
463 | + struct perf_evsel *pos; | |
464 | + | |
465 | + ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); | |
466 | + | |
467 | + evlist__for_each(rep->session->evlist, pos) | |
468 | + hists__output_resort(evsel__hists(pos), &prog); | |
469 | + | |
470 | + ui_progress__finish(); | |
471 | +} | |
472 | + | |
460 | 473 | static int __cmd_report(struct report *rep) |
461 | 474 | { |
462 | 475 | int ret; |
463 | 476 | |
... | ... | @@ -505,13 +518,20 @@ |
505 | 518 | if (session_done()) |
506 | 519 | return 0; |
507 | 520 | |
521 | + /* | |
522 | + * recalculate number of entries after collapsing since it | |
523 | + * might be changed during the collapse phase. | |
524 | + */ | |
525 | + rep->nr_entries = 0; | |
526 | + evlist__for_each(session->evlist, pos) | |
527 | + rep->nr_entries += evsel__hists(pos)->nr_entries; | |
528 | + | |
508 | 529 | if (rep->nr_entries == 0) { |
509 | 530 | ui__error("The %s file has no samples!\n", file->path); |
510 | 531 | return 0; |
511 | 532 | } |
512 | 533 | |
513 | - evlist__for_each(session->evlist, pos) | |
514 | - hists__output_resort(evsel__hists(pos)); | |
534 | + report__output_resort(rep); | |
515 | 535 | |
516 | 536 | return report__browse_hists(rep); |
517 | 537 | } |
tools/perf/builtin-top.c
... | ... | @@ -285,7 +285,7 @@ |
285 | 285 | } |
286 | 286 | |
287 | 287 | hists__collapse_resort(hists, NULL); |
288 | - hists__output_resort(hists); | |
288 | + hists__output_resort(hists, NULL); | |
289 | 289 | |
290 | 290 | hists__output_recalc_col_len(hists, top->print_entries - printed); |
291 | 291 | putchar('\n'); |
... | ... | @@ -554,7 +554,7 @@ |
554 | 554 | } |
555 | 555 | |
556 | 556 | hists__collapse_resort(hists, NULL); |
557 | - hists__output_resort(hists); | |
557 | + hists__output_resort(hists, NULL); | |
558 | 558 | } |
559 | 559 | |
560 | 560 | static void *display_thread_tui(void *arg) |
tools/perf/tests/hists_cumulate.c
... | ... | @@ -187,7 +187,7 @@ |
187 | 187 | * function since TEST_ASSERT_VAL() returns in case of failure. |
188 | 188 | */ |
189 | 189 | hists__collapse_resort(hists, NULL); |
190 | - hists__output_resort(hists); | |
190 | + hists__output_resort(hists, NULL); | |
191 | 191 | |
192 | 192 | if (verbose > 2) { |
193 | 193 | pr_info("use callchain: %d, cumulate callchain: %d\n", |
194 | 194 | |
195 | 195 | |
... | ... | @@ -454,12 +454,12 @@ |
454 | 454 | * 30.00% 10.00% perf perf [.] cmd_record |
455 | 455 | * 20.00% 0.00% bash libc [.] malloc |
456 | 456 | * 10.00% 10.00% bash [kernel] [k] page_fault |
457 | - * 10.00% 10.00% perf [kernel] [k] schedule | |
458 | - * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open | |
457 | + * 10.00% 10.00% bash bash [.] xmalloc | |
459 | 458 | * 10.00% 10.00% perf [kernel] [k] page_fault |
460 | - * 10.00% 10.00% perf libc [.] free | |
461 | 459 | * 10.00% 10.00% perf libc [.] malloc |
462 | - * 10.00% 10.00% bash bash [.] xmalloc | |
460 | + * 10.00% 10.00% perf [kernel] [k] schedule | |
461 | + * 10.00% 10.00% perf libc [.] free | |
462 | + * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open | |
463 | 463 | */ |
464 | 464 | struct result expected[] = { |
465 | 465 | { 7000, 2000, "perf", "perf", "main" }, |
466 | 466 | |
467 | 467 | |
... | ... | @@ -468,12 +468,12 @@ |
468 | 468 | { 3000, 1000, "perf", "perf", "cmd_record" }, |
469 | 469 | { 2000, 0, "bash", "libc", "malloc" }, |
470 | 470 | { 1000, 1000, "bash", "[kernel]", "page_fault" }, |
471 | - { 1000, 1000, "perf", "[kernel]", "schedule" }, | |
472 | - { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, | |
471 | + { 1000, 1000, "bash", "bash", "xmalloc" }, | |
473 | 472 | { 1000, 1000, "perf", "[kernel]", "page_fault" }, |
473 | + { 1000, 1000, "perf", "[kernel]", "schedule" }, | |
474 | 474 | { 1000, 1000, "perf", "libc", "free" }, |
475 | 475 | { 1000, 1000, "perf", "libc", "malloc" }, |
476 | - { 1000, 1000, "bash", "bash", "xmalloc" }, | |
476 | + { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, | |
477 | 477 | }; |
478 | 478 | |
479 | 479 | symbol_conf.use_callchain = false; |
480 | 480 | |
... | ... | @@ -537,10 +537,13 @@ |
537 | 537 | * malloc |
538 | 538 | * main |
539 | 539 | * |
540 | - * 10.00% 10.00% perf [kernel] [k] schedule | |
540 | + * 10.00% 10.00% bash bash [.] xmalloc | |
541 | 541 | * | |
542 | - * --- schedule | |
543 | - * run_command | |
542 | + * --- xmalloc | |
543 | + * malloc | |
544 | + * xmalloc <--- NOTE: there's a cycle | |
545 | + * malloc | |
546 | + * xmalloc | |
544 | 547 | * main |
545 | 548 | * |
546 | 549 | * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open |
... | ... | @@ -556,6 +559,12 @@ |
556 | 559 | * run_command |
557 | 560 | * main |
558 | 561 | * |
562 | + * 10.00% 10.00% perf [kernel] [k] schedule | |
563 | + * | | |
564 | + * --- schedule | |
565 | + * run_command | |
566 | + * main | |
567 | + * | |
559 | 568 | * 10.00% 10.00% perf libc [.] free |
560 | 569 | * | |
561 | 570 | * --- free |
... | ... | @@ -570,15 +579,6 @@ |
570 | 579 | * run_command |
571 | 580 | * main |
572 | 581 | * |
573 | - * 10.00% 10.00% bash bash [.] xmalloc | |
574 | - * | | |
575 | - * --- xmalloc | |
576 | - * malloc | |
577 | - * xmalloc <--- NOTE: there's a cycle | |
578 | - * malloc | |
579 | - * xmalloc | |
580 | - * main | |
581 | - * | |
582 | 582 | */ |
583 | 583 | struct result expected[] = { |
584 | 584 | { 7000, 2000, "perf", "perf", "main" }, |
585 | 585 | |
586 | 586 | |
... | ... | @@ -587,12 +587,12 @@ |
587 | 587 | { 3000, 1000, "perf", "perf", "cmd_record" }, |
588 | 588 | { 2000, 0, "bash", "libc", "malloc" }, |
589 | 589 | { 1000, 1000, "bash", "[kernel]", "page_fault" }, |
590 | - { 1000, 1000, "perf", "[kernel]", "schedule" }, | |
590 | + { 1000, 1000, "bash", "bash", "xmalloc" }, | |
591 | 591 | { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, |
592 | 592 | { 1000, 1000, "perf", "[kernel]", "page_fault" }, |
593 | + { 1000, 1000, "perf", "[kernel]", "schedule" }, | |
593 | 594 | { 1000, 1000, "perf", "libc", "free" }, |
594 | 595 | { 1000, 1000, "perf", "libc", "malloc" }, |
595 | - { 1000, 1000, "bash", "bash", "xmalloc" }, | |
596 | 596 | }; |
597 | 597 | struct callchain_result expected_callchain[] = { |
598 | 598 | { |
... | ... | @@ -622,9 +622,12 @@ |
622 | 622 | { "bash", "main" }, }, |
623 | 623 | }, |
624 | 624 | { |
625 | - 3, { { "[kernel]", "schedule" }, | |
626 | - { "perf", "run_command" }, | |
627 | - { "perf", "main" }, }, | |
625 | + 6, { { "bash", "xmalloc" }, | |
626 | + { "libc", "malloc" }, | |
627 | + { "bash", "xmalloc" }, | |
628 | + { "libc", "malloc" }, | |
629 | + { "bash", "xmalloc" }, | |
630 | + { "bash", "main" }, }, | |
628 | 631 | }, |
629 | 632 | { |
630 | 633 | 3, { { "[kernel]", "sys_perf_event_open" }, |
... | ... | @@ -638,6 +641,11 @@ |
638 | 641 | { "perf", "main" }, }, |
639 | 642 | }, |
640 | 643 | { |
644 | + 3, { { "[kernel]", "schedule" }, | |
645 | + { "perf", "run_command" }, | |
646 | + { "perf", "main" }, }, | |
647 | + }, | |
648 | + { | |
641 | 649 | 4, { { "libc", "free" }, |
642 | 650 | { "perf", "cmd_record" }, |
643 | 651 | { "perf", "run_command" }, |
... | ... | @@ -648,14 +656,6 @@ |
648 | 656 | { "perf", "cmd_record" }, |
649 | 657 | { "perf", "run_command" }, |
650 | 658 | { "perf", "main" }, }, |
651 | - }, | |
652 | - { | |
653 | - 6, { { "bash", "xmalloc" }, | |
654 | - { "libc", "malloc" }, | |
655 | - { "bash", "xmalloc" }, | |
656 | - { "libc", "malloc" }, | |
657 | - { "bash", "xmalloc" }, | |
658 | - { "bash", "main" }, }, | |
659 | 659 | }, |
660 | 660 | }; |
661 | 661 |
tools/perf/tests/hists_filter.c
tools/perf/tests/hists_output.c
... | ... | @@ -152,7 +152,7 @@ |
152 | 152 | goto out; |
153 | 153 | |
154 | 154 | hists__collapse_resort(hists, NULL); |
155 | - hists__output_resort(hists); | |
155 | + hists__output_resort(hists, NULL); | |
156 | 156 | |
157 | 157 | if (verbose > 2) { |
158 | 158 | pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); |
... | ... | @@ -252,7 +252,7 @@ |
252 | 252 | goto out; |
253 | 253 | |
254 | 254 | hists__collapse_resort(hists, NULL); |
255 | - hists__output_resort(hists); | |
255 | + hists__output_resort(hists, NULL); | |
256 | 256 | |
257 | 257 | if (verbose > 2) { |
258 | 258 | pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); |
... | ... | @@ -306,7 +306,7 @@ |
306 | 306 | goto out; |
307 | 307 | |
308 | 308 | hists__collapse_resort(hists, NULL); |
309 | - hists__output_resort(hists); | |
309 | + hists__output_resort(hists, NULL); | |
310 | 310 | |
311 | 311 | if (verbose > 2) { |
312 | 312 | pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); |
... | ... | @@ -384,7 +384,7 @@ |
384 | 384 | goto out; |
385 | 385 | |
386 | 386 | hists__collapse_resort(hists, NULL); |
387 | - hists__output_resort(hists); | |
387 | + hists__output_resort(hists, NULL); | |
388 | 388 | |
389 | 389 | if (verbose > 2) { |
390 | 390 | pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); |
... | ... | @@ -487,7 +487,7 @@ |
487 | 487 | goto out; |
488 | 488 | |
489 | 489 | hists__collapse_resort(hists, NULL); |
490 | - hists__output_resort(hists); | |
490 | + hists__output_resort(hists, NULL); | |
491 | 491 | |
492 | 492 | if (verbose > 2) { |
493 | 493 | pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); |
tools/perf/ui/browsers/hists.c
tools/perf/ui/hist.c
tools/perf/ui/tui/setup.c
1 | 1 | #include <signal.h> |
2 | 2 | #include <stdbool.h> |
3 | +#ifdef HAVE_BACKTRACE_SUPPORT | |
4 | +#include <execinfo.h> | |
5 | +#endif | |
3 | 6 | |
4 | 7 | #include "../../util/cache.h" |
5 | 8 | #include "../../util/debug.h" |
... | ... | @@ -88,6 +91,25 @@ |
88 | 91 | return SLkp_getkey(); |
89 | 92 | } |
90 | 93 | |
94 | +#ifdef HAVE_BACKTRACE_SUPPORT | |
95 | +static void ui__signal_backtrace(int sig) | |
96 | +{ | |
97 | + void *stackdump[32]; | |
98 | + size_t size; | |
99 | + | |
100 | + ui__exit(false); | |
101 | + psignal(sig, "perf"); | |
102 | + | |
103 | + printf("-------- backtrace --------\n"); | |
104 | + size = backtrace(stackdump, ARRAY_SIZE(stackdump)); | |
105 | + backtrace_symbols_fd(stackdump, size, STDOUT_FILENO); | |
106 | + | |
107 | + exit(0); | |
108 | +} | |
109 | +#else | |
110 | +# define ui__signal_backtrace ui__signal | |
111 | +#endif | |
112 | + | |
91 | 113 | static void ui__signal(int sig) |
92 | 114 | { |
93 | 115 | ui__exit(false); |
... | ... | @@ -122,8 +144,8 @@ |
122 | 144 | ui_browser__init(); |
123 | 145 | tui_progress__init(); |
124 | 146 | |
125 | - signal(SIGSEGV, ui__signal); | |
126 | - signal(SIGFPE, ui__signal); | |
147 | + signal(SIGSEGV, ui__signal_backtrace); | |
148 | + signal(SIGFPE, ui__signal_backtrace); | |
127 | 149 | signal(SIGINT, ui__signal); |
128 | 150 | signal(SIGQUIT, ui__signal); |
129 | 151 | signal(SIGTERM, ui__signal); |
tools/perf/util/callchain.c
... | ... | @@ -841,4 +841,34 @@ |
841 | 841 | |
842 | 842 | return bf; |
843 | 843 | } |
844 | + | |
845 | +static void free_callchain_node(struct callchain_node *node) | |
846 | +{ | |
847 | + struct callchain_list *list, *tmp; | |
848 | + struct callchain_node *child; | |
849 | + struct rb_node *n; | |
850 | + | |
851 | + list_for_each_entry_safe(list, tmp, &node->val, list) { | |
852 | + list_del(&list->list); | |
853 | + free(list); | |
854 | + } | |
855 | + | |
856 | + n = rb_first(&node->rb_root_in); | |
857 | + while (n) { | |
858 | + child = container_of(n, struct callchain_node, rb_node_in); | |
859 | + n = rb_next(n); | |
860 | + rb_erase(&child->rb_node_in, &node->rb_root_in); | |
861 | + | |
862 | + free_callchain_node(child); | |
863 | + free(child); | |
864 | + } | |
865 | +} | |
866 | + | |
867 | +void free_callchain(struct callchain_root *root) | |
868 | +{ | |
869 | + if (!symbol_conf.use_callchain) | |
870 | + return; | |
871 | + | |
872 | + free_callchain_node(&root->node); | |
873 | +} |
tools/perf/util/callchain.h
tools/perf/util/hist.c
... | ... | @@ -6,6 +6,7 @@ |
6 | 6 | #include "evlist.h" |
7 | 7 | #include "evsel.h" |
8 | 8 | #include "annotate.h" |
9 | +#include "ui/progress.h" | |
9 | 10 | #include <math.h> |
10 | 11 | |
11 | 12 | static bool hists__filter_entry_by_dso(struct hists *hists, |
... | ... | @@ -303,7 +304,7 @@ |
303 | 304 | size_t callchain_size = 0; |
304 | 305 | struct hist_entry *he; |
305 | 306 | |
306 | - if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) | |
307 | + if (symbol_conf.use_callchain) | |
307 | 308 | callchain_size = sizeof(struct callchain_root); |
308 | 309 | |
309 | 310 | he = zalloc(sizeof(*he) + callchain_size); |
... | ... | @@ -736,7 +737,7 @@ |
736 | 737 | iter->he = he; |
737 | 738 | he_cache[iter->curr++] = he; |
738 | 739 | |
739 | - callchain_append(he->callchain, &callchain_cursor, sample->period); | |
740 | + hist_entry__append_callchain(he, sample); | |
740 | 741 | |
741 | 742 | /* |
742 | 743 | * We need to re-initialize the cursor since callchain_append() |
... | ... | @@ -809,7 +810,8 @@ |
809 | 810 | iter->he = he; |
810 | 811 | he_cache[iter->curr++] = he; |
811 | 812 | |
812 | - callchain_append(he->callchain, &cursor, sample->period); | |
813 | + if (symbol_conf.use_callchain) | |
814 | + callchain_append(he->callchain, &cursor, sample->period); | |
813 | 815 | return 0; |
814 | 816 | } |
815 | 817 | |
... | ... | @@ -945,6 +947,7 @@ |
945 | 947 | zfree(&he->mem_info); |
946 | 948 | zfree(&he->stat_acc); |
947 | 949 | free_srcline(he->srcline); |
950 | + free_callchain(he->callchain); | |
948 | 951 | free(he); |
949 | 952 | } |
950 | 953 | |
... | ... | @@ -987,6 +990,7 @@ |
987 | 990 | else |
988 | 991 | p = &(*p)->rb_right; |
989 | 992 | } |
993 | + hists->nr_entries++; | |
990 | 994 | |
991 | 995 | rb_link_node(&he->rb_node_in, parent, p); |
992 | 996 | rb_insert_color(&he->rb_node_in, root); |
993 | 997 | |
... | ... | @@ -1024,7 +1028,10 @@ |
1024 | 1028 | if (!sort__need_collapse) |
1025 | 1029 | return; |
1026 | 1030 | |
1031 | + hists->nr_entries = 0; | |
1032 | + | |
1027 | 1033 | root = hists__get_rotate_entries_in(hists); |
1034 | + | |
1028 | 1035 | next = rb_first(root); |
1029 | 1036 | |
1030 | 1037 | while (next) { |
... | ... | @@ -1119,7 +1126,7 @@ |
1119 | 1126 | rb_insert_color(&he->rb_node, entries); |
1120 | 1127 | } |
1121 | 1128 | |
1122 | -void hists__output_resort(struct hists *hists) | |
1129 | +void hists__output_resort(struct hists *hists, struct ui_progress *prog) | |
1123 | 1130 | { |
1124 | 1131 | struct rb_root *root; |
1125 | 1132 | struct rb_node *next; |
... | ... | @@ -1148,6 +1155,9 @@ |
1148 | 1155 | |
1149 | 1156 | if (!n->filtered) |
1150 | 1157 | hists__calc_col_len(hists, n); |
1158 | + | |
1159 | + if (prog) | |
1160 | + ui_progress__update(prog, 1); | |
1151 | 1161 | } |
1152 | 1162 | } |
1153 | 1163 |
tools/perf/util/hist.h
... | ... | @@ -121,7 +121,7 @@ |
121 | 121 | struct hists *hists); |
122 | 122 | void hist_entry__free(struct hist_entry *); |
123 | 123 | |
124 | -void hists__output_resort(struct hists *hists); | |
124 | +void hists__output_resort(struct hists *hists, struct ui_progress *prog); | |
125 | 125 | void hists__collapse_resort(struct hists *hists, struct ui_progress *prog); |
126 | 126 | |
127 | 127 | void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); |
tools/perf/util/probe-event.c
... | ... | @@ -495,9 +495,11 @@ |
495 | 495 | } |
496 | 496 | |
497 | 497 | if (ntevs == 0) { /* No error but failed to find probe point. */ |
498 | - pr_warning("Probe point '%s' not found.\n", | |
498 | + pr_warning("Probe point '%s' not found in debuginfo.\n", | |
499 | 499 | synthesize_perf_probe_point(&pev->point)); |
500 | - return -ENOENT; | |
500 | + if (need_dwarf) | |
501 | + return -ENOENT; | |
502 | + return 0; | |
501 | 503 | } |
502 | 504 | /* Error path : ntevs < 0 */ |
503 | 505 | pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); |
tools/perf/util/probe-finder.c
... | ... | @@ -989,8 +989,24 @@ |
989 | 989 | int ret = 0; |
990 | 990 | |
991 | 991 | #if _ELFUTILS_PREREQ(0, 142) |
992 | + Elf *elf; | |
993 | + GElf_Ehdr ehdr; | |
994 | + GElf_Shdr shdr; | |
995 | + | |
992 | 996 | /* Get the call frame information from this dwarf */ |
993 | - pf->cfi = dwarf_getcfi_elf(dwarf_getelf(dbg->dbg)); | |
997 | + elf = dwarf_getelf(dbg->dbg); | |
998 | + if (elf == NULL) | |
999 | + return -EINVAL; | |
1000 | + | |
1001 | + if (gelf_getehdr(elf, &ehdr) == NULL) | |
1002 | + return -EINVAL; | |
1003 | + | |
1004 | + if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) && | |
1005 | + shdr.sh_type == SHT_PROGBITS) { | |
1006 | + pf->cfi = dwarf_getcfi_elf(elf); | |
1007 | + } else { | |
1008 | + pf->cfi = dwarf_getcfi(dbg->dbg); | |
1009 | + } | |
994 | 1010 | #endif |
995 | 1011 | |
996 | 1012 | off = 0; |