Commit eac23d1c384b55e4bbb89ea9e5a6bb77fb4d1140

Authored by Ian Munsie
Committed by Arnaldo Carvalho de Melo
1 parent 21ef97f05a

perf record,report,annotate,diff: Process events in order

This patch changes perf record to ask for the ID info on all events by
default if recording from multiple CPUs.

Perf report, annotate and diff will now process the events in order if
the kernel is able to provide timestamps on all events. This ensures
that events such as COMM and MMAP which are necessary to correctly
interpret samples are processed prior to those samples so that they are
attributed correctly.

Before:
 # perf record ./cachetest
 # perf report

 # Events: 6K cycles
 #
 # Overhead  Command      Shared Object                           Symbol
 # ........  .......  .................  ...............................
 #
     74.11%    :3259  [unknown]          [k] 0x4a6c
      1.50%  cachetest  ld-2.11.2.so       [.] 0x1777c
      1.46%    :3259  [kernel.kallsyms]  [k] .perf_event_mmap_ctx
      1.25%    :3259  [kernel.kallsyms]  [k] restore
      0.74%    :3259  [kernel.kallsyms]  [k] ._raw_spin_lock
      0.71%    :3259  [kernel.kallsyms]  [k] .filemap_fault
      0.66%    :3259  [kernel.kallsyms]  [k] .memset
      0.54%  cachetest  [kernel.kallsyms]  [k] .sha_transform
      0.54%    :3259  [kernel.kallsyms]  [k] .copy_4K_page
      0.54%    :3259  [kernel.kallsyms]  [k] .find_get_page
      0.52%    :3259  [kernel.kallsyms]  [k] .trace_hardirqs_off
      0.50%    :3259  [kernel.kallsyms]  [k] .__do_fault
<SNIP>

After:
 # perf report

 # Events: 6K cycles
 #
 # Overhead  Command      Shared Object                           Symbol
 # ........  .......  .................  ...............................
 #
     44.28%  cachetest  cachetest          [.] sumArrayNaive
     22.53%  cachetest  cachetest          [.] sumArrayOptimal
      6.59%  cachetest  ld-2.11.2.so       [.] 0x1777c
      2.13%  cachetest  [unknown]          [k] 0x340
      1.46%  cachetest  [kernel.kallsyms]  [k] .perf_event_mmap_ctx
      1.25%  cachetest  [kernel.kallsyms]  [k] restore
      0.74%  cachetest  [kernel.kallsyms]  [k] ._raw_spin_lock
      0.71%  cachetest  [kernel.kallsyms]  [k] .filemap_fault
      0.66%  cachetest  [kernel.kallsyms]  [k] .memset
      0.54%  cachetest  [kernel.kallsyms]  [k] .copy_4K_page
      0.54%  cachetest  [kernel.kallsyms]  [k] .find_get_page
      0.54%  cachetest  [kernel.kallsyms]  [k] .sha_transform
      0.52%  cachetest  [kernel.kallsyms]  [k] .trace_hardirqs_off
      0.50%  cachetest  [kernel.kallsyms]  [k] .__do_fault
<SNIP>

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <1291872833-839-1-git-send-email-imunsie@au1.ibm.com>
Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Showing 4 changed files with 10 additions and 1 deletions Inline Diff

tools/perf/builtin-annotate.c
1 /* 1 /*
2 * builtin-annotate.c 2 * builtin-annotate.c
3 * 3 *
4 * Builtin annotate command: Analyze the perf.data input file, 4 * Builtin annotate command: Analyze the perf.data input file,
5 * look up and read DSOs and symbol information and display 5 * look up and read DSOs and symbol information and display
6 * a histogram of results, along various sorting keys. 6 * a histogram of results, along various sorting keys.
7 */ 7 */
8 #include "builtin.h" 8 #include "builtin.h"
9 9
10 #include "util/util.h" 10 #include "util/util.h"
11 11
12 #include "util/color.h" 12 #include "util/color.h"
13 #include <linux/list.h> 13 #include <linux/list.h>
14 #include "util/cache.h" 14 #include "util/cache.h"
15 #include <linux/rbtree.h> 15 #include <linux/rbtree.h>
16 #include "util/symbol.h" 16 #include "util/symbol.h"
17 17
18 #include "perf.h" 18 #include "perf.h"
19 #include "util/debug.h" 19 #include "util/debug.h"
20 20
21 #include "util/event.h" 21 #include "util/event.h"
22 #include "util/parse-options.h" 22 #include "util/parse-options.h"
23 #include "util/parse-events.h" 23 #include "util/parse-events.h"
24 #include "util/thread.h" 24 #include "util/thread.h"
25 #include "util/sort.h" 25 #include "util/sort.h"
26 #include "util/hist.h" 26 #include "util/hist.h"
27 #include "util/session.h" 27 #include "util/session.h"
28 28
29 static char const *input_name = "perf.data"; 29 static char const *input_name = "perf.data";
30 30
31 static bool force, use_tui, use_stdio; 31 static bool force, use_tui, use_stdio;
32 32
33 static bool full_paths; 33 static bool full_paths;
34 34
35 static bool print_line; 35 static bool print_line;
36 36
37 static const char *sym_hist_filter; 37 static const char *sym_hist_filter;
38 38
39 static int hists__add_entry(struct hists *self, struct addr_location *al) 39 static int hists__add_entry(struct hists *self, struct addr_location *al)
40 { 40 {
41 struct hist_entry *he; 41 struct hist_entry *he;
42 42
43 if (sym_hist_filter != NULL && 43 if (sym_hist_filter != NULL &&
44 (al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) { 44 (al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) {
45 /* We're only interested in a symbol named sym_hist_filter */ 45 /* We're only interested in a symbol named sym_hist_filter */
46 if (al->sym != NULL) { 46 if (al->sym != NULL) {
47 rb_erase(&al->sym->rb_node, 47 rb_erase(&al->sym->rb_node,
48 &al->map->dso->symbols[al->map->type]); 48 &al->map->dso->symbols[al->map->type]);
49 symbol__delete(al->sym); 49 symbol__delete(al->sym);
50 } 50 }
51 return 0; 51 return 0;
52 } 52 }
53 53
54 he = __hists__add_entry(self, al, NULL, 1); 54 he = __hists__add_entry(self, al, NULL, 1);
55 if (he == NULL) 55 if (he == NULL)
56 return -ENOMEM; 56 return -ENOMEM;
57 57
58 return hist_entry__inc_addr_samples(he, al->addr); 58 return hist_entry__inc_addr_samples(he, al->addr);
59 } 59 }
60 60
61 static int process_sample_event(event_t *event, struct sample_data *sample, 61 static int process_sample_event(event_t *event, struct sample_data *sample,
62 struct perf_session *session) 62 struct perf_session *session)
63 { 63 {
64 struct addr_location al; 64 struct addr_location al;
65 65
66 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { 66 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
67 pr_warning("problem processing %d event, skipping it.\n", 67 pr_warning("problem processing %d event, skipping it.\n",
68 event->header.type); 68 event->header.type);
69 return -1; 69 return -1;
70 } 70 }
71 71
72 if (!al.filtered && hists__add_entry(&session->hists, &al)) { 72 if (!al.filtered && hists__add_entry(&session->hists, &al)) {
73 pr_warning("problem incrementing symbol count, " 73 pr_warning("problem incrementing symbol count, "
74 "skipping event\n"); 74 "skipping event\n");
75 return -1; 75 return -1;
76 } 76 }
77 77
78 return 0; 78 return 0;
79 } 79 }
80 80
81 static int objdump_line__print(struct objdump_line *self, 81 static int objdump_line__print(struct objdump_line *self,
82 struct list_head *head, 82 struct list_head *head,
83 struct hist_entry *he, u64 len) 83 struct hist_entry *he, u64 len)
84 { 84 {
85 struct symbol *sym = he->ms.sym; 85 struct symbol *sym = he->ms.sym;
86 static const char *prev_line; 86 static const char *prev_line;
87 static const char *prev_color; 87 static const char *prev_color;
88 88
89 if (self->offset != -1) { 89 if (self->offset != -1) {
90 const char *path = NULL; 90 const char *path = NULL;
91 unsigned int hits = 0; 91 unsigned int hits = 0;
92 double percent = 0.0; 92 double percent = 0.0;
93 const char *color; 93 const char *color;
94 struct sym_priv *priv = symbol__priv(sym); 94 struct sym_priv *priv = symbol__priv(sym);
95 struct sym_ext *sym_ext = priv->ext; 95 struct sym_ext *sym_ext = priv->ext;
96 struct sym_hist *h = priv->hist; 96 struct sym_hist *h = priv->hist;
97 s64 offset = self->offset; 97 s64 offset = self->offset;
98 struct objdump_line *next = objdump__get_next_ip_line(head, self); 98 struct objdump_line *next = objdump__get_next_ip_line(head, self);
99 99
100 while (offset < (s64)len && 100 while (offset < (s64)len &&
101 (next == NULL || offset < next->offset)) { 101 (next == NULL || offset < next->offset)) {
102 if (sym_ext) { 102 if (sym_ext) {
103 if (path == NULL) 103 if (path == NULL)
104 path = sym_ext[offset].path; 104 path = sym_ext[offset].path;
105 percent += sym_ext[offset].percent; 105 percent += sym_ext[offset].percent;
106 } else 106 } else
107 hits += h->ip[offset]; 107 hits += h->ip[offset];
108 108
109 ++offset; 109 ++offset;
110 } 110 }
111 111
112 if (sym_ext == NULL && h->sum) 112 if (sym_ext == NULL && h->sum)
113 percent = 100.0 * hits / h->sum; 113 percent = 100.0 * hits / h->sum;
114 114
115 color = get_percent_color(percent); 115 color = get_percent_color(percent);
116 116
117 /* 117 /*
118 * Also color the filename and line if needed, with 118 * Also color the filename and line if needed, with
119 * the same color than the percentage. Don't print it 119 * the same color than the percentage. Don't print it
120 * twice for close colored ip with the same filename:line 120 * twice for close colored ip with the same filename:line
121 */ 121 */
122 if (path) { 122 if (path) {
123 if (!prev_line || strcmp(prev_line, path) 123 if (!prev_line || strcmp(prev_line, path)
124 || color != prev_color) { 124 || color != prev_color) {
125 color_fprintf(stdout, color, " %s", path); 125 color_fprintf(stdout, color, " %s", path);
126 prev_line = path; 126 prev_line = path;
127 prev_color = color; 127 prev_color = color;
128 } 128 }
129 } 129 }
130 130
131 color_fprintf(stdout, color, " %7.2f", percent); 131 color_fprintf(stdout, color, " %7.2f", percent);
132 printf(" : "); 132 printf(" : ");
133 color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", self->line); 133 color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", self->line);
134 } else { 134 } else {
135 if (!*self->line) 135 if (!*self->line)
136 printf(" :\n"); 136 printf(" :\n");
137 else 137 else
138 printf(" : %s\n", self->line); 138 printf(" : %s\n", self->line);
139 } 139 }
140 140
141 return 0; 141 return 0;
142 } 142 }
143 143
144 static struct rb_root root_sym_ext; 144 static struct rb_root root_sym_ext;
145 145
146 static void insert_source_line(struct sym_ext *sym_ext) 146 static void insert_source_line(struct sym_ext *sym_ext)
147 { 147 {
148 struct sym_ext *iter; 148 struct sym_ext *iter;
149 struct rb_node **p = &root_sym_ext.rb_node; 149 struct rb_node **p = &root_sym_ext.rb_node;
150 struct rb_node *parent = NULL; 150 struct rb_node *parent = NULL;
151 151
152 while (*p != NULL) { 152 while (*p != NULL) {
153 parent = *p; 153 parent = *p;
154 iter = rb_entry(parent, struct sym_ext, node); 154 iter = rb_entry(parent, struct sym_ext, node);
155 155
156 if (sym_ext->percent > iter->percent) 156 if (sym_ext->percent > iter->percent)
157 p = &(*p)->rb_left; 157 p = &(*p)->rb_left;
158 else 158 else
159 p = &(*p)->rb_right; 159 p = &(*p)->rb_right;
160 } 160 }
161 161
162 rb_link_node(&sym_ext->node, parent, p); 162 rb_link_node(&sym_ext->node, parent, p);
163 rb_insert_color(&sym_ext->node, &root_sym_ext); 163 rb_insert_color(&sym_ext->node, &root_sym_ext);
164 } 164 }
165 165
166 static void free_source_line(struct hist_entry *he, int len) 166 static void free_source_line(struct hist_entry *he, int len)
167 { 167 {
168 struct sym_priv *priv = symbol__priv(he->ms.sym); 168 struct sym_priv *priv = symbol__priv(he->ms.sym);
169 struct sym_ext *sym_ext = priv->ext; 169 struct sym_ext *sym_ext = priv->ext;
170 int i; 170 int i;
171 171
172 if (!sym_ext) 172 if (!sym_ext)
173 return; 173 return;
174 174
175 for (i = 0; i < len; i++) 175 for (i = 0; i < len; i++)
176 free(sym_ext[i].path); 176 free(sym_ext[i].path);
177 free(sym_ext); 177 free(sym_ext);
178 178
179 priv->ext = NULL; 179 priv->ext = NULL;
180 root_sym_ext = RB_ROOT; 180 root_sym_ext = RB_ROOT;
181 } 181 }
182 182
183 /* Get the filename:line for the colored entries */ 183 /* Get the filename:line for the colored entries */
184 static void 184 static void
185 get_source_line(struct hist_entry *he, int len, const char *filename) 185 get_source_line(struct hist_entry *he, int len, const char *filename)
186 { 186 {
187 struct symbol *sym = he->ms.sym; 187 struct symbol *sym = he->ms.sym;
188 u64 start; 188 u64 start;
189 int i; 189 int i;
190 char cmd[PATH_MAX * 2]; 190 char cmd[PATH_MAX * 2];
191 struct sym_ext *sym_ext; 191 struct sym_ext *sym_ext;
192 struct sym_priv *priv = symbol__priv(sym); 192 struct sym_priv *priv = symbol__priv(sym);
193 struct sym_hist *h = priv->hist; 193 struct sym_hist *h = priv->hist;
194 194
195 if (!h->sum) 195 if (!h->sum)
196 return; 196 return;
197 197
198 sym_ext = priv->ext = calloc(len, sizeof(struct sym_ext)); 198 sym_ext = priv->ext = calloc(len, sizeof(struct sym_ext));
199 if (!priv->ext) 199 if (!priv->ext)
200 return; 200 return;
201 201
202 start = he->ms.map->unmap_ip(he->ms.map, sym->start); 202 start = he->ms.map->unmap_ip(he->ms.map, sym->start);
203 203
204 for (i = 0; i < len; i++) { 204 for (i = 0; i < len; i++) {
205 char *path = NULL; 205 char *path = NULL;
206 size_t line_len; 206 size_t line_len;
207 u64 offset; 207 u64 offset;
208 FILE *fp; 208 FILE *fp;
209 209
210 sym_ext[i].percent = 100.0 * h->ip[i] / h->sum; 210 sym_ext[i].percent = 100.0 * h->ip[i] / h->sum;
211 if (sym_ext[i].percent <= 0.5) 211 if (sym_ext[i].percent <= 0.5)
212 continue; 212 continue;
213 213
214 offset = start + i; 214 offset = start + i;
215 sprintf(cmd, "addr2line -e %s %016llx", filename, offset); 215 sprintf(cmd, "addr2line -e %s %016llx", filename, offset);
216 fp = popen(cmd, "r"); 216 fp = popen(cmd, "r");
217 if (!fp) 217 if (!fp)
218 continue; 218 continue;
219 219
220 if (getline(&path, &line_len, fp) < 0 || !line_len) 220 if (getline(&path, &line_len, fp) < 0 || !line_len)
221 goto next; 221 goto next;
222 222
223 sym_ext[i].path = malloc(sizeof(char) * line_len + 1); 223 sym_ext[i].path = malloc(sizeof(char) * line_len + 1);
224 if (!sym_ext[i].path) 224 if (!sym_ext[i].path)
225 goto next; 225 goto next;
226 226
227 strcpy(sym_ext[i].path, path); 227 strcpy(sym_ext[i].path, path);
228 insert_source_line(&sym_ext[i]); 228 insert_source_line(&sym_ext[i]);
229 229
230 next: 230 next:
231 pclose(fp); 231 pclose(fp);
232 } 232 }
233 } 233 }
234 234
235 static void print_summary(const char *filename) 235 static void print_summary(const char *filename)
236 { 236 {
237 struct sym_ext *sym_ext; 237 struct sym_ext *sym_ext;
238 struct rb_node *node; 238 struct rb_node *node;
239 239
240 printf("\nSorted summary for file %s\n", filename); 240 printf("\nSorted summary for file %s\n", filename);
241 printf("----------------------------------------------\n\n"); 241 printf("----------------------------------------------\n\n");
242 242
243 if (RB_EMPTY_ROOT(&root_sym_ext)) { 243 if (RB_EMPTY_ROOT(&root_sym_ext)) {
244 printf(" Nothing higher than %1.1f%%\n", MIN_GREEN); 244 printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
245 return; 245 return;
246 } 246 }
247 247
248 node = rb_first(&root_sym_ext); 248 node = rb_first(&root_sym_ext);
249 while (node) { 249 while (node) {
250 double percent; 250 double percent;
251 const char *color; 251 const char *color;
252 char *path; 252 char *path;
253 253
254 sym_ext = rb_entry(node, struct sym_ext, node); 254 sym_ext = rb_entry(node, struct sym_ext, node);
255 percent = sym_ext->percent; 255 percent = sym_ext->percent;
256 color = get_percent_color(percent); 256 color = get_percent_color(percent);
257 path = sym_ext->path; 257 path = sym_ext->path;
258 258
259 color_fprintf(stdout, color, " %7.2f %s", percent, path); 259 color_fprintf(stdout, color, " %7.2f %s", percent, path);
260 node = rb_next(node); 260 node = rb_next(node);
261 } 261 }
262 } 262 }
263 263
264 static void hist_entry__print_hits(struct hist_entry *self) 264 static void hist_entry__print_hits(struct hist_entry *self)
265 { 265 {
266 struct symbol *sym = self->ms.sym; 266 struct symbol *sym = self->ms.sym;
267 struct sym_priv *priv = symbol__priv(sym); 267 struct sym_priv *priv = symbol__priv(sym);
268 struct sym_hist *h = priv->hist; 268 struct sym_hist *h = priv->hist;
269 u64 len = sym->end - sym->start, offset; 269 u64 len = sym->end - sym->start, offset;
270 270
271 for (offset = 0; offset < len; ++offset) 271 for (offset = 0; offset < len; ++offset)
272 if (h->ip[offset] != 0) 272 if (h->ip[offset] != 0)
273 printf("%*Lx: %Lu\n", BITS_PER_LONG / 2, 273 printf("%*Lx: %Lu\n", BITS_PER_LONG / 2,
274 sym->start + offset, h->ip[offset]); 274 sym->start + offset, h->ip[offset]);
275 printf("%*s: %Lu\n", BITS_PER_LONG / 2, "h->sum", h->sum); 275 printf("%*s: %Lu\n", BITS_PER_LONG / 2, "h->sum", h->sum);
276 } 276 }
277 277
278 static int hist_entry__tty_annotate(struct hist_entry *he) 278 static int hist_entry__tty_annotate(struct hist_entry *he)
279 { 279 {
280 struct map *map = he->ms.map; 280 struct map *map = he->ms.map;
281 struct dso *dso = map->dso; 281 struct dso *dso = map->dso;
282 struct symbol *sym = he->ms.sym; 282 struct symbol *sym = he->ms.sym;
283 const char *filename = dso->long_name, *d_filename; 283 const char *filename = dso->long_name, *d_filename;
284 u64 len; 284 u64 len;
285 LIST_HEAD(head); 285 LIST_HEAD(head);
286 struct objdump_line *pos, *n; 286 struct objdump_line *pos, *n;
287 287
288 if (hist_entry__annotate(he, &head, 0) < 0) 288 if (hist_entry__annotate(he, &head, 0) < 0)
289 return -1; 289 return -1;
290 290
291 if (full_paths) 291 if (full_paths)
292 d_filename = filename; 292 d_filename = filename;
293 else 293 else
294 d_filename = basename(filename); 294 d_filename = basename(filename);
295 295
296 len = sym->end - sym->start; 296 len = sym->end - sym->start;
297 297
298 if (print_line) { 298 if (print_line) {
299 get_source_line(he, len, filename); 299 get_source_line(he, len, filename);
300 print_summary(filename); 300 print_summary(filename);
301 } 301 }
302 302
303 printf("\n\n------------------------------------------------\n"); 303 printf("\n\n------------------------------------------------\n");
304 printf(" Percent | Source code & Disassembly of %s\n", d_filename); 304 printf(" Percent | Source code & Disassembly of %s\n", d_filename);
305 printf("------------------------------------------------\n"); 305 printf("------------------------------------------------\n");
306 306
307 if (verbose) 307 if (verbose)
308 hist_entry__print_hits(he); 308 hist_entry__print_hits(he);
309 309
310 list_for_each_entry_safe(pos, n, &head, node) { 310 list_for_each_entry_safe(pos, n, &head, node) {
311 objdump_line__print(pos, &head, he, len); 311 objdump_line__print(pos, &head, he, len);
312 list_del(&pos->node); 312 list_del(&pos->node);
313 objdump_line__free(pos); 313 objdump_line__free(pos);
314 } 314 }
315 315
316 if (print_line) 316 if (print_line)
317 free_source_line(he, len); 317 free_source_line(he, len);
318 318
319 return 0; 319 return 0;
320 } 320 }
321 321
322 static void hists__find_annotations(struct hists *self) 322 static void hists__find_annotations(struct hists *self)
323 { 323 {
324 struct rb_node *nd = rb_first(&self->entries), *next; 324 struct rb_node *nd = rb_first(&self->entries), *next;
325 int key = KEY_RIGHT; 325 int key = KEY_RIGHT;
326 326
327 while (nd) { 327 while (nd) {
328 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); 328 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
329 struct sym_priv *priv; 329 struct sym_priv *priv;
330 330
331 if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned) 331 if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned)
332 goto find_next; 332 goto find_next;
333 333
334 priv = symbol__priv(he->ms.sym); 334 priv = symbol__priv(he->ms.sym);
335 if (priv->hist == NULL) { 335 if (priv->hist == NULL) {
336 find_next: 336 find_next:
337 if (key == KEY_LEFT) 337 if (key == KEY_LEFT)
338 nd = rb_prev(nd); 338 nd = rb_prev(nd);
339 else 339 else
340 nd = rb_next(nd); 340 nd = rb_next(nd);
341 continue; 341 continue;
342 } 342 }
343 343
344 if (use_browser > 0) { 344 if (use_browser > 0) {
345 key = hist_entry__tui_annotate(he); 345 key = hist_entry__tui_annotate(he);
346 switch (key) { 346 switch (key) {
347 case KEY_RIGHT: 347 case KEY_RIGHT:
348 next = rb_next(nd); 348 next = rb_next(nd);
349 break; 349 break;
350 case KEY_LEFT: 350 case KEY_LEFT:
351 next = rb_prev(nd); 351 next = rb_prev(nd);
352 break; 352 break;
353 default: 353 default:
354 return; 354 return;
355 } 355 }
356 356
357 if (next != NULL) 357 if (next != NULL)
358 nd = next; 358 nd = next;
359 } else { 359 } else {
360 hist_entry__tty_annotate(he); 360 hist_entry__tty_annotate(he);
361 nd = rb_next(nd); 361 nd = rb_next(nd);
362 /* 362 /*
363 * Since we have a hist_entry per IP for the same 363 * Since we have a hist_entry per IP for the same
364 * symbol, free he->ms.sym->hist to signal we already 364 * symbol, free he->ms.sym->hist to signal we already
365 * processed this symbol. 365 * processed this symbol.
366 */ 366 */
367 free(priv->hist); 367 free(priv->hist);
368 priv->hist = NULL; 368 priv->hist = NULL;
369 } 369 }
370 } 370 }
371 } 371 }
372 372
373 static struct perf_event_ops event_ops = { 373 static struct perf_event_ops event_ops = {
374 .sample = process_sample_event, 374 .sample = process_sample_event,
375 .mmap = event__process_mmap, 375 .mmap = event__process_mmap,
376 .comm = event__process_comm, 376 .comm = event__process_comm,
377 .fork = event__process_task, 377 .fork = event__process_task,
378 .ordered_samples = true,
379 .ordering_requires_timestamps = true,
378 }; 380 };
379 381
380 static int __cmd_annotate(void) 382 static int __cmd_annotate(void)
381 { 383 {
382 int ret; 384 int ret;
383 struct perf_session *session; 385 struct perf_session *session;
384 386
385 session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops); 387 session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
386 if (session == NULL) 388 if (session == NULL)
387 return -ENOMEM; 389 return -ENOMEM;
388 390
389 ret = perf_session__process_events(session, &event_ops); 391 ret = perf_session__process_events(session, &event_ops);
390 if (ret) 392 if (ret)
391 goto out_delete; 393 goto out_delete;
392 394
393 if (dump_trace) { 395 if (dump_trace) {
394 perf_session__fprintf_nr_events(session, stdout); 396 perf_session__fprintf_nr_events(session, stdout);
395 goto out_delete; 397 goto out_delete;
396 } 398 }
397 399
398 if (verbose > 3) 400 if (verbose > 3)
399 perf_session__fprintf(session, stdout); 401 perf_session__fprintf(session, stdout);
400 402
401 if (verbose > 2) 403 if (verbose > 2)
402 perf_session__fprintf_dsos(session, stdout); 404 perf_session__fprintf_dsos(session, stdout);
403 405
404 hists__collapse_resort(&session->hists); 406 hists__collapse_resort(&session->hists);
405 hists__output_resort(&session->hists); 407 hists__output_resort(&session->hists);
406 hists__find_annotations(&session->hists); 408 hists__find_annotations(&session->hists);
407 out_delete: 409 out_delete:
408 perf_session__delete(session); 410 perf_session__delete(session);
409 411
410 return ret; 412 return ret;
411 } 413 }
412 414
413 static const char * const annotate_usage[] = { 415 static const char * const annotate_usage[] = {
414 "perf annotate [<options>] <command>", 416 "perf annotate [<options>] <command>",
415 NULL 417 NULL
416 }; 418 };
417 419
418 static const struct option options[] = { 420 static const struct option options[] = {
419 OPT_STRING('i', "input", &input_name, "file", 421 OPT_STRING('i', "input", &input_name, "file",
420 "input file name"), 422 "input file name"),
421 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", 423 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
422 "only consider symbols in these dsos"), 424 "only consider symbols in these dsos"),
423 OPT_STRING('s', "symbol", &sym_hist_filter, "symbol", 425 OPT_STRING('s', "symbol", &sym_hist_filter, "symbol",
424 "symbol to annotate"), 426 "symbol to annotate"),
425 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 427 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
426 OPT_INCR('v', "verbose", &verbose, 428 OPT_INCR('v', "verbose", &verbose,
427 "be more verbose (show symbol address, etc)"), 429 "be more verbose (show symbol address, etc)"),
428 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 430 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
429 "dump raw trace in ASCII"), 431 "dump raw trace in ASCII"),
430 OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), 432 OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
431 OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), 433 OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
432 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 434 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
433 "file", "vmlinux pathname"), 435 "file", "vmlinux pathname"),
434 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 436 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
435 "load module symbols - WARNING: use only with -k and LIVE kernel"), 437 "load module symbols - WARNING: use only with -k and LIVE kernel"),
436 OPT_BOOLEAN('l', "print-line", &print_line, 438 OPT_BOOLEAN('l', "print-line", &print_line,
437 "print matching source lines (may be slow)"), 439 "print matching source lines (may be slow)"),
438 OPT_BOOLEAN('P', "full-paths", &full_paths, 440 OPT_BOOLEAN('P', "full-paths", &full_paths,
439 "Don't shorten the displayed pathnames"), 441 "Don't shorten the displayed pathnames"),
440 OPT_END() 442 OPT_END()
441 }; 443 };
442 444
443 int cmd_annotate(int argc, const char **argv, const char *prefix __used) 445 int cmd_annotate(int argc, const char **argv, const char *prefix __used)
444 { 446 {
445 argc = parse_options(argc, argv, options, annotate_usage, 0); 447 argc = parse_options(argc, argv, options, annotate_usage, 0);
446 448
447 if (use_stdio) 449 if (use_stdio)
448 use_browser = 0; 450 use_browser = 0;
449 else if (use_tui) 451 else if (use_tui)
450 use_browser = 1; 452 use_browser = 1;
451 453
452 setup_browser(); 454 setup_browser();
453 455
454 symbol_conf.priv_size = sizeof(struct sym_priv); 456 symbol_conf.priv_size = sizeof(struct sym_priv);
455 symbol_conf.try_vmlinux_path = true; 457 symbol_conf.try_vmlinux_path = true;
456 458
457 if (symbol__init() < 0) 459 if (symbol__init() < 0)
458 return -1; 460 return -1;
459 461
460 setup_sorting(annotate_usage, options); 462 setup_sorting(annotate_usage, options);
461 463
462 if (argc) { 464 if (argc) {
463 /* 465 /*
464 * Special case: if there's an argument left then assume tha 466 * Special case: if there's an argument left then assume tha
465 * it's a symbol filter: 467 * it's a symbol filter:
466 */ 468 */
467 if (argc > 1) 469 if (argc > 1)
468 usage_with_options(annotate_usage, options); 470 usage_with_options(annotate_usage, options);
469 471
470 sym_hist_filter = argv[0]; 472 sym_hist_filter = argv[0];
471 } 473 }
472 474
473 if (field_sep && *field_sep == '.') { 475 if (field_sep && *field_sep == '.') {
474 pr_err("'.' is the only non valid --field-separator argument\n"); 476 pr_err("'.' is the only non valid --field-separator argument\n");
475 return -1; 477 return -1;
476 } 478 }
477 479
478 return __cmd_annotate(); 480 return __cmd_annotate();
479 } 481 }
480 482
tools/perf/builtin-diff.c
1 /* 1 /*
2 * builtin-diff.c 2 * builtin-diff.c
3 * 3 *
4 * Builtin diff command: Analyze two perf.data input files, look up and read 4 * Builtin diff command: Analyze two perf.data input files, look up and read
5 * DSOs and symbol information, sort them and produce a diff. 5 * DSOs and symbol information, sort them and produce a diff.
6 */ 6 */
7 #include "builtin.h" 7 #include "builtin.h"
8 8
9 #include "util/debug.h" 9 #include "util/debug.h"
10 #include "util/event.h" 10 #include "util/event.h"
11 #include "util/hist.h" 11 #include "util/hist.h"
12 #include "util/session.h" 12 #include "util/session.h"
13 #include "util/sort.h" 13 #include "util/sort.h"
14 #include "util/symbol.h" 14 #include "util/symbol.h"
15 #include "util/util.h" 15 #include "util/util.h"
16 16
17 #include <stdlib.h> 17 #include <stdlib.h>
18 18
19 static char const *input_old = "perf.data.old", 19 static char const *input_old = "perf.data.old",
20 *input_new = "perf.data"; 20 *input_new = "perf.data";
21 static char diff__default_sort_order[] = "dso,symbol"; 21 static char diff__default_sort_order[] = "dso,symbol";
22 static bool force; 22 static bool force;
23 static bool show_displacement; 23 static bool show_displacement;
24 24
25 static int hists__add_entry(struct hists *self, 25 static int hists__add_entry(struct hists *self,
26 struct addr_location *al, u64 period) 26 struct addr_location *al, u64 period)
27 { 27 {
28 if (__hists__add_entry(self, al, NULL, period) != NULL) 28 if (__hists__add_entry(self, al, NULL, period) != NULL)
29 return 0; 29 return 0;
30 return -ENOMEM; 30 return -ENOMEM;
31 } 31 }
32 32
33 static int diff__process_sample_event(event_t *event, 33 static int diff__process_sample_event(event_t *event,
34 struct sample_data *sample, 34 struct sample_data *sample,
35 struct perf_session *session) 35 struct perf_session *session)
36 { 36 {
37 struct addr_location al; 37 struct addr_location al;
38 38
39 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { 39 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
40 pr_warning("problem processing %d event, skipping it.\n", 40 pr_warning("problem processing %d event, skipping it.\n",
41 event->header.type); 41 event->header.type);
42 return -1; 42 return -1;
43 } 43 }
44 44
45 if (al.filtered || al.sym == NULL) 45 if (al.filtered || al.sym == NULL)
46 return 0; 46 return 0;
47 47
48 if (hists__add_entry(&session->hists, &al, sample->period)) { 48 if (hists__add_entry(&session->hists, &al, sample->period)) {
49 pr_warning("problem incrementing symbol period, skipping event\n"); 49 pr_warning("problem incrementing symbol period, skipping event\n");
50 return -1; 50 return -1;
51 } 51 }
52 52
53 session->hists.stats.total_period += sample->period; 53 session->hists.stats.total_period += sample->period;
54 return 0; 54 return 0;
55 } 55 }
56 56
57 static struct perf_event_ops event_ops = { 57 static struct perf_event_ops event_ops = {
58 .sample = diff__process_sample_event, 58 .sample = diff__process_sample_event,
59 .mmap = event__process_mmap, 59 .mmap = event__process_mmap,
60 .comm = event__process_comm, 60 .comm = event__process_comm,
61 .exit = event__process_task, 61 .exit = event__process_task,
62 .fork = event__process_task, 62 .fork = event__process_task,
63 .lost = event__process_lost, 63 .lost = event__process_lost,
64 .ordered_samples = true,
65 .ordering_requires_timestamps = true,
64 }; 66 };
65 67
66 static void perf_session__insert_hist_entry_by_name(struct rb_root *root, 68 static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
67 struct hist_entry *he) 69 struct hist_entry *he)
68 { 70 {
69 struct rb_node **p = &root->rb_node; 71 struct rb_node **p = &root->rb_node;
70 struct rb_node *parent = NULL; 72 struct rb_node *parent = NULL;
71 struct hist_entry *iter; 73 struct hist_entry *iter;
72 74
73 while (*p != NULL) { 75 while (*p != NULL) {
74 parent = *p; 76 parent = *p;
75 iter = rb_entry(parent, struct hist_entry, rb_node); 77 iter = rb_entry(parent, struct hist_entry, rb_node);
76 if (hist_entry__cmp(he, iter) < 0) 78 if (hist_entry__cmp(he, iter) < 0)
77 p = &(*p)->rb_left; 79 p = &(*p)->rb_left;
78 else 80 else
79 p = &(*p)->rb_right; 81 p = &(*p)->rb_right;
80 } 82 }
81 83
82 rb_link_node(&he->rb_node, parent, p); 84 rb_link_node(&he->rb_node, parent, p);
83 rb_insert_color(&he->rb_node, root); 85 rb_insert_color(&he->rb_node, root);
84 } 86 }
85 87
86 static void hists__resort_entries(struct hists *self) 88 static void hists__resort_entries(struct hists *self)
87 { 89 {
88 unsigned long position = 1; 90 unsigned long position = 1;
89 struct rb_root tmp = RB_ROOT; 91 struct rb_root tmp = RB_ROOT;
90 struct rb_node *next = rb_first(&self->entries); 92 struct rb_node *next = rb_first(&self->entries);
91 93
92 while (next != NULL) { 94 while (next != NULL) {
93 struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node); 95 struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node);
94 96
95 next = rb_next(&n->rb_node); 97 next = rb_next(&n->rb_node);
96 rb_erase(&n->rb_node, &self->entries); 98 rb_erase(&n->rb_node, &self->entries);
97 n->position = position++; 99 n->position = position++;
98 perf_session__insert_hist_entry_by_name(&tmp, n); 100 perf_session__insert_hist_entry_by_name(&tmp, n);
99 } 101 }
100 102
101 self->entries = tmp; 103 self->entries = tmp;
102 } 104 }
103 105
/*
 * Record every entry's position in the output ordering, then leave the
 * tree keyed by hist_entry__cmp() so it can be searched by
 * hists__find_entry().
 */
static void hists__set_positions(struct hists *self)
{
	/* Output ordering first: hists__resort_entries() numbers that order. */
	hists__output_resort(self);
	hists__resort_entries(self);
}
109 111
110 static struct hist_entry *hists__find_entry(struct hists *self, 112 static struct hist_entry *hists__find_entry(struct hists *self,
111 struct hist_entry *he) 113 struct hist_entry *he)
112 { 114 {
113 struct rb_node *n = self->entries.rb_node; 115 struct rb_node *n = self->entries.rb_node;
114 116
115 while (n) { 117 while (n) {
116 struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node); 118 struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node);
117 int64_t cmp = hist_entry__cmp(he, iter); 119 int64_t cmp = hist_entry__cmp(he, iter);
118 120
119 if (cmp < 0) 121 if (cmp < 0)
120 n = n->rb_left; 122 n = n->rb_left;
121 else if (cmp > 0) 123 else if (cmp > 0)
122 n = n->rb_right; 124 n = n->rb_right;
123 else 125 else
124 return iter; 126 return iter;
125 } 127 }
126 128
127 return NULL; 129 return NULL;
128 } 130 }
129 131
130 static void hists__match(struct hists *older, struct hists *newer) 132 static void hists__match(struct hists *older, struct hists *newer)
131 { 133 {
132 struct rb_node *nd; 134 struct rb_node *nd;
133 135
134 for (nd = rb_first(&newer->entries); nd; nd = rb_next(nd)) { 136 for (nd = rb_first(&newer->entries); nd; nd = rb_next(nd)) {
135 struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node); 137 struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node);
136 pos->pair = hists__find_entry(older, pos); 138 pos->pair = hists__find_entry(older, pos);
137 } 139 }
138 } 140 }
139 141
140 static int __cmd_diff(void) 142 static int __cmd_diff(void)
141 { 143 {
142 int ret, i; 144 int ret, i;
143 struct perf_session *session[2]; 145 struct perf_session *session[2];
144 146
145 session[0] = perf_session__new(input_old, O_RDONLY, force, false, &event_ops); 147 session[0] = perf_session__new(input_old, O_RDONLY, force, false, &event_ops);
146 session[1] = perf_session__new(input_new, O_RDONLY, force, false, &event_ops); 148 session[1] = perf_session__new(input_new, O_RDONLY, force, false, &event_ops);
147 if (session[0] == NULL || session[1] == NULL) 149 if (session[0] == NULL || session[1] == NULL)
148 return -ENOMEM; 150 return -ENOMEM;
149 151
150 for (i = 0; i < 2; ++i) { 152 for (i = 0; i < 2; ++i) {
151 ret = perf_session__process_events(session[i], &event_ops); 153 ret = perf_session__process_events(session[i], &event_ops);
152 if (ret) 154 if (ret)
153 goto out_delete; 155 goto out_delete;
154 } 156 }
155 157
156 hists__output_resort(&session[1]->hists); 158 hists__output_resort(&session[1]->hists);
157 if (show_displacement) 159 if (show_displacement)
158 hists__set_positions(&session[0]->hists); 160 hists__set_positions(&session[0]->hists);
159 161
160 hists__match(&session[0]->hists, &session[1]->hists); 162 hists__match(&session[0]->hists, &session[1]->hists);
161 hists__fprintf(&session[1]->hists, &session[0]->hists, 163 hists__fprintf(&session[1]->hists, &session[0]->hists,
162 show_displacement, stdout); 164 show_displacement, stdout);
163 out_delete: 165 out_delete:
164 for (i = 0; i < 2; ++i) 166 for (i = 0; i < 2; ++i)
165 perf_session__delete(session[i]); 167 perf_session__delete(session[i]);
166 return ret; 168 return ret;
167 } 169 }
168 170
/* NULL-terminated usage text for 'perf diff'. */
static const char * const diff_usage[] = {
	"perf diff [<options>] [old_file] [new_file]",
	NULL,
};
173 175
174 static const struct option options[] = { 176 static const struct option options[] = {
175 OPT_INCR('v', "verbose", &verbose, 177 OPT_INCR('v', "verbose", &verbose,
176 "be more verbose (show symbol address, etc)"), 178 "be more verbose (show symbol address, etc)"),
177 OPT_BOOLEAN('M', "displacement", &show_displacement, 179 OPT_BOOLEAN('M', "displacement", &show_displacement,
178 "Show position displacement relative to baseline"), 180 "Show position displacement relative to baseline"),
179 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 181 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
180 "dump raw trace in ASCII"), 182 "dump raw trace in ASCII"),
181 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 183 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
182 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 184 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
183 "load module symbols - WARNING: use only with -k and LIVE kernel"), 185 "load module symbols - WARNING: use only with -k and LIVE kernel"),
184 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", 186 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
185 "only consider symbols in these dsos"), 187 "only consider symbols in these dsos"),
186 OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", 188 OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
187 "only consider symbols in these comms"), 189 "only consider symbols in these comms"),
188 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", 190 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
189 "only consider these symbols"), 191 "only consider these symbols"),
190 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 192 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
191 "sort by key(s): pid, comm, dso, symbol, parent"), 193 "sort by key(s): pid, comm, dso, symbol, parent"),
192 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", 194 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
193 "separator for columns, no spaces will be added between " 195 "separator for columns, no spaces will be added between "
194 "columns '.' is reserved."), 196 "columns '.' is reserved."),
195 OPT_END() 197 OPT_END()
196 }; 198 };
197 199
198 int cmd_diff(int argc, const char **argv, const char *prefix __used) 200 int cmd_diff(int argc, const char **argv, const char *prefix __used)
199 { 201 {
200 sort_order = diff__default_sort_order; 202 sort_order = diff__default_sort_order;
201 argc = parse_options(argc, argv, options, diff_usage, 0); 203 argc = parse_options(argc, argv, options, diff_usage, 0);
202 if (argc) { 204 if (argc) {
203 if (argc > 2) 205 if (argc > 2)
204 usage_with_options(diff_usage, options); 206 usage_with_options(diff_usage, options);
205 if (argc == 2) { 207 if (argc == 2) {
206 input_old = argv[0]; 208 input_old = argv[0];
207 input_new = argv[1]; 209 input_new = argv[1];
208 } else 210 } else
209 input_new = argv[0]; 211 input_new = argv[0];
210 } else if (symbol_conf.default_guest_vmlinux_name || 212 } else if (symbol_conf.default_guest_vmlinux_name ||
211 symbol_conf.default_guest_kallsyms) { 213 symbol_conf.default_guest_kallsyms) {
212 input_old = "perf.data.host"; 214 input_old = "perf.data.host";
213 input_new = "perf.data.guest"; 215 input_new = "perf.data.guest";
214 } 216 }
215 217
216 symbol_conf.exclude_other = false; 218 symbol_conf.exclude_other = false;
217 if (symbol__init() < 0) 219 if (symbol__init() < 0)
218 return -1; 220 return -1;
219 221
220 setup_sorting(diff_usage, options); 222 setup_sorting(diff_usage, options);
221 setup_pager(); 223 setup_pager();
222 224
223 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL); 225 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL);
224 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", NULL); 226 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", NULL);
225 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", NULL); 227 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", NULL);
226 228
227 return __cmd_diff(); 229 return __cmd_diff();
228 } 230 }
229 231
tools/perf/builtin-record.c
1 /* 1 /*
2 * builtin-record.c 2 * builtin-record.c
3 * 3 *
4 * Builtin record command: Record the profile of a workload 4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for 5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report. 6 * later analysis via perf report.
7 */ 7 */
8 #define _FILE_OFFSET_BITS 64 8 #define _FILE_OFFSET_BITS 64
9 9
10 #include "builtin.h" 10 #include "builtin.h"
11 11
12 #include "perf.h" 12 #include "perf.h"
13 13
14 #include "util/build-id.h" 14 #include "util/build-id.h"
15 #include "util/util.h" 15 #include "util/util.h"
16 #include "util/parse-options.h" 16 #include "util/parse-options.h"
17 #include "util/parse-events.h" 17 #include "util/parse-events.h"
18 18
19 #include "util/header.h" 19 #include "util/header.h"
20 #include "util/event.h" 20 #include "util/event.h"
21 #include "util/debug.h" 21 #include "util/debug.h"
22 #include "util/session.h" 22 #include "util/session.h"
23 #include "util/symbol.h" 23 #include "util/symbol.h"
24 #include "util/cpumap.h" 24 #include "util/cpumap.h"
25 25
26 #include <unistd.h> 26 #include <unistd.h>
27 #include <sched.h> 27 #include <sched.h>
28 #include <sys/mman.h> 28 #include <sys/mman.h>
29 29
/* How the output file is written; WRITE_FORCE is the initial write_mode. */
enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};
34 34
35 static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; 35 static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
36 36
37 static u64 user_interval = ULLONG_MAX; 37 static u64 user_interval = ULLONG_MAX;
38 static u64 default_interval = 0; 38 static u64 default_interval = 0;
39 static u64 sample_type; 39 static u64 sample_type;
40 40
41 static int nr_cpus = 0; 41 static int nr_cpus = 0;
42 static unsigned int page_size; 42 static unsigned int page_size;
43 static unsigned int mmap_pages = 128; 43 static unsigned int mmap_pages = 128;
44 static unsigned int user_freq = UINT_MAX; 44 static unsigned int user_freq = UINT_MAX;
45 static int freq = 1000; 45 static int freq = 1000;
46 static int output; 46 static int output;
47 static int pipe_output = 0; 47 static int pipe_output = 0;
48 static const char *output_name = "perf.data"; 48 static const char *output_name = "perf.data";
49 static int group = 0; 49 static int group = 0;
50 static int realtime_prio = 0; 50 static int realtime_prio = 0;
51 static bool raw_samples = false; 51 static bool raw_samples = false;
52 static bool sample_id_all_avail = true; 52 static bool sample_id_all_avail = true;
53 static bool system_wide = false; 53 static bool system_wide = false;
54 static pid_t target_pid = -1; 54 static pid_t target_pid = -1;
55 static pid_t target_tid = -1; 55 static pid_t target_tid = -1;
56 static pid_t *all_tids = NULL; 56 static pid_t *all_tids = NULL;
57 static int thread_num = 0; 57 static int thread_num = 0;
58 static pid_t child_pid = -1; 58 static pid_t child_pid = -1;
59 static bool no_inherit = false; 59 static bool no_inherit = false;
60 static enum write_mode_t write_mode = WRITE_FORCE; 60 static enum write_mode_t write_mode = WRITE_FORCE;
61 static bool call_graph = false; 61 static bool call_graph = false;
62 static bool inherit_stat = false; 62 static bool inherit_stat = false;
63 static bool no_samples = false; 63 static bool no_samples = false;
64 static bool sample_address = false; 64 static bool sample_address = false;
65 static bool sample_time = false; 65 static bool sample_time = false;
66 static bool no_buildid = false; 66 static bool no_buildid = false;
67 static bool no_buildid_cache = false; 67 static bool no_buildid_cache = false;
68 68
69 static long samples = 0; 69 static long samples = 0;
70 static u64 bytes_written = 0; 70 static u64 bytes_written = 0;
71 71
72 static struct pollfd *event_array; 72 static struct pollfd *event_array;
73 73
74 static int nr_poll = 0; 74 static int nr_poll = 0;
75 static int nr_cpu = 0; 75 static int nr_cpu = 0;
76 76
77 static int file_new = 1; 77 static int file_new = 1;
78 static off_t post_processing_offset; 78 static off_t post_processing_offset;
79 79
80 static struct perf_session *session; 80 static struct perf_session *session;
81 static const char *cpu_list; 81 static const char *cpu_list;
82 82
83 struct mmap_data { 83 struct mmap_data {
84 int counter; 84 int counter;
85 void *base; 85 void *base;
86 unsigned int mask; 86 unsigned int mask;
87 unsigned int prev; 87 unsigned int prev;
88 }; 88 };
89 89
90 static struct mmap_data mmap_array[MAX_NR_CPUS]; 90 static struct mmap_data mmap_array[MAX_NR_CPUS];
91 91
92 static unsigned long mmap_read_head(struct mmap_data *md) 92 static unsigned long mmap_read_head(struct mmap_data *md)
93 { 93 {
94 struct perf_event_mmap_page *pc = md->base; 94 struct perf_event_mmap_page *pc = md->base;
95 long head; 95 long head;
96 96
97 head = pc->data_head; 97 head = pc->data_head;
98 rmb(); 98 rmb();
99 99
100 return head; 100 return head;
101 } 101 }
102 102
103 static void mmap_write_tail(struct mmap_data *md, unsigned long tail) 103 static void mmap_write_tail(struct mmap_data *md, unsigned long tail)
104 { 104 {
105 struct perf_event_mmap_page *pc = md->base; 105 struct perf_event_mmap_page *pc = md->base;
106 106
107 /* 107 /*
108 * ensure all reads are done before we write the tail out. 108 * ensure all reads are done before we write the tail out.
109 */ 109 */
110 /* mb(); */ 110 /* mb(); */
111 pc->data_tail = tail; 111 pc->data_tail = tail;
112 } 112 }
113 113
114 static void advance_output(size_t size) 114 static void advance_output(size_t size)
115 { 115 {
116 bytes_written += size; 116 bytes_written += size;
117 } 117 }
118 118
119 static void write_output(void *buf, size_t size) 119 static void write_output(void *buf, size_t size)
120 { 120 {
121 while (size) { 121 while (size) {
122 int ret = write(output, buf, size); 122 int ret = write(output, buf, size);
123 123
124 if (ret < 0) 124 if (ret < 0)
125 die("failed to write"); 125 die("failed to write");
126 126
127 size -= ret; 127 size -= ret;
128 buf += ret; 128 buf += ret;
129 129
130 bytes_written += ret; 130 bytes_written += ret;
131 } 131 }
132 } 132 }
133 133
134 static int process_synthesized_event(event_t *event, 134 static int process_synthesized_event(event_t *event,
135 struct sample_data *sample __used, 135 struct sample_data *sample __used,
136 struct perf_session *self __used) 136 struct perf_session *self __used)
137 { 137 {
138 write_output(event, event->header.size); 138 write_output(event, event->header.size);
139 return 0; 139 return 0;
140 } 140 }
141 141
142 static void mmap_read(struct mmap_data *md) 142 static void mmap_read(struct mmap_data *md)
143 { 143 {
144 unsigned int head = mmap_read_head(md); 144 unsigned int head = mmap_read_head(md);
145 unsigned int old = md->prev; 145 unsigned int old = md->prev;
146 unsigned char *data = md->base + page_size; 146 unsigned char *data = md->base + page_size;
147 unsigned long size; 147 unsigned long size;
148 void *buf; 148 void *buf;
149 int diff; 149 int diff;
150 150
151 /* 151 /*
152 * If we're further behind than half the buffer, there's a chance 152 * If we're further behind than half the buffer, there's a chance
153 * the writer will bite our tail and mess up the samples under us. 153 * the writer will bite our tail and mess up the samples under us.
154 * 154 *
155 * If we somehow ended up ahead of the head, we got messed up. 155 * If we somehow ended up ahead of the head, we got messed up.
156 * 156 *
157 * In either case, truncate and restart at head. 157 * In either case, truncate and restart at head.
158 */ 158 */
159 diff = head - old; 159 diff = head - old;
160 if (diff < 0) { 160 if (diff < 0) {
161 fprintf(stderr, "WARNING: failed to keep up with mmap data\n"); 161 fprintf(stderr, "WARNING: failed to keep up with mmap data\n");
162 /* 162 /*
163 * head points to a known good entry, start there. 163 * head points to a known good entry, start there.
164 */ 164 */
165 old = head; 165 old = head;
166 } 166 }
167 167
168 if (old != head) 168 if (old != head)
169 samples++; 169 samples++;
170 170
171 size = head - old; 171 size = head - old;
172 172
173 if ((old & md->mask) + size != (head & md->mask)) { 173 if ((old & md->mask) + size != (head & md->mask)) {
174 buf = &data[old & md->mask]; 174 buf = &data[old & md->mask];
175 size = md->mask + 1 - (old & md->mask); 175 size = md->mask + 1 - (old & md->mask);
176 old += size; 176 old += size;
177 177
178 write_output(buf, size); 178 write_output(buf, size);
179 } 179 }
180 180
181 buf = &data[old & md->mask]; 181 buf = &data[old & md->mask];
182 size = head - old; 182 size = head - old;
183 old += size; 183 old += size;
184 184
185 write_output(buf, size); 185 write_output(buf, size);
186 186
187 md->prev = old; 187 md->prev = old;
188 mmap_write_tail(md, old); 188 mmap_write_tail(md, old);
189 } 189 }
190 190
/* Set to 1 by sig_handler(). */
static volatile int done = 0;
/* Number of the signal seen by sig_handler(), -1 while none was seen. */
static volatile int signr = -1;

/* Signal handler: flag completion and remember which signal arrived. */
static void sig_handler(int sig)
{
	done = 1;
	signr = sig;
}
199 199
200 static void sig_atexit(void) 200 static void sig_atexit(void)
201 { 201 {
202 if (child_pid > 0) 202 if (child_pid > 0)
203 kill(child_pid, SIGTERM); 203 kill(child_pid, SIGTERM);
204 204
205 if (signr == -1 || signr == SIGUSR1) 205 if (signr == -1 || signr == SIGUSR1)
206 return; 206 return;
207 207
208 signal(signr, SIG_DFL); 208 signal(signr, SIG_DFL);
209 kill(getpid(), signr); 209 kill(getpid(), signr);
210 } 210 }
211 211
/*
 * Group leader fd handed to sys_perf_event_open(): reset to -1 by
 * open_counters() and set by create_counter() when grouping is enabled.
 */
static int group_fd;
213 213
214 static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr) 214 static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr)
215 { 215 {
216 struct perf_header_attr *h_attr; 216 struct perf_header_attr *h_attr;
217 217
218 if (nr < session->header.attrs) { 218 if (nr < session->header.attrs) {
219 h_attr = session->header.attr[nr]; 219 h_attr = session->header.attr[nr];
220 } else { 220 } else {
221 h_attr = perf_header_attr__new(a); 221 h_attr = perf_header_attr__new(a);
222 if (h_attr != NULL) 222 if (h_attr != NULL)
223 if (perf_header__add_attr(&session->header, h_attr) < 0) { 223 if (perf_header__add_attr(&session->header, h_attr) < 0) {
224 perf_header_attr__delete(h_attr); 224 perf_header_attr__delete(h_attr);
225 h_attr = NULL; 225 h_attr = NULL;
226 } 226 }
227 } 227 }
228 228
229 return h_attr; 229 return h_attr;
230 } 230 }
231 231
232 static void create_counter(int counter, int cpu) 232 static void create_counter(int counter, int cpu)
233 { 233 {
234 char *filter = filters[counter]; 234 char *filter = filters[counter];
235 struct perf_event_attr *attr = attrs + counter; 235 struct perf_event_attr *attr = attrs + counter;
236 struct perf_header_attr *h_attr; 236 struct perf_header_attr *h_attr;
237 int track = !counter; /* only the first counter needs these */ 237 int track = !counter; /* only the first counter needs these */
238 int thread_index; 238 int thread_index;
239 int ret; 239 int ret;
240 struct { 240 struct {
241 u64 count; 241 u64 count;
242 u64 time_enabled; 242 u64 time_enabled;
243 u64 time_running; 243 u64 time_running;
244 u64 id; 244 u64 id;
245 } read_data; 245 } read_data;
246 246
247 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 247 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
248 PERF_FORMAT_TOTAL_TIME_RUNNING | 248 PERF_FORMAT_TOTAL_TIME_RUNNING |
249 PERF_FORMAT_ID; 249 PERF_FORMAT_ID;
250 250
251 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; 251 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
252 252
253 if (nr_counters > 1) 253 if (nr_counters > 1)
254 attr->sample_type |= PERF_SAMPLE_ID; 254 attr->sample_type |= PERF_SAMPLE_ID;
255 255
256 /* 256 /*
257 * We default some events to a 1 default interval. But keep 257 * We default some events to a 1 default interval. But keep
258 * it a weak assumption overridable by the user. 258 * it a weak assumption overridable by the user.
259 */ 259 */
260 if (!attr->sample_period || (user_freq != UINT_MAX && 260 if (!attr->sample_period || (user_freq != UINT_MAX &&
261 user_interval != ULLONG_MAX)) { 261 user_interval != ULLONG_MAX)) {
262 if (freq) { 262 if (freq) {
263 attr->sample_type |= PERF_SAMPLE_PERIOD; 263 attr->sample_type |= PERF_SAMPLE_PERIOD;
264 attr->freq = 1; 264 attr->freq = 1;
265 attr->sample_freq = freq; 265 attr->sample_freq = freq;
266 } else { 266 } else {
267 attr->sample_period = default_interval; 267 attr->sample_period = default_interval;
268 } 268 }
269 } 269 }
270 270
271 if (no_samples) 271 if (no_samples)
272 attr->sample_freq = 0; 272 attr->sample_freq = 0;
273 273
274 if (inherit_stat) 274 if (inherit_stat)
275 attr->inherit_stat = 1; 275 attr->inherit_stat = 1;
276 276
277 if (sample_address) { 277 if (sample_address) {
278 attr->sample_type |= PERF_SAMPLE_ADDR; 278 attr->sample_type |= PERF_SAMPLE_ADDR;
279 attr->mmap_data = track; 279 attr->mmap_data = track;
280 } 280 }
281 281
282 if (call_graph) 282 if (call_graph)
283 attr->sample_type |= PERF_SAMPLE_CALLCHAIN; 283 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
284 284
285 if (system_wide) 285 if (system_wide)
286 attr->sample_type |= PERF_SAMPLE_CPU; 286 attr->sample_type |= PERF_SAMPLE_CPU;
287 287
288 if (sample_time) 288 if (sample_time || system_wide || !no_inherit || cpu_list)
289 attr->sample_type |= PERF_SAMPLE_TIME; 289 attr->sample_type |= PERF_SAMPLE_TIME;
290 290
291 if (raw_samples) { 291 if (raw_samples) {
292 attr->sample_type |= PERF_SAMPLE_TIME; 292 attr->sample_type |= PERF_SAMPLE_TIME;
293 attr->sample_type |= PERF_SAMPLE_RAW; 293 attr->sample_type |= PERF_SAMPLE_RAW;
294 attr->sample_type |= PERF_SAMPLE_CPU; 294 attr->sample_type |= PERF_SAMPLE_CPU;
295 } 295 }
296 296
297 if (!sample_type) 297 if (!sample_type)
298 sample_type = attr->sample_type; 298 sample_type = attr->sample_type;
299 299
300 attr->mmap = track; 300 attr->mmap = track;
301 attr->comm = track; 301 attr->comm = track;
302 attr->inherit = !no_inherit; 302 attr->inherit = !no_inherit;
303 if (target_pid == -1 && target_tid == -1 && !system_wide) { 303 if (target_pid == -1 && target_tid == -1 && !system_wide) {
304 attr->disabled = 1; 304 attr->disabled = 1;
305 attr->enable_on_exec = 1; 305 attr->enable_on_exec = 1;
306 } 306 }
307 retry_sample_id: 307 retry_sample_id:
308 attr->sample_id_all = sample_id_all_avail ? 1 : 0; 308 attr->sample_id_all = sample_id_all_avail ? 1 : 0;
309 309
310 for (thread_index = 0; thread_index < thread_num; thread_index++) { 310 for (thread_index = 0; thread_index < thread_num; thread_index++) {
311 try_again: 311 try_again:
312 fd[nr_cpu][counter][thread_index] = sys_perf_event_open(attr, 312 fd[nr_cpu][counter][thread_index] = sys_perf_event_open(attr,
313 all_tids[thread_index], cpu, group_fd, 0); 313 all_tids[thread_index], cpu, group_fd, 0);
314 314
315 if (fd[nr_cpu][counter][thread_index] < 0) { 315 if (fd[nr_cpu][counter][thread_index] < 0) {
316 int err = errno; 316 int err = errno;
317 317
318 if (err == EPERM || err == EACCES) 318 if (err == EPERM || err == EACCES)
319 die("Permission error - are you root?\n" 319 die("Permission error - are you root?\n"
320 "\t Consider tweaking" 320 "\t Consider tweaking"
321 " /proc/sys/kernel/perf_event_paranoid.\n"); 321 " /proc/sys/kernel/perf_event_paranoid.\n");
322 else if (err == ENODEV && cpu_list) { 322 else if (err == ENODEV && cpu_list) {
323 die("No such device - did you specify" 323 die("No such device - did you specify"
324 " an out-of-range profile CPU?\n"); 324 " an out-of-range profile CPU?\n");
325 } else if (err == EINVAL && sample_id_all_avail) { 325 } else if (err == EINVAL && sample_id_all_avail) {
326 /* 326 /*
327 * Old kernel, no attr->sample_id_type_all field 327 * Old kernel, no attr->sample_id_type_all field
328 */ 328 */
329 sample_id_all_avail = false; 329 sample_id_all_avail = false;
330 if (!sample_time && !raw_samples)
331 attr->sample_type &= ~PERF_SAMPLE_TIME;
332
330 goto retry_sample_id; 333 goto retry_sample_id;
331 } 334 }
332 335
333 /* 336 /*
334 * If it's cycles then fall back to hrtimer 337 * If it's cycles then fall back to hrtimer
335 * based cpu-clock-tick sw counter, which 338 * based cpu-clock-tick sw counter, which
336 * is always available even if no PMU support: 339 * is always available even if no PMU support:
337 */ 340 */
338 if (attr->type == PERF_TYPE_HARDWARE 341 if (attr->type == PERF_TYPE_HARDWARE
339 && attr->config == PERF_COUNT_HW_CPU_CYCLES) { 342 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
340 343
341 if (verbose) 344 if (verbose)
342 warning(" ... trying to fall back to cpu-clock-ticks\n"); 345 warning(" ... trying to fall back to cpu-clock-ticks\n");
343 attr->type = PERF_TYPE_SOFTWARE; 346 attr->type = PERF_TYPE_SOFTWARE;
344 attr->config = PERF_COUNT_SW_CPU_CLOCK; 347 attr->config = PERF_COUNT_SW_CPU_CLOCK;
345 goto try_again; 348 goto try_again;
346 } 349 }
347 printf("\n"); 350 printf("\n");
348 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n", 351 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
349 fd[nr_cpu][counter][thread_index], strerror(err)); 352 fd[nr_cpu][counter][thread_index], strerror(err));
350 353
351 #if defined(__i386__) || defined(__x86_64__) 354 #if defined(__i386__) || defined(__x86_64__)
352 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) 355 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
353 die("No hardware sampling interrupt available." 356 die("No hardware sampling interrupt available."
354 " No APIC? If so then you can boot the kernel" 357 " No APIC? If so then you can boot the kernel"
355 " with the \"lapic\" boot parameter to" 358 " with the \"lapic\" boot parameter to"
356 " force-enable it.\n"); 359 " force-enable it.\n");
357 #endif 360 #endif
358 361
359 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 362 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
360 exit(-1); 363 exit(-1);
361 } 364 }
362 365
363 h_attr = get_header_attr(attr, counter); 366 h_attr = get_header_attr(attr, counter);
364 if (h_attr == NULL) 367 if (h_attr == NULL)
365 die("nomem\n"); 368 die("nomem\n");
366 369
367 if (!file_new) { 370 if (!file_new) {
368 if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { 371 if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
369 fprintf(stderr, "incompatible append\n"); 372 fprintf(stderr, "incompatible append\n");
370 exit(-1); 373 exit(-1);
371 } 374 }
372 } 375 }
373 376
374 if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) { 377 if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) {
375 perror("Unable to read perf file descriptor"); 378 perror("Unable to read perf file descriptor");
376 exit(-1); 379 exit(-1);
377 } 380 }
378 381
379 if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { 382 if (perf_header_attr__add_id(h_attr, read_data.id) < 0) {
380 pr_warning("Not enough memory to add id\n"); 383 pr_warning("Not enough memory to add id\n");
381 exit(-1); 384 exit(-1);
382 } 385 }
383 386
384 assert(fd[nr_cpu][counter][thread_index] >= 0); 387 assert(fd[nr_cpu][counter][thread_index] >= 0);
385 fcntl(fd[nr_cpu][counter][thread_index], F_SETFL, O_NONBLOCK); 388 fcntl(fd[nr_cpu][counter][thread_index], F_SETFL, O_NONBLOCK);
386 389
387 /* 390 /*
388 * First counter acts as the group leader: 391 * First counter acts as the group leader:
389 */ 392 */
390 if (group && group_fd == -1) 393 if (group && group_fd == -1)
391 group_fd = fd[nr_cpu][counter][thread_index]; 394 group_fd = fd[nr_cpu][counter][thread_index];
392 395
393 if (counter || thread_index) { 396 if (counter || thread_index) {
394 ret = ioctl(fd[nr_cpu][counter][thread_index], 397 ret = ioctl(fd[nr_cpu][counter][thread_index],
395 PERF_EVENT_IOC_SET_OUTPUT, 398 PERF_EVENT_IOC_SET_OUTPUT,
396 fd[nr_cpu][0][0]); 399 fd[nr_cpu][0][0]);
397 if (ret) { 400 if (ret) {
398 error("failed to set output: %d (%s)\n", errno, 401 error("failed to set output: %d (%s)\n", errno,
399 strerror(errno)); 402 strerror(errno));
400 exit(-1); 403 exit(-1);
401 } 404 }
402 } else { 405 } else {
403 mmap_array[nr_cpu].counter = counter; 406 mmap_array[nr_cpu].counter = counter;
404 mmap_array[nr_cpu].prev = 0; 407 mmap_array[nr_cpu].prev = 0;
405 mmap_array[nr_cpu].mask = mmap_pages*page_size - 1; 408 mmap_array[nr_cpu].mask = mmap_pages*page_size - 1;
406 mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size, 409 mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
407 PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0); 410 PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0);
408 if (mmap_array[nr_cpu].base == MAP_FAILED) { 411 if (mmap_array[nr_cpu].base == MAP_FAILED) {
409 error("failed to mmap with %d (%s)\n", errno, strerror(errno)); 412 error("failed to mmap with %d (%s)\n", errno, strerror(errno));
410 exit(-1); 413 exit(-1);
411 } 414 }
412 415
413 event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index]; 416 event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index];
414 event_array[nr_poll].events = POLLIN; 417 event_array[nr_poll].events = POLLIN;
415 nr_poll++; 418 nr_poll++;
416 } 419 }
417 420
418 if (filter != NULL) { 421 if (filter != NULL) {
419 ret = ioctl(fd[nr_cpu][counter][thread_index], 422 ret = ioctl(fd[nr_cpu][counter][thread_index],
420 PERF_EVENT_IOC_SET_FILTER, filter); 423 PERF_EVENT_IOC_SET_FILTER, filter);
421 if (ret) { 424 if (ret) {
422 error("failed to set filter with %d (%s)\n", errno, 425 error("failed to set filter with %d (%s)\n", errno,
423 strerror(errno)); 426 strerror(errno));
424 exit(-1); 427 exit(-1);
425 } 428 }
426 } 429 }
427 } 430 }
428 } 431 }
429 432
430 static void open_counters(int cpu) 433 static void open_counters(int cpu)
431 { 434 {
432 int counter; 435 int counter;
433 436
434 group_fd = -1; 437 group_fd = -1;
435 for (counter = 0; counter < nr_counters; counter++) 438 for (counter = 0; counter < nr_counters; counter++)
436 create_counter(counter, cpu); 439 create_counter(counter, cpu);
437 440
438 nr_cpu++; 441 nr_cpu++;
439 } 442 }
440 443
441 static int process_buildids(void) 444 static int process_buildids(void)
442 { 445 {
443 u64 size = lseek(output, 0, SEEK_CUR); 446 u64 size = lseek(output, 0, SEEK_CUR);
444 447
445 if (size == 0) 448 if (size == 0)
446 return 0; 449 return 0;
447 450
448 session->fd = output; 451 session->fd = output;
449 return __perf_session__process_events(session, post_processing_offset, 452 return __perf_session__process_events(session, post_processing_offset,
450 size - post_processing_offset, 453 size - post_processing_offset,
451 size, &build_id__mark_dso_hit_ops); 454 size, &build_id__mark_dso_hit_ops);
452 } 455 }
453 456
454 static void atexit_header(void) 457 static void atexit_header(void)
455 { 458 {
456 if (!pipe_output) { 459 if (!pipe_output) {
457 session->header.data_size += bytes_written; 460 session->header.data_size += bytes_written;
458 461
459 if (!no_buildid) 462 if (!no_buildid)
460 process_buildids(); 463 process_buildids();
461 perf_header__write(&session->header, output, true); 464 perf_header__write(&session->header, output, true);
462 perf_session__delete(session); 465 perf_session__delete(session);
463 symbol__exit(); 466 symbol__exit();
464 } 467 }
465 } 468 }
466 469
467 static void event__synthesize_guest_os(struct machine *machine, void *data) 470 static void event__synthesize_guest_os(struct machine *machine, void *data)
468 { 471 {
469 int err; 472 int err;
470 struct perf_session *psession = data; 473 struct perf_session *psession = data;
471 474
472 if (machine__is_host(machine)) 475 if (machine__is_host(machine))
473 return; 476 return;
474 477
475 /* 478 /*
476 *As for guest kernel when processing subcommand record&report, 479 *As for guest kernel when processing subcommand record&report,
477 *we arrange module mmap prior to guest kernel mmap and trigger 480 *we arrange module mmap prior to guest kernel mmap and trigger
478 *a preload dso because default guest module symbols are loaded 481 *a preload dso because default guest module symbols are loaded
479 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 482 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
480 *method is used to avoid symbol missing when the first addr is 483 *method is used to avoid symbol missing when the first addr is
481 *in module instead of in guest kernel. 484 *in module instead of in guest kernel.
482 */ 485 */
483 err = event__synthesize_modules(process_synthesized_event, 486 err = event__synthesize_modules(process_synthesized_event,
484 psession, machine); 487 psession, machine);
485 if (err < 0) 488 if (err < 0)
486 pr_err("Couldn't record guest kernel [%d]'s reference" 489 pr_err("Couldn't record guest kernel [%d]'s reference"
487 " relocation symbol.\n", machine->pid); 490 " relocation symbol.\n", machine->pid);
488 491
489 /* 492 /*
490 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 493 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
491 * have no _text sometimes. 494 * have no _text sometimes.
492 */ 495 */
493 err = event__synthesize_kernel_mmap(process_synthesized_event, 496 err = event__synthesize_kernel_mmap(process_synthesized_event,
494 psession, machine, "_text"); 497 psession, machine, "_text");
495 if (err < 0) 498 if (err < 0)
496 err = event__synthesize_kernel_mmap(process_synthesized_event, 499 err = event__synthesize_kernel_mmap(process_synthesized_event,
497 psession, machine, "_stext"); 500 psession, machine, "_stext");
498 if (err < 0) 501 if (err < 0)
499 pr_err("Couldn't record guest kernel [%d]'s reference" 502 pr_err("Couldn't record guest kernel [%d]'s reference"
500 " relocation symbol.\n", machine->pid); 503 " relocation symbol.\n", machine->pid);
501 } 504 }
502 505
503 static struct perf_event_header finished_round_event = { 506 static struct perf_event_header finished_round_event = {
504 .size = sizeof(struct perf_event_header), 507 .size = sizeof(struct perf_event_header),
505 .type = PERF_RECORD_FINISHED_ROUND, 508 .type = PERF_RECORD_FINISHED_ROUND,
506 }; 509 };
507 510
508 static void mmap_read_all(void) 511 static void mmap_read_all(void)
509 { 512 {
510 int i; 513 int i;
511 514
512 for (i = 0; i < nr_cpu; i++) { 515 for (i = 0; i < nr_cpu; i++) {
513 if (mmap_array[i].base) 516 if (mmap_array[i].base)
514 mmap_read(&mmap_array[i]); 517 mmap_read(&mmap_array[i]);
515 } 518 }
516 519
517 if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO)) 520 if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
518 write_output(&finished_round_event, sizeof(finished_round_event)); 521 write_output(&finished_round_event, sizeof(finished_round_event));
519 } 522 }
520 523
521 static int __cmd_record(int argc, const char **argv) 524 static int __cmd_record(int argc, const char **argv)
522 { 525 {
523 int i, counter; 526 int i, counter;
524 struct stat st; 527 struct stat st;
525 int flags; 528 int flags;
526 int err; 529 int err;
527 unsigned long waking = 0; 530 unsigned long waking = 0;
528 int child_ready_pipe[2], go_pipe[2]; 531 int child_ready_pipe[2], go_pipe[2];
529 const bool forks = argc > 0; 532 const bool forks = argc > 0;
530 char buf; 533 char buf;
531 struct machine *machine; 534 struct machine *machine;
532 535
533 page_size = sysconf(_SC_PAGE_SIZE); 536 page_size = sysconf(_SC_PAGE_SIZE);
534 537
535 atexit(sig_atexit); 538 atexit(sig_atexit);
536 signal(SIGCHLD, sig_handler); 539 signal(SIGCHLD, sig_handler);
537 signal(SIGINT, sig_handler); 540 signal(SIGINT, sig_handler);
538 signal(SIGUSR1, sig_handler); 541 signal(SIGUSR1, sig_handler);
539 542
540 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { 543 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
541 perror("failed to create pipes"); 544 perror("failed to create pipes");
542 exit(-1); 545 exit(-1);
543 } 546 }
544 547
545 if (!strcmp(output_name, "-")) 548 if (!strcmp(output_name, "-"))
546 pipe_output = 1; 549 pipe_output = 1;
547 else if (!stat(output_name, &st) && st.st_size) { 550 else if (!stat(output_name, &st) && st.st_size) {
548 if (write_mode == WRITE_FORCE) { 551 if (write_mode == WRITE_FORCE) {
549 char oldname[PATH_MAX]; 552 char oldname[PATH_MAX];
550 snprintf(oldname, sizeof(oldname), "%s.old", 553 snprintf(oldname, sizeof(oldname), "%s.old",
551 output_name); 554 output_name);
552 unlink(oldname); 555 unlink(oldname);
553 rename(output_name, oldname); 556 rename(output_name, oldname);
554 } 557 }
555 } else if (write_mode == WRITE_APPEND) { 558 } else if (write_mode == WRITE_APPEND) {
556 write_mode = WRITE_FORCE; 559 write_mode = WRITE_FORCE;
557 } 560 }
558 561
559 flags = O_CREAT|O_RDWR; 562 flags = O_CREAT|O_RDWR;
560 if (write_mode == WRITE_APPEND) 563 if (write_mode == WRITE_APPEND)
561 file_new = 0; 564 file_new = 0;
562 else 565 else
563 flags |= O_TRUNC; 566 flags |= O_TRUNC;
564 567
565 if (pipe_output) 568 if (pipe_output)
566 output = STDOUT_FILENO; 569 output = STDOUT_FILENO;
567 else 570 else
568 output = open(output_name, flags, S_IRUSR | S_IWUSR); 571 output = open(output_name, flags, S_IRUSR | S_IWUSR);
569 if (output < 0) { 572 if (output < 0) {
570 perror("failed to create output file"); 573 perror("failed to create output file");
571 exit(-1); 574 exit(-1);
572 } 575 }
573 576
574 session = perf_session__new(output_name, O_WRONLY, 577 session = perf_session__new(output_name, O_WRONLY,
575 write_mode == WRITE_FORCE, false, NULL); 578 write_mode == WRITE_FORCE, false, NULL);
576 if (session == NULL) { 579 if (session == NULL) {
577 pr_err("Not enough memory for reading perf file header\n"); 580 pr_err("Not enough memory for reading perf file header\n");
578 return -1; 581 return -1;
579 } 582 }
580 583
581 if (!no_buildid) 584 if (!no_buildid)
582 perf_header__set_feat(&session->header, HEADER_BUILD_ID); 585 perf_header__set_feat(&session->header, HEADER_BUILD_ID);
583 586
584 if (!file_new) { 587 if (!file_new) {
585 err = perf_header__read(session, output); 588 err = perf_header__read(session, output);
586 if (err < 0) 589 if (err < 0)
587 goto out_delete_session; 590 goto out_delete_session;
588 } 591 }
589 592
590 if (have_tracepoints(attrs, nr_counters)) 593 if (have_tracepoints(attrs, nr_counters))
591 perf_header__set_feat(&session->header, HEADER_TRACE_INFO); 594 perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
592 595
593 /* 596 /*
594 * perf_session__delete(session) will be called at atexit_header() 597 * perf_session__delete(session) will be called at atexit_header()
595 */ 598 */
596 atexit(atexit_header); 599 atexit(atexit_header);
597 600
598 if (forks) { 601 if (forks) {
599 child_pid = fork(); 602 child_pid = fork();
600 if (child_pid < 0) { 603 if (child_pid < 0) {
601 perror("failed to fork"); 604 perror("failed to fork");
602 exit(-1); 605 exit(-1);
603 } 606 }
604 607
605 if (!child_pid) { 608 if (!child_pid) {
606 if (pipe_output) 609 if (pipe_output)
607 dup2(2, 1); 610 dup2(2, 1);
608 close(child_ready_pipe[0]); 611 close(child_ready_pipe[0]);
609 close(go_pipe[1]); 612 close(go_pipe[1]);
610 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 613 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
611 614
612 /* 615 /*
613 * Do a dummy execvp to get the PLT entry resolved, 616 * Do a dummy execvp to get the PLT entry resolved,
614 * so we avoid the resolver overhead on the real 617 * so we avoid the resolver overhead on the real
615 * execvp call. 618 * execvp call.
616 */ 619 */
617 execvp("", (char **)argv); 620 execvp("", (char **)argv);
618 621
619 /* 622 /*
620 * Tell the parent we're ready to go 623 * Tell the parent we're ready to go
621 */ 624 */
622 close(child_ready_pipe[1]); 625 close(child_ready_pipe[1]);
623 626
624 /* 627 /*
625 * Wait until the parent tells us to go. 628 * Wait until the parent tells us to go.
626 */ 629 */
627 if (read(go_pipe[0], &buf, 1) == -1) 630 if (read(go_pipe[0], &buf, 1) == -1)
628 perror("unable to read pipe"); 631 perror("unable to read pipe");
629 632
630 execvp(argv[0], (char **)argv); 633 execvp(argv[0], (char **)argv);
631 634
632 perror(argv[0]); 635 perror(argv[0]);
633 kill(getppid(), SIGUSR1); 636 kill(getppid(), SIGUSR1);
634 exit(-1); 637 exit(-1);
635 } 638 }
636 639
637 if (!system_wide && target_tid == -1 && target_pid == -1) 640 if (!system_wide && target_tid == -1 && target_pid == -1)
638 all_tids[0] = child_pid; 641 all_tids[0] = child_pid;
639 642
640 close(child_ready_pipe[1]); 643 close(child_ready_pipe[1]);
641 close(go_pipe[0]); 644 close(go_pipe[0]);
642 /* 645 /*
643 * wait for child to settle 646 * wait for child to settle
644 */ 647 */
645 if (read(child_ready_pipe[0], &buf, 1) == -1) { 648 if (read(child_ready_pipe[0], &buf, 1) == -1) {
646 perror("unable to read pipe"); 649 perror("unable to read pipe");
647 exit(-1); 650 exit(-1);
648 } 651 }
649 close(child_ready_pipe[0]); 652 close(child_ready_pipe[0]);
650 } 653 }
651 654
652 nr_cpus = read_cpu_map(cpu_list); 655 nr_cpus = read_cpu_map(cpu_list);
653 if (nr_cpus < 1) { 656 if (nr_cpus < 1) {
654 perror("failed to collect number of CPUs"); 657 perror("failed to collect number of CPUs");
655 return -1; 658 return -1;
656 } 659 }
657 660
658 if (!system_wide && no_inherit && !cpu_list) { 661 if (!system_wide && no_inherit && !cpu_list) {
659 open_counters(-1); 662 open_counters(-1);
660 } else { 663 } else {
661 for (i = 0; i < nr_cpus; i++) 664 for (i = 0; i < nr_cpus; i++)
662 open_counters(cpumap[i]); 665 open_counters(cpumap[i]);
663 } 666 }
664 667
665 perf_session__set_sample_type(session, sample_type); 668 perf_session__set_sample_type(session, sample_type);
666 669
667 if (pipe_output) { 670 if (pipe_output) {
668 err = perf_header__write_pipe(output); 671 err = perf_header__write_pipe(output);
669 if (err < 0) 672 if (err < 0)
670 return err; 673 return err;
671 } else if (file_new) { 674 } else if (file_new) {
672 err = perf_header__write(&session->header, output, false); 675 err = perf_header__write(&session->header, output, false);
673 if (err < 0) 676 if (err < 0)
674 return err; 677 return err;
675 } 678 }
676 679
677 post_processing_offset = lseek(output, 0, SEEK_CUR); 680 post_processing_offset = lseek(output, 0, SEEK_CUR);
678 681
679 perf_session__set_sample_id_all(session, sample_id_all_avail); 682 perf_session__set_sample_id_all(session, sample_id_all_avail);
680 683
681 if (pipe_output) { 684 if (pipe_output) {
682 err = event__synthesize_attrs(&session->header, 685 err = event__synthesize_attrs(&session->header,
683 process_synthesized_event, 686 process_synthesized_event,
684 session); 687 session);
685 if (err < 0) { 688 if (err < 0) {
686 pr_err("Couldn't synthesize attrs.\n"); 689 pr_err("Couldn't synthesize attrs.\n");
687 return err; 690 return err;
688 } 691 }
689 692
690 err = event__synthesize_event_types(process_synthesized_event, 693 err = event__synthesize_event_types(process_synthesized_event,
691 session); 694 session);
692 if (err < 0) { 695 if (err < 0) {
693 pr_err("Couldn't synthesize event_types.\n"); 696 pr_err("Couldn't synthesize event_types.\n");
694 return err; 697 return err;
695 } 698 }
696 699
697 if (have_tracepoints(attrs, nr_counters)) { 700 if (have_tracepoints(attrs, nr_counters)) {
698 /* 701 /*
699 * FIXME err <= 0 here actually means that 702 * FIXME err <= 0 here actually means that
700 * there were no tracepoints so its not really 703 * there were no tracepoints so its not really
701 * an error, just that we don't need to 704 * an error, just that we don't need to
702 * synthesize anything. We really have to 705 * synthesize anything. We really have to
703 * return this more properly and also 706 * return this more properly and also
704 * propagate errors that now are calling die() 707 * propagate errors that now are calling die()
705 */ 708 */
706 err = event__synthesize_tracing_data(output, attrs, 709 err = event__synthesize_tracing_data(output, attrs,
707 nr_counters, 710 nr_counters,
708 process_synthesized_event, 711 process_synthesized_event,
709 session); 712 session);
710 if (err <= 0) { 713 if (err <= 0) {
711 pr_err("Couldn't record tracing data.\n"); 714 pr_err("Couldn't record tracing data.\n");
712 return err; 715 return err;
713 } 716 }
714 advance_output(err); 717 advance_output(err);
715 } 718 }
716 } 719 }
717 720
718 machine = perf_session__find_host_machine(session); 721 machine = perf_session__find_host_machine(session);
719 if (!machine) { 722 if (!machine) {
720 pr_err("Couldn't find native kernel information.\n"); 723 pr_err("Couldn't find native kernel information.\n");
721 return -1; 724 return -1;
722 } 725 }
723 726
724 err = event__synthesize_kernel_mmap(process_synthesized_event, 727 err = event__synthesize_kernel_mmap(process_synthesized_event,
725 session, machine, "_text"); 728 session, machine, "_text");
726 if (err < 0) 729 if (err < 0)
727 err = event__synthesize_kernel_mmap(process_synthesized_event, 730 err = event__synthesize_kernel_mmap(process_synthesized_event,
728 session, machine, "_stext"); 731 session, machine, "_stext");
729 if (err < 0) 732 if (err < 0)
730 pr_err("Couldn't record kernel reference relocation symbol\n" 733 pr_err("Couldn't record kernel reference relocation symbol\n"
731 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 734 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
732 "Check /proc/kallsyms permission or run as root.\n"); 735 "Check /proc/kallsyms permission or run as root.\n");
733 736
734 err = event__synthesize_modules(process_synthesized_event, 737 err = event__synthesize_modules(process_synthesized_event,
735 session, machine); 738 session, machine);
736 if (err < 0) 739 if (err < 0)
737 pr_err("Couldn't record kernel module information.\n" 740 pr_err("Couldn't record kernel module information.\n"
738 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 741 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
739 "Check /proc/modules permission or run as root.\n"); 742 "Check /proc/modules permission or run as root.\n");
740 743
741 if (perf_guest) 744 if (perf_guest)
742 perf_session__process_machines(session, event__synthesize_guest_os); 745 perf_session__process_machines(session, event__synthesize_guest_os);
743 746
744 if (!system_wide) 747 if (!system_wide)
745 event__synthesize_thread(target_tid, process_synthesized_event, 748 event__synthesize_thread(target_tid, process_synthesized_event,
746 session); 749 session);
747 else 750 else
748 event__synthesize_threads(process_synthesized_event, session); 751 event__synthesize_threads(process_synthesized_event, session);
749 752
750 if (realtime_prio) { 753 if (realtime_prio) {
751 struct sched_param param; 754 struct sched_param param;
752 755
753 param.sched_priority = realtime_prio; 756 param.sched_priority = realtime_prio;
754 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 757 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
755 pr_err("Could not set realtime priority.\n"); 758 pr_err("Could not set realtime priority.\n");
756 exit(-1); 759 exit(-1);
757 } 760 }
758 } 761 }
759 762
760 /* 763 /*
761 * Let the child rip 764 * Let the child rip
762 */ 765 */
763 if (forks) 766 if (forks)
764 close(go_pipe[1]); 767 close(go_pipe[1]);
765 768
766 for (;;) { 769 for (;;) {
767 int hits = samples; 770 int hits = samples;
768 int thread; 771 int thread;
769 772
770 mmap_read_all(); 773 mmap_read_all();
771 774
772 if (hits == samples) { 775 if (hits == samples) {
773 if (done) 776 if (done)
774 break; 777 break;
775 err = poll(event_array, nr_poll, -1); 778 err = poll(event_array, nr_poll, -1);
776 waking++; 779 waking++;
777 } 780 }
778 781
779 if (done) { 782 if (done) {
780 for (i = 0; i < nr_cpu; i++) { 783 for (i = 0; i < nr_cpu; i++) {
781 for (counter = 0; 784 for (counter = 0;
782 counter < nr_counters; 785 counter < nr_counters;
783 counter++) { 786 counter++) {
784 for (thread = 0; 787 for (thread = 0;
785 thread < thread_num; 788 thread < thread_num;
786 thread++) 789 thread++)
787 ioctl(fd[i][counter][thread], 790 ioctl(fd[i][counter][thread],
788 PERF_EVENT_IOC_DISABLE); 791 PERF_EVENT_IOC_DISABLE);
789 } 792 }
790 } 793 }
791 } 794 }
792 } 795 }
793 796
794 if (quiet || signr == SIGUSR1) 797 if (quiet || signr == SIGUSR1)
795 return 0; 798 return 0;
796 799
797 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); 800 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
798 801
799 /* 802 /*
800 * Approximate RIP event size: 24 bytes. 803 * Approximate RIP event size: 24 bytes.
801 */ 804 */
802 fprintf(stderr, 805 fprintf(stderr,
803 "[ perf record: Captured and wrote %.3f MB %s (~%lld samples) ]\n", 806 "[ perf record: Captured and wrote %.3f MB %s (~%lld samples) ]\n",
804 (double)bytes_written / 1024.0 / 1024.0, 807 (double)bytes_written / 1024.0 / 1024.0,
805 output_name, 808 output_name,
806 bytes_written / 24); 809 bytes_written / 24);
807 810
808 return 0; 811 return 0;
809 812
810 out_delete_session: 813 out_delete_session:
811 perf_session__delete(session); 814 perf_session__delete(session);
812 return err; 815 return err;
813 } 816 }
814 817
815 static const char * const record_usage[] = { 818 static const char * const record_usage[] = {
816 "perf record [<options>] [<command>]", 819 "perf record [<options>] [<command>]",
817 "perf record [<options>] -- <command> [<options>]", 820 "perf record [<options>] -- <command> [<options>]",
818 NULL 821 NULL
819 }; 822 };
820 823
821 static bool force, append_file; 824 static bool force, append_file;
822 825
823 const struct option record_options[] = { 826 const struct option record_options[] = {
824 OPT_CALLBACK('e', "event", NULL, "event", 827 OPT_CALLBACK('e', "event", NULL, "event",
825 "event selector. use 'perf list' to list available events", 828 "event selector. use 'perf list' to list available events",
826 parse_events), 829 parse_events),
827 OPT_CALLBACK(0, "filter", NULL, "filter", 830 OPT_CALLBACK(0, "filter", NULL, "filter",
828 "event filter", parse_filter), 831 "event filter", parse_filter),
829 OPT_INTEGER('p', "pid", &target_pid, 832 OPT_INTEGER('p', "pid", &target_pid,
830 "record events on existing process id"), 833 "record events on existing process id"),
831 OPT_INTEGER('t', "tid", &target_tid, 834 OPT_INTEGER('t', "tid", &target_tid,
832 "record events on existing thread id"), 835 "record events on existing thread id"),
833 OPT_INTEGER('r', "realtime", &realtime_prio, 836 OPT_INTEGER('r', "realtime", &realtime_prio,
834 "collect data with this RT SCHED_FIFO priority"), 837 "collect data with this RT SCHED_FIFO priority"),
835 OPT_BOOLEAN('R', "raw-samples", &raw_samples, 838 OPT_BOOLEAN('R', "raw-samples", &raw_samples,
836 "collect raw sample records from all opened counters"), 839 "collect raw sample records from all opened counters"),
837 OPT_BOOLEAN('a', "all-cpus", &system_wide, 840 OPT_BOOLEAN('a', "all-cpus", &system_wide,
838 "system-wide collection from all CPUs"), 841 "system-wide collection from all CPUs"),
839 OPT_BOOLEAN('A', "append", &append_file, 842 OPT_BOOLEAN('A', "append", &append_file,
840 "append to the output file to do incremental profiling"), 843 "append to the output file to do incremental profiling"),
841 OPT_STRING('C', "cpu", &cpu_list, "cpu", 844 OPT_STRING('C', "cpu", &cpu_list, "cpu",
842 "list of cpus to monitor"), 845 "list of cpus to monitor"),
843 OPT_BOOLEAN('f', "force", &force, 846 OPT_BOOLEAN('f', "force", &force,
844 "overwrite existing data file (deprecated)"), 847 "overwrite existing data file (deprecated)"),
845 OPT_U64('c', "count", &user_interval, "event period to sample"), 848 OPT_U64('c', "count", &user_interval, "event period to sample"),
846 OPT_STRING('o', "output", &output_name, "file", 849 OPT_STRING('o', "output", &output_name, "file",
847 "output file name"), 850 "output file name"),
848 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 851 OPT_BOOLEAN('i', "no-inherit", &no_inherit,
849 "child tasks do not inherit counters"), 852 "child tasks do not inherit counters"),
850 OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"), 853 OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
851 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), 854 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
852 OPT_BOOLEAN('g', "call-graph", &call_graph, 855 OPT_BOOLEAN('g', "call-graph", &call_graph,
853 "do call-graph (stack chain/backtrace) recording"), 856 "do call-graph (stack chain/backtrace) recording"),
854 OPT_INCR('v', "verbose", &verbose, 857 OPT_INCR('v', "verbose", &verbose,
855 "be more verbose (show counter open errors, etc)"), 858 "be more verbose (show counter open errors, etc)"),
856 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 859 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
857 OPT_BOOLEAN('s', "stat", &inherit_stat, 860 OPT_BOOLEAN('s', "stat", &inherit_stat,
858 "per thread counts"), 861 "per thread counts"),
859 OPT_BOOLEAN('d', "data", &sample_address, 862 OPT_BOOLEAN('d', "data", &sample_address,
860 "Sample addresses"), 863 "Sample addresses"),
861 OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"), 864 OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
862 OPT_BOOLEAN('n', "no-samples", &no_samples, 865 OPT_BOOLEAN('n', "no-samples", &no_samples,
863 "don't sample"), 866 "don't sample"),
864 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache, 867 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
865 "do not update the buildid cache"), 868 "do not update the buildid cache"),
866 OPT_BOOLEAN('B', "no-buildid", &no_buildid, 869 OPT_BOOLEAN('B', "no-buildid", &no_buildid,
867 "do not collect buildids in perf.data"), 870 "do not collect buildids in perf.data"),
868 OPT_END() 871 OPT_END()
869 }; 872 };
870 873
871 int cmd_record(int argc, const char **argv, const char *prefix __used) 874 int cmd_record(int argc, const char **argv, const char *prefix __used)
872 { 875 {
873 int i, j, err = -ENOMEM; 876 int i, j, err = -ENOMEM;
874 877
875 argc = parse_options(argc, argv, record_options, record_usage, 878 argc = parse_options(argc, argv, record_options, record_usage,
876 PARSE_OPT_STOP_AT_NON_OPTION); 879 PARSE_OPT_STOP_AT_NON_OPTION);
877 if (!argc && target_pid == -1 && target_tid == -1 && 880 if (!argc && target_pid == -1 && target_tid == -1 &&
878 !system_wide && !cpu_list) 881 !system_wide && !cpu_list)
879 usage_with_options(record_usage, record_options); 882 usage_with_options(record_usage, record_options);
880 883
881 if (force && append_file) { 884 if (force && append_file) {
882 fprintf(stderr, "Can't overwrite and append at the same time." 885 fprintf(stderr, "Can't overwrite and append at the same time."
883 " You need to choose between -f and -A"); 886 " You need to choose between -f and -A");
884 usage_with_options(record_usage, record_options); 887 usage_with_options(record_usage, record_options);
885 } else if (append_file) { 888 } else if (append_file) {
886 write_mode = WRITE_APPEND; 889 write_mode = WRITE_APPEND;
887 } else { 890 } else {
888 write_mode = WRITE_FORCE; 891 write_mode = WRITE_FORCE;
889 } 892 }
890 893
891 symbol__init(); 894 symbol__init();
892 895
893 if (no_buildid_cache || no_buildid) 896 if (no_buildid_cache || no_buildid)
894 disable_buildid_cache(); 897 disable_buildid_cache();
895 898
896 if (!nr_counters) { 899 if (!nr_counters) {
897 nr_counters = 1; 900 nr_counters = 1;
898 attrs[0].type = PERF_TYPE_HARDWARE; 901 attrs[0].type = PERF_TYPE_HARDWARE;
899 attrs[0].config = PERF_COUNT_HW_CPU_CYCLES; 902 attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
900 } 903 }
901 904
902 if (target_pid != -1) { 905 if (target_pid != -1) {
903 target_tid = target_pid; 906 target_tid = target_pid;
904 thread_num = find_all_tid(target_pid, &all_tids); 907 thread_num = find_all_tid(target_pid, &all_tids);
905 if (thread_num <= 0) { 908 if (thread_num <= 0) {
906 fprintf(stderr, "Can't find all threads of pid %d\n", 909 fprintf(stderr, "Can't find all threads of pid %d\n",
907 target_pid); 910 target_pid);
908 usage_with_options(record_usage, record_options); 911 usage_with_options(record_usage, record_options);
909 } 912 }
910 } else { 913 } else {
911 all_tids=malloc(sizeof(pid_t)); 914 all_tids=malloc(sizeof(pid_t));
912 if (!all_tids) 915 if (!all_tids)
913 goto out_symbol_exit; 916 goto out_symbol_exit;
914 917
915 all_tids[0] = target_tid; 918 all_tids[0] = target_tid;
916 thread_num = 1; 919 thread_num = 1;
917 } 920 }
918 921
919 for (i = 0; i < MAX_NR_CPUS; i++) { 922 for (i = 0; i < MAX_NR_CPUS; i++) {
920 for (j = 0; j < MAX_COUNTERS; j++) { 923 for (j = 0; j < MAX_COUNTERS; j++) {
921 fd[i][j] = malloc(sizeof(int)*thread_num); 924 fd[i][j] = malloc(sizeof(int)*thread_num);
922 if (!fd[i][j]) 925 if (!fd[i][j])
923 goto out_free_fd; 926 goto out_free_fd;
924 } 927 }
925 } 928 }
926 event_array = malloc( 929 event_array = malloc(
927 sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num); 930 sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num);
928 if (!event_array) 931 if (!event_array)
929 goto out_free_fd; 932 goto out_free_fd;
930 933
931 if (user_interval != ULLONG_MAX) 934 if (user_interval != ULLONG_MAX)
932 default_interval = user_interval; 935 default_interval = user_interval;
933 if (user_freq != UINT_MAX) 936 if (user_freq != UINT_MAX)
934 freq = user_freq; 937 freq = user_freq;
935 938
936 /* 939 /*
937 * User specified count overrides default frequency. 940 * User specified count overrides default frequency.
938 */ 941 */
939 if (default_interval) 942 if (default_interval)
940 freq = 0; 943 freq = 0;
941 else if (freq) { 944 else if (freq) {
942 default_interval = freq; 945 default_interval = freq;
943 } else { 946 } else {
944 fprintf(stderr, "frequency and count are zero, aborting\n"); 947 fprintf(stderr, "frequency and count are zero, aborting\n");
945 err = -EINVAL; 948 err = -EINVAL;
946 goto out_free_event_array; 949 goto out_free_event_array;
947 } 950 }
948 951
949 err = __cmd_record(argc, argv); 952 err = __cmd_record(argc, argv);
950 953
951 out_free_event_array: 954 out_free_event_array:
952 free(event_array); 955 free(event_array);
953 out_free_fd: 956 out_free_fd:
954 for (i = 0; i < MAX_NR_CPUS; i++) { 957 for (i = 0; i < MAX_NR_CPUS; i++) {
955 for (j = 0; j < MAX_COUNTERS; j++) 958 for (j = 0; j < MAX_COUNTERS; j++)
956 free(fd[i][j]); 959 free(fd[i][j]);
957 } 960 }
958 free(all_tids); 961 free(all_tids);
959 all_tids = NULL; 962 all_tids = NULL;
960 out_symbol_exit: 963 out_symbol_exit:
961 symbol__exit(); 964 symbol__exit();
962 return err; 965 return err;
963 } 966 }
964 967
tools/perf/builtin-report.c
1 /* 1 /*
2 * builtin-report.c 2 * builtin-report.c
3 * 3 *
4 * Builtin report command: Analyze the perf.data input file, 4 * Builtin report command: Analyze the perf.data input file,
5 * look up and read DSOs and symbol information and display 5 * look up and read DSOs and symbol information and display
6 * a histogram of results, along various sorting keys. 6 * a histogram of results, along various sorting keys.
7 */ 7 */
8 #include "builtin.h" 8 #include "builtin.h"
9 9
10 #include "util/util.h" 10 #include "util/util.h"
11 11
12 #include "util/color.h" 12 #include "util/color.h"
13 #include <linux/list.h> 13 #include <linux/list.h>
14 #include "util/cache.h" 14 #include "util/cache.h"
15 #include <linux/rbtree.h> 15 #include <linux/rbtree.h>
16 #include "util/symbol.h" 16 #include "util/symbol.h"
17 #include "util/callchain.h" 17 #include "util/callchain.h"
18 #include "util/strlist.h" 18 #include "util/strlist.h"
19 #include "util/values.h" 19 #include "util/values.h"
20 20
21 #include "perf.h" 21 #include "perf.h"
22 #include "util/debug.h" 22 #include "util/debug.h"
23 #include "util/header.h" 23 #include "util/header.h"
24 #include "util/session.h" 24 #include "util/session.h"
25 25
26 #include "util/parse-options.h" 26 #include "util/parse-options.h"
27 #include "util/parse-events.h" 27 #include "util/parse-events.h"
28 28
29 #include "util/thread.h" 29 #include "util/thread.h"
30 #include "util/sort.h" 30 #include "util/sort.h"
31 #include "util/hist.h" 31 #include "util/hist.h"
32 32
33 static char const *input_name = "perf.data"; 33 static char const *input_name = "perf.data";
34 34
35 static bool force, use_tui, use_stdio; 35 static bool force, use_tui, use_stdio;
36 static bool hide_unresolved; 36 static bool hide_unresolved;
37 static bool dont_use_callchains; 37 static bool dont_use_callchains;
38 38
39 static bool show_threads; 39 static bool show_threads;
40 static struct perf_read_values show_threads_values; 40 static struct perf_read_values show_threads_values;
41 41
42 static const char default_pretty_printing_style[] = "normal"; 42 static const char default_pretty_printing_style[] = "normal";
43 static const char *pretty_printing_style = default_pretty_printing_style; 43 static const char *pretty_printing_style = default_pretty_printing_style;
44 44
45 static char callchain_default_opt[] = "fractal,0.5"; 45 static char callchain_default_opt[] = "fractal,0.5";
46 46
47 static struct hists *perf_session__hists_findnew(struct perf_session *self, 47 static struct hists *perf_session__hists_findnew(struct perf_session *self,
48 u64 event_stream, u32 type, 48 u64 event_stream, u32 type,
49 u64 config) 49 u64 config)
50 { 50 {
51 struct rb_node **p = &self->hists_tree.rb_node; 51 struct rb_node **p = &self->hists_tree.rb_node;
52 struct rb_node *parent = NULL; 52 struct rb_node *parent = NULL;
53 struct hists *iter, *new; 53 struct hists *iter, *new;
54 54
55 while (*p != NULL) { 55 while (*p != NULL) {
56 parent = *p; 56 parent = *p;
57 iter = rb_entry(parent, struct hists, rb_node); 57 iter = rb_entry(parent, struct hists, rb_node);
58 if (iter->config == config) 58 if (iter->config == config)
59 return iter; 59 return iter;
60 60
61 61
62 if (config > iter->config) 62 if (config > iter->config)
63 p = &(*p)->rb_right; 63 p = &(*p)->rb_right;
64 else 64 else
65 p = &(*p)->rb_left; 65 p = &(*p)->rb_left;
66 } 66 }
67 67
68 new = malloc(sizeof(struct hists)); 68 new = malloc(sizeof(struct hists));
69 if (new == NULL) 69 if (new == NULL)
70 return NULL; 70 return NULL;
71 memset(new, 0, sizeof(struct hists)); 71 memset(new, 0, sizeof(struct hists));
72 new->event_stream = event_stream; 72 new->event_stream = event_stream;
73 new->config = config; 73 new->config = config;
74 new->type = type; 74 new->type = type;
75 rb_link_node(&new->rb_node, parent, p); 75 rb_link_node(&new->rb_node, parent, p);
76 rb_insert_color(&new->rb_node, &self->hists_tree); 76 rb_insert_color(&new->rb_node, &self->hists_tree);
77 return new; 77 return new;
78 } 78 }
79 79
80 static int perf_session__add_hist_entry(struct perf_session *self, 80 static int perf_session__add_hist_entry(struct perf_session *self,
81 struct addr_location *al, 81 struct addr_location *al,
82 struct sample_data *data) 82 struct sample_data *data)
83 { 83 {
84 struct map_symbol *syms = NULL; 84 struct map_symbol *syms = NULL;
85 struct symbol *parent = NULL; 85 struct symbol *parent = NULL;
86 int err = -ENOMEM; 86 int err = -ENOMEM;
87 struct hist_entry *he; 87 struct hist_entry *he;
88 struct hists *hists; 88 struct hists *hists;
89 struct perf_event_attr *attr; 89 struct perf_event_attr *attr;
90 90
91 if ((sort__has_parent || symbol_conf.use_callchain) && data->callchain) { 91 if ((sort__has_parent || symbol_conf.use_callchain) && data->callchain) {
92 syms = perf_session__resolve_callchain(self, al->thread, 92 syms = perf_session__resolve_callchain(self, al->thread,
93 data->callchain, &parent); 93 data->callchain, &parent);
94 if (syms == NULL) 94 if (syms == NULL)
95 return -ENOMEM; 95 return -ENOMEM;
96 } 96 }
97 97
98 attr = perf_header__find_attr(data->id, &self->header); 98 attr = perf_header__find_attr(data->id, &self->header);
99 if (attr) 99 if (attr)
100 hists = perf_session__hists_findnew(self, data->id, attr->type, attr->config); 100 hists = perf_session__hists_findnew(self, data->id, attr->type, attr->config);
101 else 101 else
102 hists = perf_session__hists_findnew(self, data->id, 0, 0); 102 hists = perf_session__hists_findnew(self, data->id, 0, 0);
103 if (hists == NULL) 103 if (hists == NULL)
104 goto out_free_syms; 104 goto out_free_syms;
105 he = __hists__add_entry(hists, al, parent, data->period); 105 he = __hists__add_entry(hists, al, parent, data->period);
106 if (he == NULL) 106 if (he == NULL)
107 goto out_free_syms; 107 goto out_free_syms;
108 err = 0; 108 err = 0;
109 if (symbol_conf.use_callchain) { 109 if (symbol_conf.use_callchain) {
110 err = callchain_append(he->callchain, data->callchain, syms, 110 err = callchain_append(he->callchain, data->callchain, syms,
111 data->period); 111 data->period);
112 if (err) 112 if (err)
113 goto out_free_syms; 113 goto out_free_syms;
114 } 114 }
115 /* 115 /*
116 * Only in the newt browser we are doing integrated annotation, 116 * Only in the newt browser we are doing integrated annotation,
117 * so we don't allocated the extra space needed because the stdio 117 * so we don't allocated the extra space needed because the stdio
118 * code will not use it. 118 * code will not use it.
119 */ 119 */
120 if (use_browser > 0) 120 if (use_browser > 0)
121 err = hist_entry__inc_addr_samples(he, al->addr); 121 err = hist_entry__inc_addr_samples(he, al->addr);
122 out_free_syms: 122 out_free_syms:
123 free(syms); 123 free(syms);
124 return err; 124 return err;
125 } 125 }
126 126
127 static int add_event_total(struct perf_session *session, 127 static int add_event_total(struct perf_session *session,
128 struct sample_data *data, 128 struct sample_data *data,
129 struct perf_event_attr *attr) 129 struct perf_event_attr *attr)
130 { 130 {
131 struct hists *hists; 131 struct hists *hists;
132 132
133 if (attr) 133 if (attr)
134 hists = perf_session__hists_findnew(session, data->id, 134 hists = perf_session__hists_findnew(session, data->id,
135 attr->type, attr->config); 135 attr->type, attr->config);
136 else 136 else
137 hists = perf_session__hists_findnew(session, data->id, 0, 0); 137 hists = perf_session__hists_findnew(session, data->id, 0, 0);
138 138
139 if (!hists) 139 if (!hists)
140 return -ENOMEM; 140 return -ENOMEM;
141 141
142 hists->stats.total_period += data->period; 142 hists->stats.total_period += data->period;
143 /* 143 /*
144 * FIXME: add_event_total should be moved from here to 144 * FIXME: add_event_total should be moved from here to
145 * perf_session__process_event so that the proper hist is passed to 145 * perf_session__process_event so that the proper hist is passed to
146 * the event_op methods. 146 * the event_op methods.
147 */ 147 */
148 hists__inc_nr_events(hists, PERF_RECORD_SAMPLE); 148 hists__inc_nr_events(hists, PERF_RECORD_SAMPLE);
149 session->hists.stats.total_period += data->period; 149 session->hists.stats.total_period += data->period;
150 return 0; 150 return 0;
151 } 151 }
152 152
153 static int process_sample_event(event_t *event, struct sample_data *sample, 153 static int process_sample_event(event_t *event, struct sample_data *sample,
154 struct perf_session *session) 154 struct perf_session *session)
155 { 155 {
156 struct addr_location al; 156 struct addr_location al;
157 struct perf_event_attr *attr; 157 struct perf_event_attr *attr;
158 158
159 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { 159 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
160 fprintf(stderr, "problem processing %d event, skipping it.\n", 160 fprintf(stderr, "problem processing %d event, skipping it.\n",
161 event->header.type); 161 event->header.type);
162 return -1; 162 return -1;
163 } 163 }
164 164
165 if (al.filtered || (hide_unresolved && al.sym == NULL)) 165 if (al.filtered || (hide_unresolved && al.sym == NULL))
166 return 0; 166 return 0;
167 167
168 if (perf_session__add_hist_entry(session, &al, sample)) { 168 if (perf_session__add_hist_entry(session, &al, sample)) {
169 pr_debug("problem incrementing symbol period, skipping event\n"); 169 pr_debug("problem incrementing symbol period, skipping event\n");
170 return -1; 170 return -1;
171 } 171 }
172 172
173 attr = perf_header__find_attr(sample->id, &session->header); 173 attr = perf_header__find_attr(sample->id, &session->header);
174 174
175 if (add_event_total(session, sample, attr)) { 175 if (add_event_total(session, sample, attr)) {
176 pr_debug("problem adding event period\n"); 176 pr_debug("problem adding event period\n");
177 return -1; 177 return -1;
178 } 178 }
179 179
180 return 0; 180 return 0;
181 } 181 }
182 182
183 static int process_read_event(event_t *event, struct sample_data *sample __used, 183 static int process_read_event(event_t *event, struct sample_data *sample __used,
184 struct perf_session *session __used) 184 struct perf_session *session __used)
185 { 185 {
186 struct perf_event_attr *attr; 186 struct perf_event_attr *attr;
187 187
188 attr = perf_header__find_attr(event->read.id, &session->header); 188 attr = perf_header__find_attr(event->read.id, &session->header);
189 189
190 if (show_threads) { 190 if (show_threads) {
191 const char *name = attr ? __event_name(attr->type, attr->config) 191 const char *name = attr ? __event_name(attr->type, attr->config)
192 : "unknown"; 192 : "unknown";
193 perf_read_values_add_value(&show_threads_values, 193 perf_read_values_add_value(&show_threads_values,
194 event->read.pid, event->read.tid, 194 event->read.pid, event->read.tid,
195 event->read.id, 195 event->read.id,
196 name, 196 name,
197 event->read.value); 197 event->read.value);
198 } 198 }
199 199
200 dump_printf(": %d %d %s %Lu\n", event->read.pid, event->read.tid, 200 dump_printf(": %d %d %s %Lu\n", event->read.pid, event->read.tid,
201 attr ? __event_name(attr->type, attr->config) : "FAIL", 201 attr ? __event_name(attr->type, attr->config) : "FAIL",
202 event->read.value); 202 event->read.value);
203 203
204 return 0; 204 return 0;
205 } 205 }
206 206
207 static int perf_session__setup_sample_type(struct perf_session *self) 207 static int perf_session__setup_sample_type(struct perf_session *self)
208 { 208 {
209 if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) { 209 if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
210 if (sort__has_parent) { 210 if (sort__has_parent) {
211 fprintf(stderr, "selected --sort parent, but no" 211 fprintf(stderr, "selected --sort parent, but no"
212 " callchain data. Did you call" 212 " callchain data. Did you call"
213 " perf record without -g?\n"); 213 " perf record without -g?\n");
214 return -EINVAL; 214 return -EINVAL;
215 } 215 }
216 if (symbol_conf.use_callchain) { 216 if (symbol_conf.use_callchain) {
217 fprintf(stderr, "selected -g but no callchain data." 217 fprintf(stderr, "selected -g but no callchain data."
218 " Did you call perf record without" 218 " Did you call perf record without"
219 " -g?\n"); 219 " -g?\n");
220 return -1; 220 return -1;
221 } 221 }
222 } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE && 222 } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
223 !symbol_conf.use_callchain) { 223 !symbol_conf.use_callchain) {
224 symbol_conf.use_callchain = true; 224 symbol_conf.use_callchain = true;
225 if (register_callchain_param(&callchain_param) < 0) { 225 if (register_callchain_param(&callchain_param) < 0) {
226 fprintf(stderr, "Can't register callchain" 226 fprintf(stderr, "Can't register callchain"
227 " params\n"); 227 " params\n");
228 return -EINVAL; 228 return -EINVAL;
229 } 229 }
230 } 230 }
231 231
232 return 0; 232 return 0;
233 } 233 }
234 234
235 static struct perf_event_ops event_ops = { 235 static struct perf_event_ops event_ops = {
236 .sample = process_sample_event, 236 .sample = process_sample_event,
237 .mmap = event__process_mmap, 237 .mmap = event__process_mmap,
238 .comm = event__process_comm, 238 .comm = event__process_comm,
239 .exit = event__process_task, 239 .exit = event__process_task,
240 .fork = event__process_task, 240 .fork = event__process_task,
241 .lost = event__process_lost, 241 .lost = event__process_lost,
242 .read = process_read_event, 242 .read = process_read_event,
243 .attr = event__process_attr, 243 .attr = event__process_attr,
244 .event_type = event__process_event_type, 244 .event_type = event__process_event_type,
245 .tracing_data = event__process_tracing_data, 245 .tracing_data = event__process_tracing_data,
246 .build_id = event__process_build_id, 246 .build_id = event__process_build_id,
247 .ordered_samples = true,
248 .ordering_requires_timestamps = true,
247 }; 249 };
248 250
249 extern volatile int session_done; 251 extern volatile int session_done;
250 252
251 static void sig_handler(int sig __used) 253 static void sig_handler(int sig __used)
252 { 254 {
253 session_done = 1; 255 session_done = 1;
254 } 256 }
255 257
256 static size_t hists__fprintf_nr_sample_events(struct hists *self, 258 static size_t hists__fprintf_nr_sample_events(struct hists *self,
257 const char *evname, FILE *fp) 259 const char *evname, FILE *fp)
258 { 260 {
259 size_t ret; 261 size_t ret;
260 char unit; 262 char unit;
261 unsigned long nr_events = self->stats.nr_events[PERF_RECORD_SAMPLE]; 263 unsigned long nr_events = self->stats.nr_events[PERF_RECORD_SAMPLE];
262 264
263 nr_events = convert_unit(nr_events, &unit); 265 nr_events = convert_unit(nr_events, &unit);
264 ret = fprintf(fp, "# Events: %lu%c", nr_events, unit); 266 ret = fprintf(fp, "# Events: %lu%c", nr_events, unit);
265 if (evname != NULL) 267 if (evname != NULL)
266 ret += fprintf(fp, " %s", evname); 268 ret += fprintf(fp, " %s", evname);
267 return ret + fprintf(fp, "\n#\n"); 269 return ret + fprintf(fp, "\n#\n");
268 } 270 }
269 271
270 static int hists__tty_browse_tree(struct rb_root *tree, const char *help) 272 static int hists__tty_browse_tree(struct rb_root *tree, const char *help)
271 { 273 {
272 struct rb_node *next = rb_first(tree); 274 struct rb_node *next = rb_first(tree);
273 275
274 while (next) { 276 while (next) {
275 struct hists *hists = rb_entry(next, struct hists, rb_node); 277 struct hists *hists = rb_entry(next, struct hists, rb_node);
276 const char *evname = NULL; 278 const char *evname = NULL;
277 279
278 if (rb_first(&hists->entries) != rb_last(&hists->entries)) 280 if (rb_first(&hists->entries) != rb_last(&hists->entries))
279 evname = __event_name(hists->type, hists->config); 281 evname = __event_name(hists->type, hists->config);
280 282
281 hists__fprintf_nr_sample_events(hists, evname, stdout); 283 hists__fprintf_nr_sample_events(hists, evname, stdout);
282 hists__fprintf(hists, NULL, false, stdout); 284 hists__fprintf(hists, NULL, false, stdout);
283 fprintf(stdout, "\n\n"); 285 fprintf(stdout, "\n\n");
284 next = rb_next(&hists->rb_node); 286 next = rb_next(&hists->rb_node);
285 } 287 }
286 288
287 if (sort_order == default_sort_order && 289 if (sort_order == default_sort_order &&
288 parent_pattern == default_parent_pattern) { 290 parent_pattern == default_parent_pattern) {
289 fprintf(stdout, "#\n# (%s)\n#\n", help); 291 fprintf(stdout, "#\n# (%s)\n#\n", help);
290 292
291 if (show_threads) { 293 if (show_threads) {
292 bool style = !strcmp(pretty_printing_style, "raw"); 294 bool style = !strcmp(pretty_printing_style, "raw");
293 perf_read_values_display(stdout, &show_threads_values, 295 perf_read_values_display(stdout, &show_threads_values,
294 style); 296 style);
295 perf_read_values_destroy(&show_threads_values); 297 perf_read_values_destroy(&show_threads_values);
296 } 298 }
297 } 299 }
298 300
299 return 0; 301 return 0;
300 } 302 }
301 303
302 static int __cmd_report(void) 304 static int __cmd_report(void)
303 { 305 {
304 int ret = -EINVAL; 306 int ret = -EINVAL;
305 struct perf_session *session; 307 struct perf_session *session;
306 struct rb_node *next; 308 struct rb_node *next;
307 const char *help = "For a higher level overview, try: perf report --sort comm,dso"; 309 const char *help = "For a higher level overview, try: perf report --sort comm,dso";
308 310
309 signal(SIGINT, sig_handler); 311 signal(SIGINT, sig_handler);
310 312
311 session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops); 313 session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
312 if (session == NULL) 314 if (session == NULL)
313 return -ENOMEM; 315 return -ENOMEM;
314 316
315 if (show_threads) 317 if (show_threads)
316 perf_read_values_init(&show_threads_values); 318 perf_read_values_init(&show_threads_values);
317 319
318 ret = perf_session__setup_sample_type(session); 320 ret = perf_session__setup_sample_type(session);
319 if (ret) 321 if (ret)
320 goto out_delete; 322 goto out_delete;
321 323
322 ret = perf_session__process_events(session, &event_ops); 324 ret = perf_session__process_events(session, &event_ops);
323 if (ret) 325 if (ret)
324 goto out_delete; 326 goto out_delete;
325 327
326 if (dump_trace) { 328 if (dump_trace) {
327 perf_session__fprintf_nr_events(session, stdout); 329 perf_session__fprintf_nr_events(session, stdout);
328 goto out_delete; 330 goto out_delete;
329 } 331 }
330 332
331 if (verbose > 3) 333 if (verbose > 3)
332 perf_session__fprintf(session, stdout); 334 perf_session__fprintf(session, stdout);
333 335
334 if (verbose > 2) 336 if (verbose > 2)
335 perf_session__fprintf_dsos(session, stdout); 337 perf_session__fprintf_dsos(session, stdout);
336 338
337 next = rb_first(&session->hists_tree); 339 next = rb_first(&session->hists_tree);
338 while (next) { 340 while (next) {
339 struct hists *hists; 341 struct hists *hists;
340 342
341 hists = rb_entry(next, struct hists, rb_node); 343 hists = rb_entry(next, struct hists, rb_node);
342 hists__collapse_resort(hists); 344 hists__collapse_resort(hists);
343 hists__output_resort(hists); 345 hists__output_resort(hists);
344 next = rb_next(&hists->rb_node); 346 next = rb_next(&hists->rb_node);
345 } 347 }
346 348
347 if (use_browser > 0) 349 if (use_browser > 0)
348 hists__tui_browse_tree(&session->hists_tree, help); 350 hists__tui_browse_tree(&session->hists_tree, help);
349 else 351 else
350 hists__tty_browse_tree(&session->hists_tree, help); 352 hists__tty_browse_tree(&session->hists_tree, help);
351 353
352 out_delete: 354 out_delete:
353 /* 355 /*
354 * Speed up the exit process, for large files this can 356 * Speed up the exit process, for large files this can
355 * take quite a while. 357 * take quite a while.
356 * 358 *
357 * XXX Enable this when using valgrind or if we ever 359 * XXX Enable this when using valgrind or if we ever
358 * librarize this command. 360 * librarize this command.
359 * 361 *
360 * Also experiment with obstacks to see how much speed 362 * Also experiment with obstacks to see how much speed
361 * up we'll get here. 363 * up we'll get here.
362 * 364 *
363 * perf_session__delete(session); 365 * perf_session__delete(session);
364 */ 366 */
365 return ret; 367 return ret;
366 } 368 }
367 369
368 static int 370 static int
369 parse_callchain_opt(const struct option *opt __used, const char *arg, 371 parse_callchain_opt(const struct option *opt __used, const char *arg,
370 int unset) 372 int unset)
371 { 373 {
372 char *tok, *tok2; 374 char *tok, *tok2;
373 char *endptr; 375 char *endptr;
374 376
375 /* 377 /*
376 * --no-call-graph 378 * --no-call-graph
377 */ 379 */
378 if (unset) { 380 if (unset) {
379 dont_use_callchains = true; 381 dont_use_callchains = true;
380 return 0; 382 return 0;
381 } 383 }
382 384
383 symbol_conf.use_callchain = true; 385 symbol_conf.use_callchain = true;
384 386
385 if (!arg) 387 if (!arg)
386 return 0; 388 return 0;
387 389
388 tok = strtok((char *)arg, ","); 390 tok = strtok((char *)arg, ",");
389 if (!tok) 391 if (!tok)
390 return -1; 392 return -1;
391 393
392 /* get the output mode */ 394 /* get the output mode */
393 if (!strncmp(tok, "graph", strlen(arg))) 395 if (!strncmp(tok, "graph", strlen(arg)))
394 callchain_param.mode = CHAIN_GRAPH_ABS; 396 callchain_param.mode = CHAIN_GRAPH_ABS;
395 397
396 else if (!strncmp(tok, "flat", strlen(arg))) 398 else if (!strncmp(tok, "flat", strlen(arg)))
397 callchain_param.mode = CHAIN_FLAT; 399 callchain_param.mode = CHAIN_FLAT;
398 400
399 else if (!strncmp(tok, "fractal", strlen(arg))) 401 else if (!strncmp(tok, "fractal", strlen(arg)))
400 callchain_param.mode = CHAIN_GRAPH_REL; 402 callchain_param.mode = CHAIN_GRAPH_REL;
401 403
402 else if (!strncmp(tok, "none", strlen(arg))) { 404 else if (!strncmp(tok, "none", strlen(arg))) {
403 callchain_param.mode = CHAIN_NONE; 405 callchain_param.mode = CHAIN_NONE;
404 symbol_conf.use_callchain = false; 406 symbol_conf.use_callchain = false;
405 407
406 return 0; 408 return 0;
407 } 409 }
408 410
409 else 411 else
410 return -1; 412 return -1;
411 413
412 /* get the min percentage */ 414 /* get the min percentage */
413 tok = strtok(NULL, ","); 415 tok = strtok(NULL, ",");
414 if (!tok) 416 if (!tok)
415 goto setup; 417 goto setup;
416 418
417 tok2 = strtok(NULL, ","); 419 tok2 = strtok(NULL, ",");
418 callchain_param.min_percent = strtod(tok, &endptr); 420 callchain_param.min_percent = strtod(tok, &endptr);
419 if (tok == endptr) 421 if (tok == endptr)
420 return -1; 422 return -1;
421 423
422 if (tok2) 424 if (tok2)
423 callchain_param.print_limit = strtod(tok2, &endptr); 425 callchain_param.print_limit = strtod(tok2, &endptr);
424 setup: 426 setup:
425 if (register_callchain_param(&callchain_param) < 0) { 427 if (register_callchain_param(&callchain_param) < 0) {
426 fprintf(stderr, "Can't register callchain params\n"); 428 fprintf(stderr, "Can't register callchain params\n");
427 return -1; 429 return -1;
428 } 430 }
429 return 0; 431 return 0;
430 } 432 }
431 433
/*
 * Usage synopsis shown by the option parser.  perf report takes options
 * only: cmd_report() prints this usage and bails out when any non-option
 * argument is left over, so no <command> operand is advertised.
 */
static const char * const report_usage[] = {
	"perf report [<options>]",
	NULL
};
436 438
437 static const struct option options[] = { 439 static const struct option options[] = {
438 OPT_STRING('i', "input", &input_name, "file", 440 OPT_STRING('i', "input", &input_name, "file",
439 "input file name"), 441 "input file name"),
440 OPT_INCR('v', "verbose", &verbose, 442 OPT_INCR('v', "verbose", &verbose,
441 "be more verbose (show symbol address, etc)"), 443 "be more verbose (show symbol address, etc)"),
442 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 444 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
443 "dump raw trace in ASCII"), 445 "dump raw trace in ASCII"),
444 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 446 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
445 "file", "vmlinux pathname"), 447 "file", "vmlinux pathname"),
446 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 448 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
447 "file", "kallsyms pathname"), 449 "file", "kallsyms pathname"),
448 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 450 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
449 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 451 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
450 "load module symbols - WARNING: use only with -k and LIVE kernel"), 452 "load module symbols - WARNING: use only with -k and LIVE kernel"),
451 OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, 453 OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
452 "Show a column with the number of samples"), 454 "Show a column with the number of samples"),
453 OPT_BOOLEAN('T', "threads", &show_threads, 455 OPT_BOOLEAN('T', "threads", &show_threads,
454 "Show per-thread event counters"), 456 "Show per-thread event counters"),
455 OPT_STRING(0, "pretty", &pretty_printing_style, "key", 457 OPT_STRING(0, "pretty", &pretty_printing_style, "key",
456 "pretty printing style key: normal raw"), 458 "pretty printing style key: normal raw"),
457 OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), 459 OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
458 OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), 460 OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
459 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 461 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
460 "sort by key(s): pid, comm, dso, symbol, parent"), 462 "sort by key(s): pid, comm, dso, symbol, parent"),
461 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, 463 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
462 "Show sample percentage for different cpu modes"), 464 "Show sample percentage for different cpu modes"),
463 OPT_STRING('p', "parent", &parent_pattern, "regex", 465 OPT_STRING('p', "parent", &parent_pattern, "regex",
464 "regex filter to identify parent, see: '--sort parent'"), 466 "regex filter to identify parent, see: '--sort parent'"),
465 OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, 467 OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
466 "Only display entries with parent-match"), 468 "Only display entries with parent-match"),
467 OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent", 469 OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent",
468 "Display callchains using output_type (graph, flat, fractal, or none) and min percent threshold. " 470 "Display callchains using output_type (graph, flat, fractal, or none) and min percent threshold. "
469 "Default: fractal,0.5", &parse_callchain_opt, callchain_default_opt), 471 "Default: fractal,0.5", &parse_callchain_opt, callchain_default_opt),
470 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", 472 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
471 "only consider symbols in these dsos"), 473 "only consider symbols in these dsos"),
472 OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", 474 OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
473 "only consider symbols in these comms"), 475 "only consider symbols in these comms"),
474 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", 476 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
475 "only consider these symbols"), 477 "only consider these symbols"),
476 OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str, 478 OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
477 "width[,width...]", 479 "width[,width...]",
478 "don't try to adjust column width, use these fixed values"), 480 "don't try to adjust column width, use these fixed values"),
479 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", 481 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
480 "separator for columns, no spaces will be added between " 482 "separator for columns, no spaces will be added between "
481 "columns '.' is reserved."), 483 "columns '.' is reserved."),
482 OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved, 484 OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
483 "Only display entries resolved to a symbol"), 485 "Only display entries resolved to a symbol"),
484 OPT_END() 486 OPT_END()
485 }; 487 };
486 488
487 int cmd_report(int argc, const char **argv, const char *prefix __used) 489 int cmd_report(int argc, const char **argv, const char *prefix __used)
488 { 490 {
489 argc = parse_options(argc, argv, options, report_usage, 0); 491 argc = parse_options(argc, argv, options, report_usage, 0);
490 492
491 if (use_stdio) 493 if (use_stdio)
492 use_browser = 0; 494 use_browser = 0;
493 else if (use_tui) 495 else if (use_tui)
494 use_browser = 1; 496 use_browser = 1;
495 497
496 if (strcmp(input_name, "-") != 0) 498 if (strcmp(input_name, "-") != 0)
497 setup_browser(); 499 setup_browser();
498 else 500 else
499 use_browser = 0; 501 use_browser = 0;
500 /* 502 /*
501 * Only in the newt browser we are doing integrated annotation, 503 * Only in the newt browser we are doing integrated annotation,
502 * so don't allocate extra space that won't be used in the stdio 504 * so don't allocate extra space that won't be used in the stdio
503 * implementation. 505 * implementation.
504 */ 506 */
505 if (use_browser > 0) { 507 if (use_browser > 0) {
506 symbol_conf.priv_size = sizeof(struct sym_priv); 508 symbol_conf.priv_size = sizeof(struct sym_priv);
507 /* 509 /*
508 * For searching by name on the "Browse map details". 510 * For searching by name on the "Browse map details".
509 * providing it only in verbose mode not to bloat too 511 * providing it only in verbose mode not to bloat too
510 * much struct symbol. 512 * much struct symbol.
511 */ 513 */
512 if (verbose) { 514 if (verbose) {
513 /* 515 /*
514 * XXX: Need to provide a less kludgy way to ask for 516 * XXX: Need to provide a less kludgy way to ask for
515 * more space per symbol, the u32 is for the index on 517 * more space per symbol, the u32 is for the index on
516 * the ui browser. 518 * the ui browser.
517 * See symbol__browser_index. 519 * See symbol__browser_index.
518 */ 520 */
519 symbol_conf.priv_size += sizeof(u32); 521 symbol_conf.priv_size += sizeof(u32);
520 symbol_conf.sort_by_name = true; 522 symbol_conf.sort_by_name = true;
521 } 523 }
522 } 524 }
523 525
524 if (symbol__init() < 0) 526 if (symbol__init() < 0)
525 return -1; 527 return -1;
526 528
527 setup_sorting(report_usage, options); 529 setup_sorting(report_usage, options);
528 530
529 if (parent_pattern != default_parent_pattern) { 531 if (parent_pattern != default_parent_pattern) {
530 if (sort_dimension__add("parent") < 0) 532 if (sort_dimension__add("parent") < 0)
531 return -1; 533 return -1;
532 sort_parent.elide = 1; 534 sort_parent.elide = 1;
533 } else 535 } else
534 symbol_conf.exclude_other = false; 536 symbol_conf.exclude_other = false;
535 537
536 /* 538 /*
537 * Any (unrecognized) arguments left? 539 * Any (unrecognized) arguments left?
538 */ 540 */
539 if (argc) 541 if (argc)
540 usage_with_options(report_usage, options); 542 usage_with_options(report_usage, options);
541 543
542 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); 544 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
543 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); 545 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
544 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); 546 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
545 547
546 return __cmd_report(); 548 return __cmd_report();
547 } 549 }
548 550