Commit 3e76ac78b08479e84a3eca3fb1b3066fb8230461

Authored by Andrew Vagin
Committed by Arnaldo Carvalho de Melo
1 parent 124ba94033

perf record: Add ability to record event period

The problem is that when SAMPLE_PERIOD is not set, the kernel generates
a number of samples in proportion to an event's period. Number of these
samples may be too big and the kernel throttles all samples above a
defined limit.

E.g.: I want to trace when a process sleeps. I created a process which
sleeps for 1ms and for 4ms.  perf got 100 events in both cases.

swapper 0 [000] 1141.371830: sched_stat_sleep: comm=foo pid=1801 delay=1386750 [ns]
swapper 0 [000] 1141.369444: sched_stat_sleep: comm=foo pid=1801 delay=4499585 [ns]

In the first case a kernel want to send 4499585 events and in the second
case it wants to send 1386750 events.  perf-reports shows that process
sleeps in both places equal time.

Instead of this we can get only one sample with an attribute period. As
result we have less data transferring between kernel and user-space and
we avoid throttling of samples.

The patch "events: Don't divide events if it has field period" added a
kernel part of this functionality.

Acked-by: Arun Sharma <asharma@fb.com>
Cc: Arun Sharma <asharma@fb.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: devel@openvz.org
Link: http://lkml.kernel.org/r/1324391565-1369947-1-git-send-email-avagin@openvz.org
Signed-off-by: Andrew Vagin <avagin@openvz.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Showing 3 changed files with 5 additions and 0 deletions Inline Diff

tools/perf/builtin-record.c
1 /* 1 /*
2 * builtin-record.c 2 * builtin-record.c
3 * 3 *
4 * Builtin record command: Record the profile of a workload 4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for 5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report. 6 * later analysis via perf report.
7 */ 7 */
8 #define _FILE_OFFSET_BITS 64 8 #define _FILE_OFFSET_BITS 64
9 9
10 #include "builtin.h" 10 #include "builtin.h"
11 11
12 #include "perf.h" 12 #include "perf.h"
13 13
14 #include "util/build-id.h" 14 #include "util/build-id.h"
15 #include "util/util.h" 15 #include "util/util.h"
16 #include "util/parse-options.h" 16 #include "util/parse-options.h"
17 #include "util/parse-events.h" 17 #include "util/parse-events.h"
18 18
19 #include "util/header.h" 19 #include "util/header.h"
20 #include "util/event.h" 20 #include "util/event.h"
21 #include "util/evlist.h" 21 #include "util/evlist.h"
22 #include "util/evsel.h" 22 #include "util/evsel.h"
23 #include "util/debug.h" 23 #include "util/debug.h"
24 #include "util/session.h" 24 #include "util/session.h"
25 #include "util/tool.h" 25 #include "util/tool.h"
26 #include "util/symbol.h" 26 #include "util/symbol.h"
27 #include "util/cpumap.h" 27 #include "util/cpumap.h"
28 #include "util/thread_map.h" 28 #include "util/thread_map.h"
29 29
30 #include <unistd.h> 30 #include <unistd.h>
31 #include <sched.h> 31 #include <sched.h>
32 #include <sys/mman.h> 32 #include <sys/mman.h>
33 33
34 enum write_mode_t { 34 enum write_mode_t {
35 WRITE_FORCE, 35 WRITE_FORCE,
36 WRITE_APPEND 36 WRITE_APPEND
37 }; 37 };
38 38
39 struct perf_record { 39 struct perf_record {
40 struct perf_tool tool; 40 struct perf_tool tool;
41 struct perf_record_opts opts; 41 struct perf_record_opts opts;
42 u64 bytes_written; 42 u64 bytes_written;
43 const char *output_name; 43 const char *output_name;
44 struct perf_evlist *evlist; 44 struct perf_evlist *evlist;
45 struct perf_session *session; 45 struct perf_session *session;
46 const char *progname; 46 const char *progname;
47 int output; 47 int output;
48 unsigned int page_size; 48 unsigned int page_size;
49 int realtime_prio; 49 int realtime_prio;
50 enum write_mode_t write_mode; 50 enum write_mode_t write_mode;
51 bool no_buildid; 51 bool no_buildid;
52 bool no_buildid_cache; 52 bool no_buildid_cache;
53 bool force; 53 bool force;
54 bool file_new; 54 bool file_new;
55 bool append_file; 55 bool append_file;
56 long samples; 56 long samples;
57 off_t post_processing_offset; 57 off_t post_processing_offset;
58 }; 58 };
59 59
60 static void advance_output(struct perf_record *rec, size_t size) 60 static void advance_output(struct perf_record *rec, size_t size)
61 { 61 {
62 rec->bytes_written += size; 62 rec->bytes_written += size;
63 } 63 }
64 64
65 static void write_output(struct perf_record *rec, void *buf, size_t size) 65 static void write_output(struct perf_record *rec, void *buf, size_t size)
66 { 66 {
67 while (size) { 67 while (size) {
68 int ret = write(rec->output, buf, size); 68 int ret = write(rec->output, buf, size);
69 69
70 if (ret < 0) 70 if (ret < 0)
71 die("failed to write"); 71 die("failed to write");
72 72
73 size -= ret; 73 size -= ret;
74 buf += ret; 74 buf += ret;
75 75
76 rec->bytes_written += ret; 76 rec->bytes_written += ret;
77 } 77 }
78 } 78 }
79 79
80 static int process_synthesized_event(struct perf_tool *tool, 80 static int process_synthesized_event(struct perf_tool *tool,
81 union perf_event *event, 81 union perf_event *event,
82 struct perf_sample *sample __used, 82 struct perf_sample *sample __used,
83 struct machine *machine __used) 83 struct machine *machine __used)
84 { 84 {
85 struct perf_record *rec = container_of(tool, struct perf_record, tool); 85 struct perf_record *rec = container_of(tool, struct perf_record, tool);
86 write_output(rec, event, event->header.size); 86 write_output(rec, event, event->header.size);
87 return 0; 87 return 0;
88 } 88 }
89 89
90 static void perf_record__mmap_read(struct perf_record *rec, 90 static void perf_record__mmap_read(struct perf_record *rec,
91 struct perf_mmap *md) 91 struct perf_mmap *md)
92 { 92 {
93 unsigned int head = perf_mmap__read_head(md); 93 unsigned int head = perf_mmap__read_head(md);
94 unsigned int old = md->prev; 94 unsigned int old = md->prev;
95 unsigned char *data = md->base + rec->page_size; 95 unsigned char *data = md->base + rec->page_size;
96 unsigned long size; 96 unsigned long size;
97 void *buf; 97 void *buf;
98 98
99 if (old == head) 99 if (old == head)
100 return; 100 return;
101 101
102 rec->samples++; 102 rec->samples++;
103 103
104 size = head - old; 104 size = head - old;
105 105
106 if ((old & md->mask) + size != (head & md->mask)) { 106 if ((old & md->mask) + size != (head & md->mask)) {
107 buf = &data[old & md->mask]; 107 buf = &data[old & md->mask];
108 size = md->mask + 1 - (old & md->mask); 108 size = md->mask + 1 - (old & md->mask);
109 old += size; 109 old += size;
110 110
111 write_output(rec, buf, size); 111 write_output(rec, buf, size);
112 } 112 }
113 113
114 buf = &data[old & md->mask]; 114 buf = &data[old & md->mask];
115 size = head - old; 115 size = head - old;
116 old += size; 116 old += size;
117 117
118 write_output(rec, buf, size); 118 write_output(rec, buf, size);
119 119
120 md->prev = old; 120 md->prev = old;
121 perf_mmap__write_tail(md, old); 121 perf_mmap__write_tail(md, old);
122 } 122 }
123 123
124 static volatile int done = 0; 124 static volatile int done = 0;
125 static volatile int signr = -1; 125 static volatile int signr = -1;
126 static volatile int child_finished = 0; 126 static volatile int child_finished = 0;
127 127
128 static void sig_handler(int sig) 128 static void sig_handler(int sig)
129 { 129 {
130 if (sig == SIGCHLD) 130 if (sig == SIGCHLD)
131 child_finished = 1; 131 child_finished = 1;
132 132
133 done = 1; 133 done = 1;
134 signr = sig; 134 signr = sig;
135 } 135 }
136 136
137 static void perf_record__sig_exit(int exit_status __used, void *arg) 137 static void perf_record__sig_exit(int exit_status __used, void *arg)
138 { 138 {
139 struct perf_record *rec = arg; 139 struct perf_record *rec = arg;
140 int status; 140 int status;
141 141
142 if (rec->evlist->workload.pid > 0) { 142 if (rec->evlist->workload.pid > 0) {
143 if (!child_finished) 143 if (!child_finished)
144 kill(rec->evlist->workload.pid, SIGTERM); 144 kill(rec->evlist->workload.pid, SIGTERM);
145 145
146 wait(&status); 146 wait(&status);
147 if (WIFSIGNALED(status)) 147 if (WIFSIGNALED(status))
148 psignal(WTERMSIG(status), rec->progname); 148 psignal(WTERMSIG(status), rec->progname);
149 } 149 }
150 150
151 if (signr == -1 || signr == SIGUSR1) 151 if (signr == -1 || signr == SIGUSR1)
152 return; 152 return;
153 153
154 signal(signr, SIG_DFL); 154 signal(signr, SIG_DFL);
155 kill(getpid(), signr); 155 kill(getpid(), signr);
156 } 156 }
157 157
158 static bool perf_evlist__equal(struct perf_evlist *evlist, 158 static bool perf_evlist__equal(struct perf_evlist *evlist,
159 struct perf_evlist *other) 159 struct perf_evlist *other)
160 { 160 {
161 struct perf_evsel *pos, *pair; 161 struct perf_evsel *pos, *pair;
162 162
163 if (evlist->nr_entries != other->nr_entries) 163 if (evlist->nr_entries != other->nr_entries)
164 return false; 164 return false;
165 165
166 pair = list_entry(other->entries.next, struct perf_evsel, node); 166 pair = list_entry(other->entries.next, struct perf_evsel, node);
167 167
168 list_for_each_entry(pos, &evlist->entries, node) { 168 list_for_each_entry(pos, &evlist->entries, node) {
169 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0)) 169 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
170 return false; 170 return false;
171 pair = list_entry(pair->node.next, struct perf_evsel, node); 171 pair = list_entry(pair->node.next, struct perf_evsel, node);
172 } 172 }
173 173
174 return true; 174 return true;
175 } 175 }
176 176
177 static void perf_record__open(struct perf_record *rec) 177 static void perf_record__open(struct perf_record *rec)
178 { 178 {
179 struct perf_evsel *pos, *first; 179 struct perf_evsel *pos, *first;
180 struct perf_evlist *evlist = rec->evlist; 180 struct perf_evlist *evlist = rec->evlist;
181 struct perf_session *session = rec->session; 181 struct perf_session *session = rec->session;
182 struct perf_record_opts *opts = &rec->opts; 182 struct perf_record_opts *opts = &rec->opts;
183 183
184 first = list_entry(evlist->entries.next, struct perf_evsel, node); 184 first = list_entry(evlist->entries.next, struct perf_evsel, node);
185 185
186 perf_evlist__config_attrs(evlist, opts); 186 perf_evlist__config_attrs(evlist, opts);
187 187
188 list_for_each_entry(pos, &evlist->entries, node) { 188 list_for_each_entry(pos, &evlist->entries, node) {
189 struct perf_event_attr *attr = &pos->attr; 189 struct perf_event_attr *attr = &pos->attr;
190 struct xyarray *group_fd = NULL; 190 struct xyarray *group_fd = NULL;
191 /* 191 /*
192 * Check if parse_single_tracepoint_event has already asked for 192 * Check if parse_single_tracepoint_event has already asked for
193 * PERF_SAMPLE_TIME. 193 * PERF_SAMPLE_TIME.
194 * 194 *
195 * XXX this is kludgy but short term fix for problems introduced by 195 * XXX this is kludgy but short term fix for problems introduced by
196 * eac23d1c that broke 'perf script' by having different sample_types 196 * eac23d1c that broke 'perf script' by having different sample_types
197 * when using multiple tracepoint events when we use a perf binary 197 * when using multiple tracepoint events when we use a perf binary
198 * that tries to use sample_id_all on an older kernel. 198 * that tries to use sample_id_all on an older kernel.
199 * 199 *
200 * We need to move counter creation to perf_session, support 200 * We need to move counter creation to perf_session, support
201 * different sample_types, etc. 201 * different sample_types, etc.
202 */ 202 */
203 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME; 203 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
204 204
205 if (opts->group && pos != first) 205 if (opts->group && pos != first)
206 group_fd = first->fd; 206 group_fd = first->fd;
207 retry_sample_id: 207 retry_sample_id:
208 attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; 208 attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
209 try_again: 209 try_again:
210 if (perf_evsel__open(pos, evlist->cpus, evlist->threads, 210 if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
211 opts->group, group_fd) < 0) { 211 opts->group, group_fd) < 0) {
212 int err = errno; 212 int err = errno;
213 213
214 if (err == EPERM || err == EACCES) { 214 if (err == EPERM || err == EACCES) {
215 ui__error_paranoid(); 215 ui__error_paranoid();
216 exit(EXIT_FAILURE); 216 exit(EXIT_FAILURE);
217 } else if (err == ENODEV && opts->cpu_list) { 217 } else if (err == ENODEV && opts->cpu_list) {
218 die("No such device - did you specify" 218 die("No such device - did you specify"
219 " an out-of-range profile CPU?\n"); 219 " an out-of-range profile CPU?\n");
220 } else if (err == EINVAL && opts->sample_id_all_avail) { 220 } else if (err == EINVAL && opts->sample_id_all_avail) {
221 /* 221 /*
222 * Old kernel, no attr->sample_id_type_all field 222 * Old kernel, no attr->sample_id_type_all field
223 */ 223 */
224 opts->sample_id_all_avail = false; 224 opts->sample_id_all_avail = false;
225 if (!opts->sample_time && !opts->raw_samples && !time_needed) 225 if (!opts->sample_time && !opts->raw_samples && !time_needed)
226 attr->sample_type &= ~PERF_SAMPLE_TIME; 226 attr->sample_type &= ~PERF_SAMPLE_TIME;
227 227
228 goto retry_sample_id; 228 goto retry_sample_id;
229 } 229 }
230 230
231 /* 231 /*
232 * If it's cycles then fall back to hrtimer 232 * If it's cycles then fall back to hrtimer
233 * based cpu-clock-tick sw counter, which 233 * based cpu-clock-tick sw counter, which
234 * is always available even if no PMU support: 234 * is always available even if no PMU support:
235 */ 235 */
236 if (attr->type == PERF_TYPE_HARDWARE 236 if (attr->type == PERF_TYPE_HARDWARE
237 && attr->config == PERF_COUNT_HW_CPU_CYCLES) { 237 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
238 238
239 if (verbose) 239 if (verbose)
240 ui__warning("The cycles event is not supported, " 240 ui__warning("The cycles event is not supported, "
241 "trying to fall back to cpu-clock-ticks\n"); 241 "trying to fall back to cpu-clock-ticks\n");
242 attr->type = PERF_TYPE_SOFTWARE; 242 attr->type = PERF_TYPE_SOFTWARE;
243 attr->config = PERF_COUNT_SW_CPU_CLOCK; 243 attr->config = PERF_COUNT_SW_CPU_CLOCK;
244 goto try_again; 244 goto try_again;
245 } 245 }
246 246
247 if (err == ENOENT) { 247 if (err == ENOENT) {
248 ui__warning("The %s event is not supported.\n", 248 ui__warning("The %s event is not supported.\n",
249 event_name(pos)); 249 event_name(pos));
250 exit(EXIT_FAILURE); 250 exit(EXIT_FAILURE);
251 } 251 }
252 252
253 printf("\n"); 253 printf("\n");
254 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n", 254 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
255 err, strerror(err)); 255 err, strerror(err));
256 256
257 #if defined(__i386__) || defined(__x86_64__) 257 #if defined(__i386__) || defined(__x86_64__)
258 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) 258 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
259 die("No hardware sampling interrupt available." 259 die("No hardware sampling interrupt available."
260 " No APIC? If so then you can boot the kernel" 260 " No APIC? If so then you can boot the kernel"
261 " with the \"lapic\" boot parameter to" 261 " with the \"lapic\" boot parameter to"
262 " force-enable it.\n"); 262 " force-enable it.\n");
263 #endif 263 #endif
264 264
265 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 265 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
266 } 266 }
267 } 267 }
268 268
269 if (perf_evlist__set_filters(evlist)) { 269 if (perf_evlist__set_filters(evlist)) {
270 error("failed to set filter with %d (%s)\n", errno, 270 error("failed to set filter with %d (%s)\n", errno,
271 strerror(errno)); 271 strerror(errno));
272 exit(-1); 272 exit(-1);
273 } 273 }
274 274
275 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) 275 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0)
276 die("failed to mmap with %d (%s)\n", errno, strerror(errno)); 276 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
277 277
278 if (rec->file_new) 278 if (rec->file_new)
279 session->evlist = evlist; 279 session->evlist = evlist;
280 else { 280 else {
281 if (!perf_evlist__equal(session->evlist, evlist)) { 281 if (!perf_evlist__equal(session->evlist, evlist)) {
282 fprintf(stderr, "incompatible append\n"); 282 fprintf(stderr, "incompatible append\n");
283 exit(-1); 283 exit(-1);
284 } 284 }
285 } 285 }
286 286
287 perf_session__update_sample_type(session); 287 perf_session__update_sample_type(session);
288 } 288 }
289 289
290 static int process_buildids(struct perf_record *rec) 290 static int process_buildids(struct perf_record *rec)
291 { 291 {
292 u64 size = lseek(rec->output, 0, SEEK_CUR); 292 u64 size = lseek(rec->output, 0, SEEK_CUR);
293 293
294 if (size == 0) 294 if (size == 0)
295 return 0; 295 return 0;
296 296
297 rec->session->fd = rec->output; 297 rec->session->fd = rec->output;
298 return __perf_session__process_events(rec->session, rec->post_processing_offset, 298 return __perf_session__process_events(rec->session, rec->post_processing_offset,
299 size - rec->post_processing_offset, 299 size - rec->post_processing_offset,
300 size, &build_id__mark_dso_hit_ops); 300 size, &build_id__mark_dso_hit_ops);
301 } 301 }
302 302
303 static void perf_record__exit(int status __used, void *arg) 303 static void perf_record__exit(int status __used, void *arg)
304 { 304 {
305 struct perf_record *rec = arg; 305 struct perf_record *rec = arg;
306 306
307 if (!rec->opts.pipe_output) { 307 if (!rec->opts.pipe_output) {
308 rec->session->header.data_size += rec->bytes_written; 308 rec->session->header.data_size += rec->bytes_written;
309 309
310 if (!rec->no_buildid) 310 if (!rec->no_buildid)
311 process_buildids(rec); 311 process_buildids(rec);
312 perf_session__write_header(rec->session, rec->evlist, 312 perf_session__write_header(rec->session, rec->evlist,
313 rec->output, true); 313 rec->output, true);
314 perf_session__delete(rec->session); 314 perf_session__delete(rec->session);
315 perf_evlist__delete(rec->evlist); 315 perf_evlist__delete(rec->evlist);
316 symbol__exit(); 316 symbol__exit();
317 } 317 }
318 } 318 }
319 319
320 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 320 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
321 { 321 {
322 int err; 322 int err;
323 struct perf_tool *tool = data; 323 struct perf_tool *tool = data;
324 324
325 if (machine__is_host(machine)) 325 if (machine__is_host(machine))
326 return; 326 return;
327 327
328 /* 328 /*
329 *As for guest kernel when processing subcommand record&report, 329 *As for guest kernel when processing subcommand record&report,
330 *we arrange module mmap prior to guest kernel mmap and trigger 330 *we arrange module mmap prior to guest kernel mmap and trigger
331 *a preload dso because default guest module symbols are loaded 331 *a preload dso because default guest module symbols are loaded
332 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 332 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
333 *method is used to avoid symbol missing when the first addr is 333 *method is used to avoid symbol missing when the first addr is
334 *in module instead of in guest kernel. 334 *in module instead of in guest kernel.
335 */ 335 */
336 err = perf_event__synthesize_modules(tool, process_synthesized_event, 336 err = perf_event__synthesize_modules(tool, process_synthesized_event,
337 machine); 337 machine);
338 if (err < 0) 338 if (err < 0)
339 pr_err("Couldn't record guest kernel [%d]'s reference" 339 pr_err("Couldn't record guest kernel [%d]'s reference"
340 " relocation symbol.\n", machine->pid); 340 " relocation symbol.\n", machine->pid);
341 341
342 /* 342 /*
343 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 343 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
344 * have no _text sometimes. 344 * have no _text sometimes.
345 */ 345 */
346 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 346 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
347 machine, "_text"); 347 machine, "_text");
348 if (err < 0) 348 if (err < 0)
349 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 349 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
350 machine, "_stext"); 350 machine, "_stext");
351 if (err < 0) 351 if (err < 0)
352 pr_err("Couldn't record guest kernel [%d]'s reference" 352 pr_err("Couldn't record guest kernel [%d]'s reference"
353 " relocation symbol.\n", machine->pid); 353 " relocation symbol.\n", machine->pid);
354 } 354 }
355 355
356 static struct perf_event_header finished_round_event = { 356 static struct perf_event_header finished_round_event = {
357 .size = sizeof(struct perf_event_header), 357 .size = sizeof(struct perf_event_header),
358 .type = PERF_RECORD_FINISHED_ROUND, 358 .type = PERF_RECORD_FINISHED_ROUND,
359 }; 359 };
360 360
361 static void perf_record__mmap_read_all(struct perf_record *rec) 361 static void perf_record__mmap_read_all(struct perf_record *rec)
362 { 362 {
363 int i; 363 int i;
364 364
365 for (i = 0; i < rec->evlist->nr_mmaps; i++) { 365 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
366 if (rec->evlist->mmap[i].base) 366 if (rec->evlist->mmap[i].base)
367 perf_record__mmap_read(rec, &rec->evlist->mmap[i]); 367 perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
368 } 368 }
369 369
370 if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO)) 370 if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
371 write_output(rec, &finished_round_event, sizeof(finished_round_event)); 371 write_output(rec, &finished_round_event, sizeof(finished_round_event));
372 } 372 }
373 373
374 static int __cmd_record(struct perf_record *rec, int argc, const char **argv) 374 static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
375 { 375 {
376 struct stat st; 376 struct stat st;
377 int flags; 377 int flags;
378 int err, output; 378 int err, output;
379 unsigned long waking = 0; 379 unsigned long waking = 0;
380 const bool forks = argc > 0; 380 const bool forks = argc > 0;
381 struct machine *machine; 381 struct machine *machine;
382 struct perf_tool *tool = &rec->tool; 382 struct perf_tool *tool = &rec->tool;
383 struct perf_record_opts *opts = &rec->opts; 383 struct perf_record_opts *opts = &rec->opts;
384 struct perf_evlist *evsel_list = rec->evlist; 384 struct perf_evlist *evsel_list = rec->evlist;
385 const char *output_name = rec->output_name; 385 const char *output_name = rec->output_name;
386 struct perf_session *session; 386 struct perf_session *session;
387 387
388 rec->progname = argv[0]; 388 rec->progname = argv[0];
389 389
390 rec->page_size = sysconf(_SC_PAGE_SIZE); 390 rec->page_size = sysconf(_SC_PAGE_SIZE);
391 391
392 on_exit(perf_record__sig_exit, rec); 392 on_exit(perf_record__sig_exit, rec);
393 signal(SIGCHLD, sig_handler); 393 signal(SIGCHLD, sig_handler);
394 signal(SIGINT, sig_handler); 394 signal(SIGINT, sig_handler);
395 signal(SIGUSR1, sig_handler); 395 signal(SIGUSR1, sig_handler);
396 396
397 if (!output_name) { 397 if (!output_name) {
398 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode)) 398 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
399 opts->pipe_output = true; 399 opts->pipe_output = true;
400 else 400 else
401 rec->output_name = output_name = "perf.data"; 401 rec->output_name = output_name = "perf.data";
402 } 402 }
403 if (output_name) { 403 if (output_name) {
404 if (!strcmp(output_name, "-")) 404 if (!strcmp(output_name, "-"))
405 opts->pipe_output = true; 405 opts->pipe_output = true;
406 else if (!stat(output_name, &st) && st.st_size) { 406 else if (!stat(output_name, &st) && st.st_size) {
407 if (rec->write_mode == WRITE_FORCE) { 407 if (rec->write_mode == WRITE_FORCE) {
408 char oldname[PATH_MAX]; 408 char oldname[PATH_MAX];
409 snprintf(oldname, sizeof(oldname), "%s.old", 409 snprintf(oldname, sizeof(oldname), "%s.old",
410 output_name); 410 output_name);
411 unlink(oldname); 411 unlink(oldname);
412 rename(output_name, oldname); 412 rename(output_name, oldname);
413 } 413 }
414 } else if (rec->write_mode == WRITE_APPEND) { 414 } else if (rec->write_mode == WRITE_APPEND) {
415 rec->write_mode = WRITE_FORCE; 415 rec->write_mode = WRITE_FORCE;
416 } 416 }
417 } 417 }
418 418
419 flags = O_CREAT|O_RDWR; 419 flags = O_CREAT|O_RDWR;
420 if (rec->write_mode == WRITE_APPEND) 420 if (rec->write_mode == WRITE_APPEND)
421 rec->file_new = 0; 421 rec->file_new = 0;
422 else 422 else
423 flags |= O_TRUNC; 423 flags |= O_TRUNC;
424 424
425 if (opts->pipe_output) 425 if (opts->pipe_output)
426 output = STDOUT_FILENO; 426 output = STDOUT_FILENO;
427 else 427 else
428 output = open(output_name, flags, S_IRUSR | S_IWUSR); 428 output = open(output_name, flags, S_IRUSR | S_IWUSR);
429 if (output < 0) { 429 if (output < 0) {
430 perror("failed to create output file"); 430 perror("failed to create output file");
431 exit(-1); 431 exit(-1);
432 } 432 }
433 433
434 rec->output = output; 434 rec->output = output;
435 435
436 session = perf_session__new(output_name, O_WRONLY, 436 session = perf_session__new(output_name, O_WRONLY,
437 rec->write_mode == WRITE_FORCE, false, NULL); 437 rec->write_mode == WRITE_FORCE, false, NULL);
438 if (session == NULL) { 438 if (session == NULL) {
439 pr_err("Not enough memory for reading perf file header\n"); 439 pr_err("Not enough memory for reading perf file header\n");
440 return -1; 440 return -1;
441 } 441 }
442 442
443 rec->session = session; 443 rec->session = session;
444 444
445 if (!rec->no_buildid) 445 if (!rec->no_buildid)
446 perf_header__set_feat(&session->header, HEADER_BUILD_ID); 446 perf_header__set_feat(&session->header, HEADER_BUILD_ID);
447 447
448 if (!rec->file_new) { 448 if (!rec->file_new) {
449 err = perf_session__read_header(session, output); 449 err = perf_session__read_header(session, output);
450 if (err < 0) 450 if (err < 0)
451 goto out_delete_session; 451 goto out_delete_session;
452 } 452 }
453 453
454 if (have_tracepoints(&evsel_list->entries)) 454 if (have_tracepoints(&evsel_list->entries))
455 perf_header__set_feat(&session->header, HEADER_TRACE_INFO); 455 perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
456 456
457 perf_header__set_feat(&session->header, HEADER_HOSTNAME); 457 perf_header__set_feat(&session->header, HEADER_HOSTNAME);
458 perf_header__set_feat(&session->header, HEADER_OSRELEASE); 458 perf_header__set_feat(&session->header, HEADER_OSRELEASE);
459 perf_header__set_feat(&session->header, HEADER_ARCH); 459 perf_header__set_feat(&session->header, HEADER_ARCH);
460 perf_header__set_feat(&session->header, HEADER_CPUDESC); 460 perf_header__set_feat(&session->header, HEADER_CPUDESC);
461 perf_header__set_feat(&session->header, HEADER_NRCPUS); 461 perf_header__set_feat(&session->header, HEADER_NRCPUS);
462 perf_header__set_feat(&session->header, HEADER_EVENT_DESC); 462 perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
463 perf_header__set_feat(&session->header, HEADER_CMDLINE); 463 perf_header__set_feat(&session->header, HEADER_CMDLINE);
464 perf_header__set_feat(&session->header, HEADER_VERSION); 464 perf_header__set_feat(&session->header, HEADER_VERSION);
465 perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); 465 perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
466 perf_header__set_feat(&session->header, HEADER_TOTAL_MEM); 466 perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
467 perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY); 467 perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
468 perf_header__set_feat(&session->header, HEADER_CPUID); 468 perf_header__set_feat(&session->header, HEADER_CPUID);
469 469
470 if (forks) { 470 if (forks) {
471 err = perf_evlist__prepare_workload(evsel_list, opts, argv); 471 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
472 if (err < 0) { 472 if (err < 0) {
473 pr_err("Couldn't run the workload!\n"); 473 pr_err("Couldn't run the workload!\n");
474 goto out_delete_session; 474 goto out_delete_session;
475 } 475 }
476 } 476 }
477 477
478 perf_record__open(rec); 478 perf_record__open(rec);
479 479
480 /* 480 /*
481 * perf_session__delete(session) will be called at perf_record__exit() 481 * perf_session__delete(session) will be called at perf_record__exit()
482 */ 482 */
483 on_exit(perf_record__exit, rec); 483 on_exit(perf_record__exit, rec);
484 484
485 if (opts->pipe_output) { 485 if (opts->pipe_output) {
486 err = perf_header__write_pipe(output); 486 err = perf_header__write_pipe(output);
487 if (err < 0) 487 if (err < 0)
488 return err; 488 return err;
489 } else if (rec->file_new) { 489 } else if (rec->file_new) {
490 err = perf_session__write_header(session, evsel_list, 490 err = perf_session__write_header(session, evsel_list,
491 output, false); 491 output, false);
492 if (err < 0) 492 if (err < 0)
493 return err; 493 return err;
494 } 494 }
495 495
496 rec->post_processing_offset = lseek(output, 0, SEEK_CUR); 496 rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
497 497
498 machine = perf_session__find_host_machine(session); 498 machine = perf_session__find_host_machine(session);
499 if (!machine) { 499 if (!machine) {
500 pr_err("Couldn't find native kernel information.\n"); 500 pr_err("Couldn't find native kernel information.\n");
501 return -1; 501 return -1;
502 } 502 }
503 503
504 if (opts->pipe_output) { 504 if (opts->pipe_output) {
505 err = perf_event__synthesize_attrs(tool, session, 505 err = perf_event__synthesize_attrs(tool, session,
506 process_synthesized_event); 506 process_synthesized_event);
507 if (err < 0) { 507 if (err < 0) {
508 pr_err("Couldn't synthesize attrs.\n"); 508 pr_err("Couldn't synthesize attrs.\n");
509 return err; 509 return err;
510 } 510 }
511 511
512 err = perf_event__synthesize_event_types(tool, process_synthesized_event, 512 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
513 machine); 513 machine);
514 if (err < 0) { 514 if (err < 0) {
515 pr_err("Couldn't synthesize event_types.\n"); 515 pr_err("Couldn't synthesize event_types.\n");
516 return err; 516 return err;
517 } 517 }
518 518
519 if (have_tracepoints(&evsel_list->entries)) { 519 if (have_tracepoints(&evsel_list->entries)) {
520 /* 520 /*
521 * FIXME err <= 0 here actually means that 521 * FIXME err <= 0 here actually means that
522 * there were no tracepoints so its not really 522 * there were no tracepoints so its not really
523 * an error, just that we don't need to 523 * an error, just that we don't need to
524 * synthesize anything. We really have to 524 * synthesize anything. We really have to
525 * return this more properly and also 525 * return this more properly and also
526 * propagate errors that now are calling die() 526 * propagate errors that now are calling die()
527 */ 527 */
528 err = perf_event__synthesize_tracing_data(tool, output, evsel_list, 528 err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
529 process_synthesized_event); 529 process_synthesized_event);
530 if (err <= 0) { 530 if (err <= 0) {
531 pr_err("Couldn't record tracing data.\n"); 531 pr_err("Couldn't record tracing data.\n");
532 return err; 532 return err;
533 } 533 }
534 advance_output(rec, err); 534 advance_output(rec, err);
535 } 535 }
536 } 536 }
537 537
538 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 538 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
539 machine, "_text"); 539 machine, "_text");
540 if (err < 0) 540 if (err < 0)
541 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 541 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
542 machine, "_stext"); 542 machine, "_stext");
543 if (err < 0) 543 if (err < 0)
544 pr_err("Couldn't record kernel reference relocation symbol\n" 544 pr_err("Couldn't record kernel reference relocation symbol\n"
545 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 545 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
546 "Check /proc/kallsyms permission or run as root.\n"); 546 "Check /proc/kallsyms permission or run as root.\n");
547 547
548 err = perf_event__synthesize_modules(tool, process_synthesized_event, 548 err = perf_event__synthesize_modules(tool, process_synthesized_event,
549 machine); 549 machine);
550 if (err < 0) 550 if (err < 0)
551 pr_err("Couldn't record kernel module information.\n" 551 pr_err("Couldn't record kernel module information.\n"
552 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 552 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
553 "Check /proc/modules permission or run as root.\n"); 553 "Check /proc/modules permission or run as root.\n");
554 554
555 if (perf_guest) 555 if (perf_guest)
556 perf_session__process_machines(session, tool, 556 perf_session__process_machines(session, tool,
557 perf_event__synthesize_guest_os); 557 perf_event__synthesize_guest_os);
558 558
559 if (!opts->system_wide) 559 if (!opts->system_wide)
560 perf_event__synthesize_thread_map(tool, evsel_list->threads, 560 perf_event__synthesize_thread_map(tool, evsel_list->threads,
561 process_synthesized_event, 561 process_synthesized_event,
562 machine); 562 machine);
563 else 563 else
564 perf_event__synthesize_threads(tool, process_synthesized_event, 564 perf_event__synthesize_threads(tool, process_synthesized_event,
565 machine); 565 machine);
566 566
567 if (rec->realtime_prio) { 567 if (rec->realtime_prio) {
568 struct sched_param param; 568 struct sched_param param;
569 569
570 param.sched_priority = rec->realtime_prio; 570 param.sched_priority = rec->realtime_prio;
571 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 571 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
572 pr_err("Could not set realtime priority.\n"); 572 pr_err("Could not set realtime priority.\n");
573 exit(-1); 573 exit(-1);
574 } 574 }
575 } 575 }
576 576
577 perf_evlist__enable(evsel_list); 577 perf_evlist__enable(evsel_list);
578 578
579 /* 579 /*
580 * Let the child rip 580 * Let the child rip
581 */ 581 */
582 if (forks) 582 if (forks)
583 perf_evlist__start_workload(evsel_list); 583 perf_evlist__start_workload(evsel_list);
584 584
585 for (;;) { 585 for (;;) {
586 int hits = rec->samples; 586 int hits = rec->samples;
587 587
588 perf_record__mmap_read_all(rec); 588 perf_record__mmap_read_all(rec);
589 589
590 if (hits == rec->samples) { 590 if (hits == rec->samples) {
591 if (done) 591 if (done)
592 break; 592 break;
593 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1); 593 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
594 waking++; 594 waking++;
595 } 595 }
596 596
597 if (done) 597 if (done)
598 perf_evlist__disable(evsel_list); 598 perf_evlist__disable(evsel_list);
599 } 599 }
600 600
601 if (quiet || signr == SIGUSR1) 601 if (quiet || signr == SIGUSR1)
602 return 0; 602 return 0;
603 603
604 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); 604 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
605 605
606 /* 606 /*
607 * Approximate RIP event size: 24 bytes. 607 * Approximate RIP event size: 24 bytes.
608 */ 608 */
609 fprintf(stderr, 609 fprintf(stderr,
610 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n", 610 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
611 (double)rec->bytes_written / 1024.0 / 1024.0, 611 (double)rec->bytes_written / 1024.0 / 1024.0,
612 output_name, 612 output_name,
613 rec->bytes_written / 24); 613 rec->bytes_written / 24);
614 614
615 return 0; 615 return 0;
616 616
617 out_delete_session: 617 out_delete_session:
618 perf_session__delete(session); 618 perf_session__delete(session);
619 return err; 619 return err;
620 } 620 }
621 621
622 static const char * const record_usage[] = { 622 static const char * const record_usage[] = {
623 "perf record [<options>] [<command>]", 623 "perf record [<options>] [<command>]",
624 "perf record [<options>] -- <command> [<options>]", 624 "perf record [<options>] -- <command> [<options>]",
625 NULL 625 NULL
626 }; 626 };
627 627
628 /* 628 /*
629 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new 629 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
630 * because we need to have access to it in perf_record__exit, that is called 630 * because we need to have access to it in perf_record__exit, that is called
631 * after cmd_record() exits, but since record_options need to be accessible to 631 * after cmd_record() exits, but since record_options need to be accessible to
632 * builtin-script, leave it here. 632 * builtin-script, leave it here.
633 * 633 *
634 * At least we don't ouch it in all the other functions here directly. 634 * At least we don't ouch it in all the other functions here directly.
635 * 635 *
636 * Just say no to tons of global variables, sigh. 636 * Just say no to tons of global variables, sigh.
637 */ 637 */
638 static struct perf_record record = { 638 static struct perf_record record = {
639 .opts = { 639 .opts = {
640 .target_pid = -1, 640 .target_pid = -1,
641 .target_tid = -1, 641 .target_tid = -1,
642 .mmap_pages = UINT_MAX, 642 .mmap_pages = UINT_MAX,
643 .user_freq = UINT_MAX, 643 .user_freq = UINT_MAX,
644 .user_interval = ULLONG_MAX, 644 .user_interval = ULLONG_MAX,
645 .freq = 1000, 645 .freq = 1000,
646 .sample_id_all_avail = true, 646 .sample_id_all_avail = true,
647 }, 647 },
648 .write_mode = WRITE_FORCE, 648 .write_mode = WRITE_FORCE,
649 .file_new = true, 649 .file_new = true,
650 }; 650 };
651 651
652 /* 652 /*
653 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 653 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
654 * with it and switch to use the library functions in perf_evlist that came 654 * with it and switch to use the library functions in perf_evlist that came
655 * from builtin-record.c, i.e. use perf_record_opts, 655 * from builtin-record.c, i.e. use perf_record_opts,
656 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 656 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
657 * using pipes, etc. 657 * using pipes, etc.
658 */ 658 */
659 const struct option record_options[] = { 659 const struct option record_options[] = {
660 OPT_CALLBACK('e', "event", &record.evlist, "event", 660 OPT_CALLBACK('e', "event", &record.evlist, "event",
661 "event selector. use 'perf list' to list available events", 661 "event selector. use 'perf list' to list available events",
662 parse_events_option), 662 parse_events_option),
663 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 663 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
664 "event filter", parse_filter), 664 "event filter", parse_filter),
665 OPT_INTEGER('p', "pid", &record.opts.target_pid, 665 OPT_INTEGER('p', "pid", &record.opts.target_pid,
666 "record events on existing process id"), 666 "record events on existing process id"),
667 OPT_INTEGER('t', "tid", &record.opts.target_tid, 667 OPT_INTEGER('t', "tid", &record.opts.target_tid,
668 "record events on existing thread id"), 668 "record events on existing thread id"),
669 OPT_INTEGER('r', "realtime", &record.realtime_prio, 669 OPT_INTEGER('r', "realtime", &record.realtime_prio,
670 "collect data with this RT SCHED_FIFO priority"), 670 "collect data with this RT SCHED_FIFO priority"),
671 OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay, 671 OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
672 "collect data without buffering"), 672 "collect data without buffering"),
673 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 673 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
674 "collect raw sample records from all opened counters"), 674 "collect raw sample records from all opened counters"),
675 OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide, 675 OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
676 "system-wide collection from all CPUs"), 676 "system-wide collection from all CPUs"),
677 OPT_BOOLEAN('A', "append", &record.append_file, 677 OPT_BOOLEAN('A', "append", &record.append_file,
678 "append to the output file to do incremental profiling"), 678 "append to the output file to do incremental profiling"),
679 OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu", 679 OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
680 "list of cpus to monitor"), 680 "list of cpus to monitor"),
681 OPT_BOOLEAN('f', "force", &record.force, 681 OPT_BOOLEAN('f', "force", &record.force,
682 "overwrite existing data file (deprecated)"), 682 "overwrite existing data file (deprecated)"),
683 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 683 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
684 OPT_STRING('o', "output", &record.output_name, "file", 684 OPT_STRING('o', "output", &record.output_name, "file",
685 "output file name"), 685 "output file name"),
686 OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit, 686 OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
687 "child tasks do not inherit counters"), 687 "child tasks do not inherit counters"),
688 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), 688 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
689 OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages, 689 OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
690 "number of mmap data pages"), 690 "number of mmap data pages"),
691 OPT_BOOLEAN(0, "group", &record.opts.group, 691 OPT_BOOLEAN(0, "group", &record.opts.group,
692 "put the counters into a counter group"), 692 "put the counters into a counter group"),
693 OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph, 693 OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
694 "do call-graph (stack chain/backtrace) recording"), 694 "do call-graph (stack chain/backtrace) recording"),
695 OPT_INCR('v', "verbose", &verbose, 695 OPT_INCR('v', "verbose", &verbose,
696 "be more verbose (show counter open errors, etc)"), 696 "be more verbose (show counter open errors, etc)"),
697 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 697 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
698 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 698 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
699 "per thread counts"), 699 "per thread counts"),
700 OPT_BOOLEAN('d', "data", &record.opts.sample_address, 700 OPT_BOOLEAN('d', "data", &record.opts.sample_address,
701 "Sample addresses"), 701 "Sample addresses"),
702 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"), 702 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
703 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
703 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 704 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
704 "don't sample"), 705 "don't sample"),
705 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache, 706 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
706 "do not update the buildid cache"), 707 "do not update the buildid cache"),
707 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid, 708 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
708 "do not collect buildids in perf.data"), 709 "do not collect buildids in perf.data"),
709 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 710 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
710 "monitor event in cgroup name only", 711 "monitor event in cgroup name only",
711 parse_cgroups), 712 parse_cgroups),
712 OPT_END() 713 OPT_END()
713 }; 714 };
714 715
715 int cmd_record(int argc, const char **argv, const char *prefix __used) 716 int cmd_record(int argc, const char **argv, const char *prefix __used)
716 { 717 {
717 int err = -ENOMEM; 718 int err = -ENOMEM;
718 struct perf_evsel *pos; 719 struct perf_evsel *pos;
719 struct perf_evlist *evsel_list; 720 struct perf_evlist *evsel_list;
720 struct perf_record *rec = &record; 721 struct perf_record *rec = &record;
721 722
722 perf_header__set_cmdline(argc, argv); 723 perf_header__set_cmdline(argc, argv);
723 724
724 evsel_list = perf_evlist__new(NULL, NULL); 725 evsel_list = perf_evlist__new(NULL, NULL);
725 if (evsel_list == NULL) 726 if (evsel_list == NULL)
726 return -ENOMEM; 727 return -ENOMEM;
727 728
728 rec->evlist = evsel_list; 729 rec->evlist = evsel_list;
729 730
730 argc = parse_options(argc, argv, record_options, record_usage, 731 argc = parse_options(argc, argv, record_options, record_usage,
731 PARSE_OPT_STOP_AT_NON_OPTION); 732 PARSE_OPT_STOP_AT_NON_OPTION);
732 if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 && 733 if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
733 !rec->opts.system_wide && !rec->opts.cpu_list) 734 !rec->opts.system_wide && !rec->opts.cpu_list)
734 usage_with_options(record_usage, record_options); 735 usage_with_options(record_usage, record_options);
735 736
736 if (rec->force && rec->append_file) { 737 if (rec->force && rec->append_file) {
737 fprintf(stderr, "Can't overwrite and append at the same time." 738 fprintf(stderr, "Can't overwrite and append at the same time."
738 " You need to choose between -f and -A"); 739 " You need to choose between -f and -A");
739 usage_with_options(record_usage, record_options); 740 usage_with_options(record_usage, record_options);
740 } else if (rec->append_file) { 741 } else if (rec->append_file) {
741 rec->write_mode = WRITE_APPEND; 742 rec->write_mode = WRITE_APPEND;
742 } else { 743 } else {
743 rec->write_mode = WRITE_FORCE; 744 rec->write_mode = WRITE_FORCE;
744 } 745 }
745 746
746 if (nr_cgroups && !rec->opts.system_wide) { 747 if (nr_cgroups && !rec->opts.system_wide) {
747 fprintf(stderr, "cgroup monitoring only available in" 748 fprintf(stderr, "cgroup monitoring only available in"
748 " system-wide mode\n"); 749 " system-wide mode\n");
749 usage_with_options(record_usage, record_options); 750 usage_with_options(record_usage, record_options);
750 } 751 }
751 752
752 symbol__init(); 753 symbol__init();
753 754
754 if (symbol_conf.kptr_restrict) 755 if (symbol_conf.kptr_restrict)
755 pr_warning( 756 pr_warning(
756 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 757 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
757 "check /proc/sys/kernel/kptr_restrict.\n\n" 758 "check /proc/sys/kernel/kptr_restrict.\n\n"
758 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 759 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
759 "file is not found in the buildid cache or in the vmlinux path.\n\n" 760 "file is not found in the buildid cache or in the vmlinux path.\n\n"
760 "Samples in kernel modules won't be resolved at all.\n\n" 761 "Samples in kernel modules won't be resolved at all.\n\n"
761 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" 762 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
762 "even with a suitable vmlinux or kallsyms file.\n\n"); 763 "even with a suitable vmlinux or kallsyms file.\n\n");
763 764
764 if (rec->no_buildid_cache || rec->no_buildid) 765 if (rec->no_buildid_cache || rec->no_buildid)
765 disable_buildid_cache(); 766 disable_buildid_cache();
766 767
767 if (evsel_list->nr_entries == 0 && 768 if (evsel_list->nr_entries == 0 &&
768 perf_evlist__add_default(evsel_list) < 0) { 769 perf_evlist__add_default(evsel_list) < 0) {
769 pr_err("Not enough memory for event selector list\n"); 770 pr_err("Not enough memory for event selector list\n");
770 goto out_symbol_exit; 771 goto out_symbol_exit;
771 } 772 }
772 773
773 if (rec->opts.target_pid != -1) 774 if (rec->opts.target_pid != -1)
774 rec->opts.target_tid = rec->opts.target_pid; 775 rec->opts.target_tid = rec->opts.target_pid;
775 776
776 if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid, 777 if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
777 rec->opts.target_tid, rec->opts.cpu_list) < 0) 778 rec->opts.target_tid, rec->opts.cpu_list) < 0)
778 usage_with_options(record_usage, record_options); 779 usage_with_options(record_usage, record_options);
779 780
780 list_for_each_entry(pos, &evsel_list->entries, node) { 781 list_for_each_entry(pos, &evsel_list->entries, node) {
781 if (perf_header__push_event(pos->attr.config, event_name(pos))) 782 if (perf_header__push_event(pos->attr.config, event_name(pos)))
782 goto out_free_fd; 783 goto out_free_fd;
783 } 784 }
784 785
785 if (rec->opts.user_interval != ULLONG_MAX) 786 if (rec->opts.user_interval != ULLONG_MAX)
786 rec->opts.default_interval = rec->opts.user_interval; 787 rec->opts.default_interval = rec->opts.user_interval;
787 if (rec->opts.user_freq != UINT_MAX) 788 if (rec->opts.user_freq != UINT_MAX)
788 rec->opts.freq = rec->opts.user_freq; 789 rec->opts.freq = rec->opts.user_freq;
789 790
790 /* 791 /*
791 * User specified count overrides default frequency. 792 * User specified count overrides default frequency.
792 */ 793 */
793 if (rec->opts.default_interval) 794 if (rec->opts.default_interval)
794 rec->opts.freq = 0; 795 rec->opts.freq = 0;
795 else if (rec->opts.freq) { 796 else if (rec->opts.freq) {
796 rec->opts.default_interval = rec->opts.freq; 797 rec->opts.default_interval = rec->opts.freq;
797 } else { 798 } else {
798 fprintf(stderr, "frequency and count are zero, aborting\n"); 799 fprintf(stderr, "frequency and count are zero, aborting\n");
799 err = -EINVAL; 800 err = -EINVAL;
800 goto out_free_fd; 801 goto out_free_fd;
801 } 802 }
802 803
803 err = __cmd_record(&record, argc, argv); 804 err = __cmd_record(&record, argc, argv);
804 out_free_fd: 805 out_free_fd:
805 perf_evlist__delete_maps(evsel_list); 806 perf_evlist__delete_maps(evsel_list);
806 out_symbol_exit: 807 out_symbol_exit:
807 symbol__exit(); 808 symbol__exit();
808 return err; 809 return err;
809 } 810 }
810 811
1 #ifndef _PERF_PERF_H 1 #ifndef _PERF_PERF_H
2 #define _PERF_PERF_H 2 #define _PERF_PERF_H
3 3
4 struct winsize; 4 struct winsize;
5 5
6 void get_term_dimensions(struct winsize *ws); 6 void get_term_dimensions(struct winsize *ws);
7 7
8 #if defined(__i386__) 8 #if defined(__i386__)
9 #include "../../arch/x86/include/asm/unistd.h" 9 #include "../../arch/x86/include/asm/unistd.h"
10 #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") 10 #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
11 #define cpu_relax() asm volatile("rep; nop" ::: "memory"); 11 #define cpu_relax() asm volatile("rep; nop" ::: "memory");
12 #define CPUINFO_PROC "model name" 12 #define CPUINFO_PROC "model name"
13 #endif 13 #endif
14 14
15 #if defined(__x86_64__) 15 #if defined(__x86_64__)
16 #include "../../arch/x86/include/asm/unistd.h" 16 #include "../../arch/x86/include/asm/unistd.h"
17 #define rmb() asm volatile("lfence" ::: "memory") 17 #define rmb() asm volatile("lfence" ::: "memory")
18 #define cpu_relax() asm volatile("rep; nop" ::: "memory"); 18 #define cpu_relax() asm volatile("rep; nop" ::: "memory");
19 #define CPUINFO_PROC "model name" 19 #define CPUINFO_PROC "model name"
20 #endif 20 #endif
21 21
22 #ifdef __powerpc__ 22 #ifdef __powerpc__
23 #include "../../arch/powerpc/include/asm/unistd.h" 23 #include "../../arch/powerpc/include/asm/unistd.h"
24 #define rmb() asm volatile ("sync" ::: "memory") 24 #define rmb() asm volatile ("sync" ::: "memory")
25 #define cpu_relax() asm volatile ("" ::: "memory"); 25 #define cpu_relax() asm volatile ("" ::: "memory");
26 #define CPUINFO_PROC "cpu" 26 #define CPUINFO_PROC "cpu"
27 #endif 27 #endif
28 28
29 #ifdef __s390__ 29 #ifdef __s390__
30 #include "../../arch/s390/include/asm/unistd.h" 30 #include "../../arch/s390/include/asm/unistd.h"
31 #define rmb() asm volatile("bcr 15,0" ::: "memory") 31 #define rmb() asm volatile("bcr 15,0" ::: "memory")
32 #define cpu_relax() asm volatile("" ::: "memory"); 32 #define cpu_relax() asm volatile("" ::: "memory");
33 #endif 33 #endif
34 34
35 #ifdef __sh__ 35 #ifdef __sh__
36 #include "../../arch/sh/include/asm/unistd.h" 36 #include "../../arch/sh/include/asm/unistd.h"
37 #if defined(__SH4A__) || defined(__SH5__) 37 #if defined(__SH4A__) || defined(__SH5__)
38 # define rmb() asm volatile("synco" ::: "memory") 38 # define rmb() asm volatile("synco" ::: "memory")
39 #else 39 #else
40 # define rmb() asm volatile("" ::: "memory") 40 # define rmb() asm volatile("" ::: "memory")
41 #endif 41 #endif
42 #define cpu_relax() asm volatile("" ::: "memory") 42 #define cpu_relax() asm volatile("" ::: "memory")
43 #define CPUINFO_PROC "cpu type" 43 #define CPUINFO_PROC "cpu type"
44 #endif 44 #endif
45 45
46 #ifdef __hppa__ 46 #ifdef __hppa__
47 #include "../../arch/parisc/include/asm/unistd.h" 47 #include "../../arch/parisc/include/asm/unistd.h"
48 #define rmb() asm volatile("" ::: "memory") 48 #define rmb() asm volatile("" ::: "memory")
49 #define cpu_relax() asm volatile("" ::: "memory"); 49 #define cpu_relax() asm volatile("" ::: "memory");
50 #define CPUINFO_PROC "cpu" 50 #define CPUINFO_PROC "cpu"
51 #endif 51 #endif
52 52
53 #ifdef __sparc__ 53 #ifdef __sparc__
54 #include "../../arch/sparc/include/asm/unistd.h" 54 #include "../../arch/sparc/include/asm/unistd.h"
55 #define rmb() asm volatile("":::"memory") 55 #define rmb() asm volatile("":::"memory")
56 #define cpu_relax() asm volatile("":::"memory") 56 #define cpu_relax() asm volatile("":::"memory")
57 #define CPUINFO_PROC "cpu" 57 #define CPUINFO_PROC "cpu"
58 #endif 58 #endif
59 59
60 #ifdef __alpha__ 60 #ifdef __alpha__
61 #include "../../arch/alpha/include/asm/unistd.h" 61 #include "../../arch/alpha/include/asm/unistd.h"
62 #define rmb() asm volatile("mb" ::: "memory") 62 #define rmb() asm volatile("mb" ::: "memory")
63 #define cpu_relax() asm volatile("" ::: "memory") 63 #define cpu_relax() asm volatile("" ::: "memory")
64 #define CPUINFO_PROC "cpu model" 64 #define CPUINFO_PROC "cpu model"
65 #endif 65 #endif
66 66
67 #ifdef __ia64__ 67 #ifdef __ia64__
68 #include "../../arch/ia64/include/asm/unistd.h" 68 #include "../../arch/ia64/include/asm/unistd.h"
69 #define rmb() asm volatile ("mf" ::: "memory") 69 #define rmb() asm volatile ("mf" ::: "memory")
70 #define cpu_relax() asm volatile ("hint @pause" ::: "memory") 70 #define cpu_relax() asm volatile ("hint @pause" ::: "memory")
71 #define CPUINFO_PROC "model name" 71 #define CPUINFO_PROC "model name"
72 #endif 72 #endif
73 73
74 #ifdef __arm__ 74 #ifdef __arm__
75 #include "../../arch/arm/include/asm/unistd.h" 75 #include "../../arch/arm/include/asm/unistd.h"
76 /* 76 /*
77 * Use the __kuser_memory_barrier helper in the CPU helper page. See 77 * Use the __kuser_memory_barrier helper in the CPU helper page. See
78 * arch/arm/kernel/entry-armv.S in the kernel source for details. 78 * arch/arm/kernel/entry-armv.S in the kernel source for details.
79 */ 79 */
80 #define rmb() ((void(*)(void))0xffff0fa0)() 80 #define rmb() ((void(*)(void))0xffff0fa0)()
81 #define cpu_relax() asm volatile("":::"memory") 81 #define cpu_relax() asm volatile("":::"memory")
82 #define CPUINFO_PROC "Processor" 82 #define CPUINFO_PROC "Processor"
83 #endif 83 #endif
84 84
85 #ifdef __mips__ 85 #ifdef __mips__
86 #include "../../arch/mips/include/asm/unistd.h" 86 #include "../../arch/mips/include/asm/unistd.h"
87 #define rmb() asm volatile( \ 87 #define rmb() asm volatile( \
88 ".set mips2\n\t" \ 88 ".set mips2\n\t" \
89 "sync\n\t" \ 89 "sync\n\t" \
90 ".set mips0" \ 90 ".set mips0" \
91 : /* no output */ \ 91 : /* no output */ \
92 : /* no input */ \ 92 : /* no input */ \
93 : "memory") 93 : "memory")
94 #define cpu_relax() asm volatile("" ::: "memory") 94 #define cpu_relax() asm volatile("" ::: "memory")
95 #define CPUINFO_PROC "cpu model" 95 #define CPUINFO_PROC "cpu model"
96 #endif 96 #endif
97 97
98 #include <time.h> 98 #include <time.h>
99 #include <unistd.h> 99 #include <unistd.h>
100 #include <sys/types.h> 100 #include <sys/types.h>
101 #include <sys/syscall.h> 101 #include <sys/syscall.h>
102 102
103 #include "../../include/linux/perf_event.h" 103 #include "../../include/linux/perf_event.h"
104 #include "util/types.h" 104 #include "util/types.h"
105 #include <stdbool.h> 105 #include <stdbool.h>
106 106
107 struct perf_mmap { 107 struct perf_mmap {
108 void *base; 108 void *base;
109 int mask; 109 int mask;
110 unsigned int prev; 110 unsigned int prev;
111 }; 111 };
112 112
113 static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) 113 static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
114 { 114 {
115 struct perf_event_mmap_page *pc = mm->base; 115 struct perf_event_mmap_page *pc = mm->base;
116 int head = pc->data_head; 116 int head = pc->data_head;
117 rmb(); 117 rmb();
118 return head; 118 return head;
119 } 119 }
120 120
121 static inline void perf_mmap__write_tail(struct perf_mmap *md, 121 static inline void perf_mmap__write_tail(struct perf_mmap *md,
122 unsigned long tail) 122 unsigned long tail)
123 { 123 {
124 struct perf_event_mmap_page *pc = md->base; 124 struct perf_event_mmap_page *pc = md->base;
125 125
126 /* 126 /*
127 * ensure all reads are done before we write the tail out. 127 * ensure all reads are done before we write the tail out.
128 */ 128 */
129 /* mb(); */ 129 /* mb(); */
130 pc->data_tail = tail; 130 pc->data_tail = tail;
131 } 131 }
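These two helpers bracket every pass over the mmap'ed ring buffer: perf_mmap__read_head() loads data_head (with the per-arch rmb() defined above) before any records are read, and perf_mmap__write_tail() publishes data_tail once they have been consumed so the kernel can reuse the space. A minimal consume-loop sketch, assuming the global page_size used elsewhere in the tools, a caller-supplied process_event() callback, and records that do not wrap around the end of the buffer (the real code handles the wrap case):

	static void perf_mmap__consume(struct perf_mmap *md,
				       void (*process_event)(struct perf_event_header *ev))
	{
		unsigned int head = perf_mmap__read_head(md);
		unsigned int old = md->prev;
		unsigned char *data = (unsigned char *)md->base + page_size;

		while (old != head) {
			struct perf_event_header *ev =
				(struct perf_event_header *)&data[old & md->mask];

			process_event(ev);		/* hypothetical callback */
			old += ev->size;
		}

		md->prev = old;
		perf_mmap__write_tail(md, old);		/* hand the space back to the kernel */
	}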
132 132
133 /* 133 /*
134 * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all 134 * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all
135 * counters in the current task. 135 * counters in the current task.
136 */ 136 */
137 #define PR_TASK_PERF_EVENTS_DISABLE 31 137 #define PR_TASK_PERF_EVENTS_DISABLE 31
138 #define PR_TASK_PERF_EVENTS_ENABLE 32 138 #define PR_TASK_PERF_EVENTS_ENABLE 32
139 139
140 #ifndef NSEC_PER_SEC 140 #ifndef NSEC_PER_SEC
141 # define NSEC_PER_SEC 1000000000ULL 141 # define NSEC_PER_SEC 1000000000ULL
142 #endif 142 #endif
143 143
144 static inline unsigned long long rdclock(void) 144 static inline unsigned long long rdclock(void)
145 { 145 {
146 struct timespec ts; 146 struct timespec ts;
147 147
148 clock_gettime(CLOCK_MONOTONIC, &ts); 148 clock_gettime(CLOCK_MONOTONIC, &ts);
149 return ts.tv_sec * 1000000000ULL + ts.tv_nsec; 149 return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
150 } 150 }
151 151
152 /* 152 /*
153 * Pick up some kernel type conventions: 153 * Pick up some kernel type conventions:
154 */ 154 */
155 #define __user 155 #define __user
156 #define asmlinkage 156 #define asmlinkage
157 157
158 #define unlikely(x) __builtin_expect(!!(x), 0) 158 #define unlikely(x) __builtin_expect(!!(x), 0)
159 #define min(x, y) ({ \ 159 #define min(x, y) ({ \
160 typeof(x) _min1 = (x); \ 160 typeof(x) _min1 = (x); \
161 typeof(y) _min2 = (y); \ 161 typeof(y) _min2 = (y); \
162 (void) (&_min1 == &_min2); \ 162 (void) (&_min1 == &_min2); \
163 _min1 < _min2 ? _min1 : _min2; }) 163 _min1 < _min2 ? _min1 : _min2; })
164 164
165 static inline int 165 static inline int
166 sys_perf_event_open(struct perf_event_attr *attr, 166 sys_perf_event_open(struct perf_event_attr *attr,
167 pid_t pid, int cpu, int group_fd, 167 pid_t pid, int cpu, int group_fd,
168 unsigned long flags) 168 unsigned long flags)
169 { 169 {
170 attr->size = sizeof(*attr); 170 attr->size = sizeof(*attr);
171 return syscall(__NR_perf_event_open, attr, pid, cpu, 171 return syscall(__NR_perf_event_open, attr, pid, cpu,
172 group_fd, flags); 172 group_fd, flags);
173 } 173 }
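The wrapper only fills in attr->size and forwards everything else to the raw syscall; the attr describes the event completely. A self-contained usage sketch (not how the tools themselves open events, which go through perf_evsel__open(); assumes <sys/ioctl.h> on top of the headers above) that counts instructions retired by the calling thread:

	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_INSTRUCTIONS,
		.disabled	= 1,
		.exclude_kernel	= 1,
	};
	int fd = sys_perf_event_open(&attr, 0 /* this thread */, -1 /* any cpu */,
				     -1 /* no group */, 0);
	u64 count;

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... run the code being measured ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	read(fd, &count, sizeof(count));	/* no read_format set, so a single u64 */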
174 174
175 #define MAX_COUNTERS 256 175 #define MAX_COUNTERS 256
176 #define MAX_NR_CPUS 256 176 #define MAX_NR_CPUS 256
177 177
178 struct ip_callchain { 178 struct ip_callchain {
179 u64 nr; 179 u64 nr;
180 u64 ips[0]; 180 u64 ips[0];
181 }; 181 };
182 182
183 extern bool perf_host, perf_guest; 183 extern bool perf_host, perf_guest;
184 extern const char perf_version_string[]; 184 extern const char perf_version_string[];
185 185
186 void pthread__unblock_sigwinch(void); 186 void pthread__unblock_sigwinch(void);
187 187
188 struct perf_record_opts { 188 struct perf_record_opts {
189 pid_t target_pid; 189 pid_t target_pid;
190 pid_t target_tid; 190 pid_t target_tid;
191 bool call_graph; 191 bool call_graph;
192 bool group; 192 bool group;
193 bool inherit_stat; 193 bool inherit_stat;
194 bool no_delay; 194 bool no_delay;
195 bool no_inherit; 195 bool no_inherit;
196 bool no_samples; 196 bool no_samples;
197 bool pipe_output; 197 bool pipe_output;
198 bool raw_samples; 198 bool raw_samples;
199 bool sample_address; 199 bool sample_address;
200 bool sample_time; 200 bool sample_time;
201 bool sample_id_all_avail; 201 bool sample_id_all_avail;
202 bool system_wide; 202 bool system_wide;
203 bool period;
203 unsigned int freq; 204 unsigned int freq;
204 unsigned int mmap_pages; 205 unsigned int mmap_pages;
205 unsigned int user_freq; 206 unsigned int user_freq;
206 u64 default_interval; 207 u64 default_interval;
207 u64 user_interval; 208 u64 user_interval;
208 const char *cpu_list; 209 const char *cpu_list;
209 }; 210 };
210 211
211 #endif 212 #endif
212 213
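The single addition to perf.h is the period flag in perf_record_opts. It is set from the record command's option table in builtin-record.c; a sketch of that wiring, with the exact option spelling and help text as assumptions:

	/* sketch -- option name and help string are assumptions */
	OPT_BOOLEAN(0, "period", &record.opts.period,
		    "Record the sample period"),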
tools/perf/util/evsel.c
1 /* 1 /*
2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3 * 3 *
4 * Parts came from builtin-{top,stat,record}.c, see those files for further 4 * Parts came from builtin-{top,stat,record}.c, see those files for further
5 * copyright notes. 5 * copyright notes.
6 * 6 *
7 * Released under the GPL v2. (and only v2, not any later version) 7 * Released under the GPL v2. (and only v2, not any later version)
8 */ 8 */
9 9
10 #include <byteswap.h> 10 #include <byteswap.h>
11 #include "asm/bug.h" 11 #include "asm/bug.h"
12 #include "evsel.h" 12 #include "evsel.h"
13 #include "evlist.h" 13 #include "evlist.h"
14 #include "util.h" 14 #include "util.h"
15 #include "cpumap.h" 15 #include "cpumap.h"
16 #include "thread_map.h" 16 #include "thread_map.h"
17 17
18 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 18 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
19 #define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0)) 19 #define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))
20 20
21 int __perf_evsel__sample_size(u64 sample_type) 21 int __perf_evsel__sample_size(u64 sample_type)
22 { 22 {
23 u64 mask = sample_type & PERF_SAMPLE_MASK; 23 u64 mask = sample_type & PERF_SAMPLE_MASK;
24 int size = 0; 24 int size = 0;
25 int i; 25 int i;
26 26
27 for (i = 0; i < 64; i++) { 27 for (i = 0; i < 64; i++) {
28 if (mask & (1ULL << i)) 28 if (mask & (1ULL << i))
29 size++; 29 size++;
30 } 30 }
31 31
32 size *= sizeof(u64); 32 size *= sizeof(u64);
33 33
34 return size; 34 return size;
35 } 35 }
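__perf_evsel__sample_size() simply counts the bits of sample_type that fall inside PERF_SAMPLE_MASK (the fixed-size fields) and charges one u64 per bit; PERF_SAMPLE_TID counts once because pid and tid share a single u64. For example:

	u64 type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME;
	int sz = __perf_evsel__sample_size(type);	/* 3 bits set -> 3 * 8 == 24 bytes */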
36 36
37 static void hists__init(struct hists *hists) 37 static void hists__init(struct hists *hists)
38 { 38 {
39 memset(hists, 0, sizeof(*hists)); 39 memset(hists, 0, sizeof(*hists));
40 hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT; 40 hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
41 hists->entries_in = &hists->entries_in_array[0]; 41 hists->entries_in = &hists->entries_in_array[0];
42 hists->entries_collapsed = RB_ROOT; 42 hists->entries_collapsed = RB_ROOT;
43 hists->entries = RB_ROOT; 43 hists->entries = RB_ROOT;
44 pthread_mutex_init(&hists->lock, NULL); 44 pthread_mutex_init(&hists->lock, NULL);
45 } 45 }
46 46
47 void perf_evsel__init(struct perf_evsel *evsel, 47 void perf_evsel__init(struct perf_evsel *evsel,
48 struct perf_event_attr *attr, int idx) 48 struct perf_event_attr *attr, int idx)
49 { 49 {
50 evsel->idx = idx; 50 evsel->idx = idx;
51 evsel->attr = *attr; 51 evsel->attr = *attr;
52 INIT_LIST_HEAD(&evsel->node); 52 INIT_LIST_HEAD(&evsel->node);
53 hists__init(&evsel->hists); 53 hists__init(&evsel->hists);
54 } 54 }
55 55
56 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) 56 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
57 { 57 {
58 struct perf_evsel *evsel = zalloc(sizeof(*evsel)); 58 struct perf_evsel *evsel = zalloc(sizeof(*evsel));
59 59
60 if (evsel != NULL) 60 if (evsel != NULL)
61 perf_evsel__init(evsel, attr, idx); 61 perf_evsel__init(evsel, attr, idx);
62 62
63 return evsel; 63 return evsel;
64 } 64 }
65 65
66 void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) 66 void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
67 { 67 {
68 struct perf_event_attr *attr = &evsel->attr; 68 struct perf_event_attr *attr = &evsel->attr;
69 int track = !evsel->idx; /* only the first counter needs these */ 69 int track = !evsel->idx; /* only the first counter needs these */
70 70
71 attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; 71 attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
72 attr->inherit = !opts->no_inherit; 72 attr->inherit = !opts->no_inherit;
73 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 73 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
74 PERF_FORMAT_TOTAL_TIME_RUNNING | 74 PERF_FORMAT_TOTAL_TIME_RUNNING |
75 PERF_FORMAT_ID; 75 PERF_FORMAT_ID;
76 76
77 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; 77 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
78 78
79 /* 79 /*
80 * We default some events to a default interval of 1. But keep 80 * We default some events to a default interval of 1. But keep
81 * it a weak assumption overridable by the user. 81 * it a weak assumption overridable by the user.
82 */ 82 */
83 if (!attr->sample_period || (opts->user_freq != UINT_MAX && 83 if (!attr->sample_period || (opts->user_freq != UINT_MAX &&
84 opts->user_interval != ULLONG_MAX)) { 84 opts->user_interval != ULLONG_MAX)) {
85 if (opts->freq) { 85 if (opts->freq) {
86 attr->sample_type |= PERF_SAMPLE_PERIOD; 86 attr->sample_type |= PERF_SAMPLE_PERIOD;
87 attr->freq = 1; 87 attr->freq = 1;
88 attr->sample_freq = opts->freq; 88 attr->sample_freq = opts->freq;
89 } else { 89 } else {
90 attr->sample_period = opts->default_interval; 90 attr->sample_period = opts->default_interval;
91 } 91 }
92 } 92 }
93 93
94 if (opts->no_samples) 94 if (opts->no_samples)
95 attr->sample_freq = 0; 95 attr->sample_freq = 0;
96 96
97 if (opts->inherit_stat) 97 if (opts->inherit_stat)
98 attr->inherit_stat = 1; 98 attr->inherit_stat = 1;
99 99
100 if (opts->sample_address) { 100 if (opts->sample_address) {
101 attr->sample_type |= PERF_SAMPLE_ADDR; 101 attr->sample_type |= PERF_SAMPLE_ADDR;
102 attr->mmap_data = track; 102 attr->mmap_data = track;
103 } 103 }
104 104
105 if (opts->call_graph) 105 if (opts->call_graph)
106 attr->sample_type |= PERF_SAMPLE_CALLCHAIN; 106 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
107 107
108 if (opts->system_wide) 108 if (opts->system_wide)
109 attr->sample_type |= PERF_SAMPLE_CPU; 109 attr->sample_type |= PERF_SAMPLE_CPU;
110 110
111 if (opts->period)
112 attr->sample_type |= PERF_SAMPLE_PERIOD;
113
111 if (opts->sample_id_all_avail && 114 if (opts->sample_id_all_avail &&
112 (opts->sample_time || opts->system_wide || 115 (opts->sample_time || opts->system_wide ||
113 !opts->no_inherit || opts->cpu_list)) 116 !opts->no_inherit || opts->cpu_list))
114 attr->sample_type |= PERF_SAMPLE_TIME; 117 attr->sample_type |= PERF_SAMPLE_TIME;
115 118
116 if (opts->raw_samples) { 119 if (opts->raw_samples) {
117 attr->sample_type |= PERF_SAMPLE_TIME; 120 attr->sample_type |= PERF_SAMPLE_TIME;
118 attr->sample_type |= PERF_SAMPLE_RAW; 121 attr->sample_type |= PERF_SAMPLE_RAW;
119 attr->sample_type |= PERF_SAMPLE_CPU; 122 attr->sample_type |= PERF_SAMPLE_CPU;
120 } 123 }
121 124
122 if (opts->no_delay) { 125 if (opts->no_delay) {
123 attr->watermark = 0; 126 attr->watermark = 0;
124 attr->wakeup_events = 1; 127 attr->wakeup_events = 1;
125 } 128 }
126 129
127 attr->mmap = track; 130 attr->mmap = track;
128 attr->comm = track; 131 attr->comm = track;
129 132
130 if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) { 133 if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) {
131 attr->disabled = 1; 134 attr->disabled = 1;
132 attr->enable_on_exec = 1; 135 attr->enable_on_exec = 1;
133 } 136 }
134 } 137 }
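The two added lines are the user-space half of the feature: when the record command asks for it, PERF_SAMPLE_PERIOD is ORed into sample_type even for events with a fixed period, so each PERF_RECORD_SAMPLE carries the number of events it stands for. A small sanity-check sketch of that path; the UINT_MAX/ULLONG_MAX sentinels mirror how the record command leaves the user knobs unset, and the opts values are illustrative:

	struct perf_event_attr attr = { .sample_period = 1 };
	struct perf_record_opts opts = {
		.period		= true,
		.user_freq	= UINT_MAX,	/* no -F given */
		.user_interval	= ULLONG_MAX,	/* no -c given */
	};
	struct perf_evsel *evsel = perf_evsel__new(&attr, 0);

	perf_evsel__config(evsel, &opts);
	assert(evsel->attr.sample_type & PERF_SAMPLE_PERIOD);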
135 138
136 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 139 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
137 { 140 {
138 int cpu, thread; 141 int cpu, thread;
139 evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); 142 evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
140 143
141 if (evsel->fd) { 144 if (evsel->fd) {
142 for (cpu = 0; cpu < ncpus; cpu++) { 145 for (cpu = 0; cpu < ncpus; cpu++) {
143 for (thread = 0; thread < nthreads; thread++) { 146 for (thread = 0; thread < nthreads; thread++) {
144 FD(evsel, cpu, thread) = -1; 147 FD(evsel, cpu, thread) = -1;
145 } 148 }
146 } 149 }
147 } 150 }
148 151
149 return evsel->fd != NULL ? 0 : -ENOMEM; 152 return evsel->fd != NULL ? 0 : -ENOMEM;
150 } 153 }
151 154
152 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) 155 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
153 { 156 {
154 evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); 157 evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
155 if (evsel->sample_id == NULL) 158 if (evsel->sample_id == NULL)
156 return -ENOMEM; 159 return -ENOMEM;
157 160
158 evsel->id = zalloc(ncpus * nthreads * sizeof(u64)); 161 evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
159 if (evsel->id == NULL) { 162 if (evsel->id == NULL) {
160 xyarray__delete(evsel->sample_id); 163 xyarray__delete(evsel->sample_id);
161 evsel->sample_id = NULL; 164 evsel->sample_id = NULL;
162 return -ENOMEM; 165 return -ENOMEM;
163 } 166 }
164 167
165 return 0; 168 return 0;
166 } 169 }
167 170
168 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) 171 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
169 { 172 {
170 evsel->counts = zalloc((sizeof(*evsel->counts) + 173 evsel->counts = zalloc((sizeof(*evsel->counts) +
171 (ncpus * sizeof(struct perf_counts_values)))); 174 (ncpus * sizeof(struct perf_counts_values))));
172 return evsel->counts != NULL ? 0 : -ENOMEM; 175 return evsel->counts != NULL ? 0 : -ENOMEM;
173 } 176 }
174 177
175 void perf_evsel__free_fd(struct perf_evsel *evsel) 178 void perf_evsel__free_fd(struct perf_evsel *evsel)
176 { 179 {
177 xyarray__delete(evsel->fd); 180 xyarray__delete(evsel->fd);
178 evsel->fd = NULL; 181 evsel->fd = NULL;
179 } 182 }
180 183
181 void perf_evsel__free_id(struct perf_evsel *evsel) 184 void perf_evsel__free_id(struct perf_evsel *evsel)
182 { 185 {
183 xyarray__delete(evsel->sample_id); 186 xyarray__delete(evsel->sample_id);
184 evsel->sample_id = NULL; 187 evsel->sample_id = NULL;
185 free(evsel->id); 188 free(evsel->id);
186 evsel->id = NULL; 189 evsel->id = NULL;
187 } 190 }
188 191
189 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 192 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
190 { 193 {
191 int cpu, thread; 194 int cpu, thread;
192 195
193 for (cpu = 0; cpu < ncpus; cpu++) 196 for (cpu = 0; cpu < ncpus; cpu++)
194 for (thread = 0; thread < nthreads; ++thread) { 197 for (thread = 0; thread < nthreads; ++thread) {
195 close(FD(evsel, cpu, thread)); 198 close(FD(evsel, cpu, thread));
196 FD(evsel, cpu, thread) = -1; 199 FD(evsel, cpu, thread) = -1;
197 } 200 }
198 } 201 }
199 202
200 void perf_evsel__exit(struct perf_evsel *evsel) 203 void perf_evsel__exit(struct perf_evsel *evsel)
201 { 204 {
202 assert(list_empty(&evsel->node)); 205 assert(list_empty(&evsel->node));
203 xyarray__delete(evsel->fd); 206 xyarray__delete(evsel->fd);
204 xyarray__delete(evsel->sample_id); 207 xyarray__delete(evsel->sample_id);
205 free(evsel->id); 208 free(evsel->id);
206 } 209 }
207 210
208 void perf_evsel__delete(struct perf_evsel *evsel) 211 void perf_evsel__delete(struct perf_evsel *evsel)
209 { 212 {
210 perf_evsel__exit(evsel); 213 perf_evsel__exit(evsel);
211 close_cgroup(evsel->cgrp); 214 close_cgroup(evsel->cgrp);
212 free(evsel->name); 215 free(evsel->name);
213 free(evsel); 216 free(evsel);
214 } 217 }
215 218
216 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, 219 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
217 int cpu, int thread, bool scale) 220 int cpu, int thread, bool scale)
218 { 221 {
219 struct perf_counts_values count; 222 struct perf_counts_values count;
220 size_t nv = scale ? 3 : 1; 223 size_t nv = scale ? 3 : 1;
221 224
222 if (FD(evsel, cpu, thread) < 0) 225 if (FD(evsel, cpu, thread) < 0)
223 return -EINVAL; 226 return -EINVAL;
224 227
225 if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0) 228 if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
226 return -ENOMEM; 229 return -ENOMEM;
227 230
228 if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) 231 if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
229 return -errno; 232 return -errno;
230 233
231 if (scale) { 234 if (scale) {
232 if (count.run == 0) 235 if (count.run == 0)
233 count.val = 0; 236 count.val = 0;
234 else if (count.run < count.ena) 237 else if (count.run < count.ena)
235 count.val = (u64)((double)count.val * count.ena / count.run + 0.5); 238 count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
236 } else 239 } else
237 count.ena = count.run = 0; 240 count.ena = count.run = 0;
238 241
239 evsel->counts->cpu[cpu] = count; 242 evsel->counts->cpu[cpu] = count;
240 return 0; 243 return 0;
241 } 244 }
242 245
243 int __perf_evsel__read(struct perf_evsel *evsel, 246 int __perf_evsel__read(struct perf_evsel *evsel,
244 int ncpus, int nthreads, bool scale) 247 int ncpus, int nthreads, bool scale)
245 { 248 {
246 size_t nv = scale ? 3 : 1; 249 size_t nv = scale ? 3 : 1;
247 int cpu, thread; 250 int cpu, thread;
248 struct perf_counts_values *aggr = &evsel->counts->aggr, count; 251 struct perf_counts_values *aggr = &evsel->counts->aggr, count;
249 252
250 aggr->val = aggr->ena = aggr->run = 0; 253 aggr->val = aggr->ena = aggr->run = 0;
251 254
252 for (cpu = 0; cpu < ncpus; cpu++) { 255 for (cpu = 0; cpu < ncpus; cpu++) {
253 for (thread = 0; thread < nthreads; thread++) { 256 for (thread = 0; thread < nthreads; thread++) {
254 if (FD(evsel, cpu, thread) < 0) 257 if (FD(evsel, cpu, thread) < 0)
255 continue; 258 continue;
256 259
257 if (readn(FD(evsel, cpu, thread), 260 if (readn(FD(evsel, cpu, thread),
258 &count, nv * sizeof(u64)) < 0) 261 &count, nv * sizeof(u64)) < 0)
259 return -errno; 262 return -errno;
260 263
261 aggr->val += count.val; 264 aggr->val += count.val;
262 if (scale) { 265 if (scale) {
263 aggr->ena += count.ena; 266 aggr->ena += count.ena;
264 aggr->run += count.run; 267 aggr->run += count.run;
265 } 268 }
266 } 269 }
267 } 270 }
268 271
269 evsel->counts->scaled = 0; 272 evsel->counts->scaled = 0;
270 if (scale) { 273 if (scale) {
271 if (aggr->run == 0) { 274 if (aggr->run == 0) {
272 evsel->counts->scaled = -1; 275 evsel->counts->scaled = -1;
273 aggr->val = 0; 276 aggr->val = 0;
274 return 0; 277 return 0;
275 } 278 }
276 279
277 if (aggr->run < aggr->ena) { 280 if (aggr->run < aggr->ena) {
278 evsel->counts->scaled = 1; 281 evsel->counts->scaled = 1;
279 aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5); 282 aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
280 } 283 }
281 } else 284 } else
282 aggr->ena = aggr->run = 0; 285 aggr->ena = aggr->run = 0;
283 286
284 return 0; 287 return 0;
285 } 288 }
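Both read paths apply the same correction for counters that were multiplexed off the PMU: a value observed while the event ran for only part of the enabled time is scaled up by ena/run, and counts->scaled records whether that happened. A worked example of the arithmetic:

	struct perf_counts_values c;

	c.val = 1000;
	c.ena = 2000000;	/* ns the event was enabled */
	c.run = 1000000;	/* ns it actually ran: half the time */

	/* same rounding as above: 1000 * 2000000 / 1000000 + 0.5 -> 2000 */
	c.val = (u64)((double)c.val * c.ena / c.run + 0.5);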
286 289
287 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, 290 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
288 struct thread_map *threads, bool group, 291 struct thread_map *threads, bool group,
289 struct xyarray *group_fds) 292 struct xyarray *group_fds)
290 { 293 {
291 int cpu, thread; 294 int cpu, thread;
292 unsigned long flags = 0; 295 unsigned long flags = 0;
293 int pid = -1, err; 296 int pid = -1, err;
294 297
295 if (evsel->fd == NULL && 298 if (evsel->fd == NULL &&
296 perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) 299 perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
297 return -ENOMEM; 300 return -ENOMEM;
298 301
299 if (evsel->cgrp) { 302 if (evsel->cgrp) {
300 flags = PERF_FLAG_PID_CGROUP; 303 flags = PERF_FLAG_PID_CGROUP;
301 pid = evsel->cgrp->fd; 304 pid = evsel->cgrp->fd;
302 } 305 }
303 306
304 for (cpu = 0; cpu < cpus->nr; cpu++) { 307 for (cpu = 0; cpu < cpus->nr; cpu++) {
305 int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1; 308 int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1;
306 309
307 for (thread = 0; thread < threads->nr; thread++) { 310 for (thread = 0; thread < threads->nr; thread++) {
308 311
309 if (!evsel->cgrp) 312 if (!evsel->cgrp)
310 pid = threads->map[thread]; 313 pid = threads->map[thread];
311 314
312 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, 315 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
313 pid, 316 pid,
314 cpus->map[cpu], 317 cpus->map[cpu],
315 group_fd, flags); 318 group_fd, flags);
316 if (FD(evsel, cpu, thread) < 0) { 319 if (FD(evsel, cpu, thread) < 0) {
317 err = -errno; 320 err = -errno;
318 goto out_close; 321 goto out_close;
319 } 322 }
320 323
321 if (group && group_fd == -1) 324 if (group && group_fd == -1)
322 group_fd = FD(evsel, cpu, thread); 325 group_fd = FD(evsel, cpu, thread);
323 } 326 }
324 } 327 }
325 328
326 return 0; 329 return 0;
327 330
328 out_close: 331 out_close:
329 do { 332 do {
330 while (--thread >= 0) { 333 while (--thread >= 0) {
331 close(FD(evsel, cpu, thread)); 334 close(FD(evsel, cpu, thread));
332 FD(evsel, cpu, thread) = -1; 335 FD(evsel, cpu, thread) = -1;
333 } 336 }
334 thread = threads->nr; 337 thread = threads->nr;
335 } while (--cpu >= 0); 338 } while (--cpu >= 0);
336 return err; 339 return err;
337 } 340 }
338 341
339 void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads) 342 void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
340 { 343 {
341 if (evsel->fd == NULL) 344 if (evsel->fd == NULL)
342 return; 345 return;
343 346
344 perf_evsel__close_fd(evsel, ncpus, nthreads); 347 perf_evsel__close_fd(evsel, ncpus, nthreads);
345 perf_evsel__free_fd(evsel); 348 perf_evsel__free_fd(evsel);
346 evsel->fd = NULL; 349 evsel->fd = NULL;
347 } 350 }
348 351
349 static struct { 352 static struct {
350 struct cpu_map map; 353 struct cpu_map map;
351 int cpus[1]; 354 int cpus[1];
352 } empty_cpu_map = { 355 } empty_cpu_map = {
353 .map.nr = 1, 356 .map.nr = 1,
354 .cpus = { -1, }, 357 .cpus = { -1, },
355 }; 358 };
356 359
357 static struct { 360 static struct {
358 struct thread_map map; 361 struct thread_map map;
359 int threads[1]; 362 int threads[1];
360 } empty_thread_map = { 363 } empty_thread_map = {
361 .map.nr = 1, 364 .map.nr = 1,
362 .threads = { -1, }, 365 .threads = { -1, },
363 }; 366 };
364 367
365 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, 368 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
366 struct thread_map *threads, bool group, 369 struct thread_map *threads, bool group,
367 struct xyarray *group_fd) 370 struct xyarray *group_fd)
368 { 371 {
369 if (cpus == NULL) { 372 if (cpus == NULL) {
370 /* Work around old compiler warnings about strict aliasing */ 373 /* Work around old compiler warnings about strict aliasing */
371 cpus = &empty_cpu_map.map; 374 cpus = &empty_cpu_map.map;
372 } 375 }
373 376
374 if (threads == NULL) 377 if (threads == NULL)
375 threads = &empty_thread_map.map; 378 threads = &empty_thread_map.map;
376 379
377 return __perf_evsel__open(evsel, cpus, threads, group, group_fd); 380 return __perf_evsel__open(evsel, cpus, threads, group, group_fd);
378 } 381 }
379 382
380 int perf_evsel__open_per_cpu(struct perf_evsel *evsel, 383 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
381 struct cpu_map *cpus, bool group, 384 struct cpu_map *cpus, bool group,
382 struct xyarray *group_fd) 385 struct xyarray *group_fd)
383 { 386 {
384 return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group, 387 return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group,
385 group_fd); 388 group_fd);
386 } 389 }
387 390
388 int perf_evsel__open_per_thread(struct perf_evsel *evsel, 391 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
389 struct thread_map *threads, bool group, 392 struct thread_map *threads, bool group,
390 struct xyarray *group_fd) 393 struct xyarray *group_fd)
391 { 394 {
392 return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, 395 return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group,
393 group_fd); 396 group_fd);
394 } 397 }
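The three open wrappers differ only in which dummy map they substitute for a missing cpu or thread list. A sketch of the per-thread path, roughly what perf stat or record do for an existing process; the thread_map__new() argument order and the error handling are simplified assumptions:

	struct perf_event_attr attr = { .type = PERF_TYPE_SOFTWARE,
					.config = PERF_COUNT_SW_TASK_CLOCK };
	struct thread_map *threads = thread_map__new(getpid(), -1);	/* assumption: (pid, tid) */
	struct perf_evsel *evsel = perf_evsel__new(&attr, 0);

	if (perf_evsel__open_per_thread(evsel, threads, false, NULL) < 0)
		pr_err("perf_event_open failed\n");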
395 398
396 static int perf_event__parse_id_sample(const union perf_event *event, u64 type, 399 static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
397 struct perf_sample *sample) 400 struct perf_sample *sample)
398 { 401 {
399 const u64 *array = event->sample.array; 402 const u64 *array = event->sample.array;
400 403
401 array += ((event->header.size - 404 array += ((event->header.size -
402 sizeof(event->header)) / sizeof(u64)) - 1; 405 sizeof(event->header)) / sizeof(u64)) - 1;
403 406
404 if (type & PERF_SAMPLE_CPU) { 407 if (type & PERF_SAMPLE_CPU) {
405 u32 *p = (u32 *)array; 408 u32 *p = (u32 *)array;
406 sample->cpu = *p; 409 sample->cpu = *p;
407 array--; 410 array--;
408 } 411 }
409 412
410 if (type & PERF_SAMPLE_STREAM_ID) { 413 if (type & PERF_SAMPLE_STREAM_ID) {
411 sample->stream_id = *array; 414 sample->stream_id = *array;
412 array--; 415 array--;
413 } 416 }
414 417
415 if (type & PERF_SAMPLE_ID) { 418 if (type & PERF_SAMPLE_ID) {
416 sample->id = *array; 419 sample->id = *array;
417 array--; 420 array--;
418 } 421 }
419 422
420 if (type & PERF_SAMPLE_TIME) { 423 if (type & PERF_SAMPLE_TIME) {
421 sample->time = *array; 424 sample->time = *array;
422 array--; 425 array--;
423 } 426 }
424 427
425 if (type & PERF_SAMPLE_TID) { 428 if (type & PERF_SAMPLE_TID) {
426 u32 *p = (u32 *)array; 429 u32 *p = (u32 *)array;
427 sample->pid = p[0]; 430 sample->pid = p[0];
428 sample->tid = p[1]; 431 sample->tid = p[1];
429 } 432 }
430 433
431 return 0; 434 return 0;
432 } 435 }
433 436
434 static bool sample_overlap(const union perf_event *event, 437 static bool sample_overlap(const union perf_event *event,
435 const void *offset, u64 size) 438 const void *offset, u64 size)
436 { 439 {
437 const void *base = event; 440 const void *base = event;
438 441
439 if (offset + size > base + event->header.size) 442 if (offset + size > base + event->header.size)
440 return true; 443 return true;
441 444
442 return false; 445 return false;
443 } 446 }
444 447
445 int perf_event__parse_sample(const union perf_event *event, u64 type, 448 int perf_event__parse_sample(const union perf_event *event, u64 type,
446 int sample_size, bool sample_id_all, 449 int sample_size, bool sample_id_all,
447 struct perf_sample *data, bool swapped) 450 struct perf_sample *data, bool swapped)
448 { 451 {
449 const u64 *array; 452 const u64 *array;
450 453
451 /* 454 /*
452 * used for cross-endian analysis. See git commit 65014ab3 455 * used for cross-endian analysis. See git commit 65014ab3
453 * for why this goofiness is needed. 456 * for why this goofiness is needed.
454 */ 457 */
455 union { 458 union {
456 u64 val64; 459 u64 val64;
457 u32 val32[2]; 460 u32 val32[2];
458 } u; 461 } u;
459 462
460 463
461 data->cpu = data->pid = data->tid = -1; 464 data->cpu = data->pid = data->tid = -1;
462 data->stream_id = data->id = data->time = -1ULL; 465 data->stream_id = data->id = data->time = -1ULL;
463 466
464 if (event->header.type != PERF_RECORD_SAMPLE) { 467 if (event->header.type != PERF_RECORD_SAMPLE) {
465 if (!sample_id_all) 468 if (!sample_id_all)
466 return 0; 469 return 0;
467 return perf_event__parse_id_sample(event, type, data); 470 return perf_event__parse_id_sample(event, type, data);
468 } 471 }
469 472
470 array = event->sample.array; 473 array = event->sample.array;
471 474
472 if (sample_size + sizeof(event->header) > event->header.size) 475 if (sample_size + sizeof(event->header) > event->header.size)
473 return -EFAULT; 476 return -EFAULT;
474 477
475 if (type & PERF_SAMPLE_IP) { 478 if (type & PERF_SAMPLE_IP) {
476 data->ip = event->ip.ip; 479 data->ip = event->ip.ip;
477 array++; 480 array++;
478 } 481 }
479 482
480 if (type & PERF_SAMPLE_TID) { 483 if (type & PERF_SAMPLE_TID) {
481 u.val64 = *array; 484 u.val64 = *array;
482 if (swapped) { 485 if (swapped) {
483 /* undo swap of u64, then swap on individual u32s */ 486 /* undo swap of u64, then swap on individual u32s */
484 u.val64 = bswap_64(u.val64); 487 u.val64 = bswap_64(u.val64);
485 u.val32[0] = bswap_32(u.val32[0]); 488 u.val32[0] = bswap_32(u.val32[0]);
486 u.val32[1] = bswap_32(u.val32[1]); 489 u.val32[1] = bswap_32(u.val32[1]);
487 } 490 }
488 491
489 data->pid = u.val32[0]; 492 data->pid = u.val32[0];
490 data->tid = u.val32[1]; 493 data->tid = u.val32[1];
491 array++; 494 array++;
492 } 495 }
493 496
494 if (type & PERF_SAMPLE_TIME) { 497 if (type & PERF_SAMPLE_TIME) {
495 data->time = *array; 498 data->time = *array;
496 array++; 499 array++;
497 } 500 }
498 501
499 data->addr = 0; 502 data->addr = 0;
500 if (type & PERF_SAMPLE_ADDR) { 503 if (type & PERF_SAMPLE_ADDR) {
501 data->addr = *array; 504 data->addr = *array;
502 array++; 505 array++;
503 } 506 }
504 507
505 data->id = -1ULL; 508 data->id = -1ULL;
506 if (type & PERF_SAMPLE_ID) { 509 if (type & PERF_SAMPLE_ID) {
507 data->id = *array; 510 data->id = *array;
508 array++; 511 array++;
509 } 512 }
510 513
511 if (type & PERF_SAMPLE_STREAM_ID) { 514 if (type & PERF_SAMPLE_STREAM_ID) {
512 data->stream_id = *array; 515 data->stream_id = *array;
513 array++; 516 array++;
514 } 517 }
515 518
516 if (type & PERF_SAMPLE_CPU) { 519 if (type & PERF_SAMPLE_CPU) {
517 520
518 u.val64 = *array; 521 u.val64 = *array;
519 if (swapped) { 522 if (swapped) {
520 /* undo swap of u64, then swap on individual u32s */ 523 /* undo swap of u64, then swap on individual u32s */
521 u.val64 = bswap_64(u.val64); 524 u.val64 = bswap_64(u.val64);
522 u.val32[0] = bswap_32(u.val32[0]); 525 u.val32[0] = bswap_32(u.val32[0]);
523 } 526 }
524 527
525 data->cpu = u.val32[0]; 528 data->cpu = u.val32[0];
526 array++; 529 array++;
527 } 530 }
528 531
529 if (type & PERF_SAMPLE_PERIOD) { 532 if (type & PERF_SAMPLE_PERIOD) {
530 data->period = *array; 533 data->period = *array;
531 array++; 534 array++;
532 } 535 }
533 536
534 if (type & PERF_SAMPLE_READ) { 537 if (type & PERF_SAMPLE_READ) {
535 fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n"); 538 fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
536 return -1; 539 return -1;
537 } 540 }
538 541
539 if (type & PERF_SAMPLE_CALLCHAIN) { 542 if (type & PERF_SAMPLE_CALLCHAIN) {
540 if (sample_overlap(event, array, sizeof(data->callchain->nr))) 543 if (sample_overlap(event, array, sizeof(data->callchain->nr)))
541 return -EFAULT; 544 return -EFAULT;
542 545
543 data->callchain = (struct ip_callchain *)array; 546 data->callchain = (struct ip_callchain *)array;
544 547
545 if (sample_overlap(event, array, data->callchain->nr)) 548 if (sample_overlap(event, array, data->callchain->nr))
546 return -EFAULT; 549 return -EFAULT;
547 550
548 array += 1 + data->callchain->nr; 551 array += 1 + data->callchain->nr;
549 } 552 }
550 553
551 if (type & PERF_SAMPLE_RAW) { 554 if (type & PERF_SAMPLE_RAW) {
552 const u64 *pdata; 555 const u64 *pdata;
553 556
554 u.val64 = *array; 557 u.val64 = *array;
555 if (WARN_ONCE(swapped, 558 if (WARN_ONCE(swapped,
556 "Endianness of raw data not corrected!\n")) { 559 "Endianness of raw data not corrected!\n")) {
557 /* undo swap of u64, then swap on individual u32s */ 560 /* undo swap of u64, then swap on individual u32s */
558 u.val64 = bswap_64(u.val64); 561 u.val64 = bswap_64(u.val64);
559 u.val32[0] = bswap_32(u.val32[0]); 562 u.val32[0] = bswap_32(u.val32[0]);
560 u.val32[1] = bswap_32(u.val32[1]); 563 u.val32[1] = bswap_32(u.val32[1]);
561 } 564 }
562 565
563 if (sample_overlap(event, array, sizeof(u32))) 566 if (sample_overlap(event, array, sizeof(u32)))
564 return -EFAULT; 567 return -EFAULT;
565 568
566 data->raw_size = u.val32[0]; 569 data->raw_size = u.val32[0];
567 pdata = (void *) array + sizeof(u32); 570 pdata = (void *) array + sizeof(u32);
568 571
569 if (sample_overlap(event, pdata, data->raw_size)) 572 if (sample_overlap(event, pdata, data->raw_size))
570 return -EFAULT; 573 return -EFAULT;
571 574
572 data->raw_data = (void *) pdata; 575 data->raw_data = (void *) pdata;
573 } 576 }
574 577
575 return 0; 578 return 0;
576 } 579 }
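With PERF_SAMPLE_PERIOD now set by perf record, perf_event__parse_sample() fills sample->period, and report-side code can weight each record by the number of events it represents instead of counting one per sample. A hedged sketch of such a consumer; the callback shape and the total_events accumulator are illustrative, not part of this commit:

	static u64 total_events;

	static int process_sample(const union perf_event *event, u64 type,
				  int sample_size, bool sample_id_all, bool swapped)
	{
		struct perf_sample sample;

		if (perf_event__parse_sample(event, type, sample_size,
					     sample_id_all, &sample, swapped) < 0)
			return -1;

		total_events += sample.period;	/* one record may stand for many events */
		return 0;
	}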
577 580
578 int perf_event__synthesize_sample(union perf_event *event, u64 type, 581 int perf_event__synthesize_sample(union perf_event *event, u64 type,
579 const struct perf_sample *sample, 582 const struct perf_sample *sample,
580 bool swapped) 583 bool swapped)
581 { 584 {
582 u64 *array; 585 u64 *array;
583 586
584 /* 587 /*
585 * used for cross-endian analysis. See git commit 65014ab3 588 * used for cross-endian analysis. See git commit 65014ab3
586 * for why this goofiness is needed. 589 * for why this goofiness is needed.
587 */ 590 */
588 union { 591 union {
589 u64 val64; 592 u64 val64;
590 u32 val32[2]; 593 u32 val32[2];
591 } u; 594 } u;
592 595
593 array = event->sample.array; 596 array = event->sample.array;
594 597
595 if (type & PERF_SAMPLE_IP) { 598 if (type & PERF_SAMPLE_IP) {
596 event->ip.ip = sample->ip; 599 event->ip.ip = sample->ip;
597 array++; 600 array++;
598 } 601 }
599 602
600 if (type & PERF_SAMPLE_TID) { 603 if (type & PERF_SAMPLE_TID) {
601 u.val32[0] = sample->pid; 604 u.val32[0] = sample->pid;
602 u.val32[1] = sample->tid; 605 u.val32[1] = sample->tid;
603 if (swapped) { 606 if (swapped) {
604 /* 607 /*
605 * Inverse of what is done in perf_event__parse_sample 608 * Inverse of what is done in perf_event__parse_sample
606 */ 609 */
607 u.val32[0] = bswap_32(u.val32[0]); 610 u.val32[0] = bswap_32(u.val32[0]);
608 u.val32[1] = bswap_32(u.val32[1]); 611 u.val32[1] = bswap_32(u.val32[1]);
609 u.val64 = bswap_64(u.val64); 612 u.val64 = bswap_64(u.val64);
610 } 613 }
611 614
612 *array = u.val64; 615 *array = u.val64;
613 array++; 616 array++;
614 } 617 }
615 618
616 if (type & PERF_SAMPLE_TIME) { 619 if (type & PERF_SAMPLE_TIME) {
617 *array = sample->time; 620 *array = sample->time;
618 array++; 621 array++;
619 } 622 }
620 623
621 if (type & PERF_SAMPLE_ADDR) { 624 if (type & PERF_SAMPLE_ADDR) {
622 *array = sample->addr; 625 *array = sample->addr;
623 array++; 626 array++;
624 } 627 }
625 628
626 if (type & PERF_SAMPLE_ID) { 629 if (type & PERF_SAMPLE_ID) {
627 *array = sample->id; 630 *array = sample->id;
628 array++; 631 array++;
629 } 632 }
630 633
631 if (type & PERF_SAMPLE_STREAM_ID) { 634 if (type & PERF_SAMPLE_STREAM_ID) {
632 *array = sample->stream_id; 635 *array = sample->stream_id;
633 array++; 636 array++;
634 } 637 }
635 638
636 if (type & PERF_SAMPLE_CPU) { 639 if (type & PERF_SAMPLE_CPU) {
637 u.val32[0] = sample->cpu; 640 u.val32[0] = sample->cpu;
638 if (swapped) { 641 if (swapped) {
639 /* 642 /*
640 * Inverse of what is done in perf_event__parse_sample 643 * Inverse of what is done in perf_event__parse_sample
641 */ 644 */
642 u.val32[0] = bswap_32(u.val32[0]); 645 u.val32[0] = bswap_32(u.val32[0]);
643 u.val64 = bswap_64(u.val64); 646 u.val64 = bswap_64(u.val64);
644 } 647 }
645 *array = u.val64; 648 *array = u.val64;
646 array++; 649 array++;
647 } 650 }
648 651
649 if (type & PERF_SAMPLE_PERIOD) { 652 if (type & PERF_SAMPLE_PERIOD) {
650 *array = sample->period; 653 *array = sample->period;
651 array++; 654 array++;
652 } 655 }
653 656
654 return 0; 657 return 0;
655 } 658 }
656 659