Commit d6d901c23a9c4c7361aa901b5b2dda69703dd5e0
Committed by
Ingo Molnar
1 parent
46be604b5b
Exists in
master
and in
39 other branches
perf events: Change perf parameter --pid to process-wide collection instead of thread-wide
Parameter --pid (or -p) of perf currently means a thread-wide collection. For example, if a process whose id is 8888 has 10 threads, 'perf top -p 8888' just collects the main thread statistics. That's misleading. Users are used to attaching a whole process when debugging a process with gdb. To follow normal usage style, the patch changes --pid to mean process-wide collection and adds --tid (-t) to mean a thread-wide collection. Usage example is: # perf top -p 8888 # perf record -p 8888 -f sleep 10 # perf stat -p 8888 -f sleep 10 The above commands collect the statistics of all threads of process 8888. Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Avi Kivity <avi@redhat.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Sheng Yang <sheng@linux.intel.com> Cc: Joerg Roedel <joro@8bytes.org> Cc: Jes Sorensen <Jes.Sorensen@redhat.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Cc: zhiteng.huang@intel.com Cc: Zachary Amsden <zamsden@redhat.com> LKML-Reference: <1268922965-14774-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Showing 5 changed files with 372 additions and 193 deletions Side-by-side Diff
tools/perf/builtin-record.c
... | ... | @@ -27,7 +27,7 @@ |
27 | 27 | #include <unistd.h> |
28 | 28 | #include <sched.h> |
29 | 29 | |
30 | -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | |
30 | +static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; | |
31 | 31 | |
32 | 32 | static long default_interval = 0; |
33 | 33 | |
... | ... | @@ -43,6 +43,9 @@ |
43 | 43 | static int system_wide = 0; |
44 | 44 | static int profile_cpu = -1; |
45 | 45 | static pid_t target_pid = -1; |
46 | +static pid_t target_tid = -1; | |
47 | +static pid_t *all_tids = NULL; | |
48 | +static int thread_num = 0; | |
46 | 49 | static pid_t child_pid = -1; |
47 | 50 | static int inherit = 1; |
48 | 51 | static int force = 0; |
... | ... | @@ -60,7 +63,7 @@ |
60 | 63 | |
61 | 64 | static u64 bytes_written = 0; |
62 | 65 | |
63 | -static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; | |
66 | +static struct pollfd *event_array; | |
64 | 67 | |
65 | 68 | static int nr_poll = 0; |
66 | 69 | static int nr_cpu = 0; |
... | ... | @@ -77,7 +80,7 @@ |
77 | 80 | unsigned int prev; |
78 | 81 | }; |
79 | 82 | |
80 | -static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; | |
83 | +static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; | |
81 | 84 | |
82 | 85 | static unsigned long mmap_read_head(struct mmap_data *md) |
83 | 86 | { |
84 | 87 | |
... | ... | @@ -225,12 +228,13 @@ |
225 | 228 | return h_attr; |
226 | 229 | } |
227 | 230 | |
228 | -static void create_counter(int counter, int cpu, pid_t pid) | |
231 | +static void create_counter(int counter, int cpu) | |
229 | 232 | { |
230 | 233 | char *filter = filters[counter]; |
231 | 234 | struct perf_event_attr *attr = attrs + counter; |
232 | 235 | struct perf_header_attr *h_attr; |
233 | 236 | int track = !counter; /* only the first counter needs these */ |
237 | + int thread_index; | |
234 | 238 | int ret; |
235 | 239 | struct { |
236 | 240 | u64 count; |
237 | 241 | |
238 | 242 | |
239 | 243 | |
240 | 244 | |
241 | 245 | |
242 | 246 | |
243 | 247 | |
244 | 248 | |
245 | 249 | |
246 | 250 | |
247 | 251 | |
248 | 252 | |
249 | 253 | |
250 | 254 | |
251 | 255 | |
252 | 256 | |
253 | 257 | |
254 | 258 | |
255 | 259 | |
256 | 260 | |
... | ... | @@ -280,115 +284,124 @@ |
280 | 284 | attr->enable_on_exec = 1; |
281 | 285 | } |
282 | 286 | |
287 | + for (thread_index = 0; thread_index < thread_num; thread_index++) { | |
283 | 288 | try_again: |
284 | - fd[nr_cpu][counter] = sys_perf_event_open(attr, pid, cpu, group_fd, 0); | |
289 | + fd[nr_cpu][counter][thread_index] = sys_perf_event_open(attr, | |
290 | + all_tids[thread_index], cpu, group_fd, 0); | |
285 | 291 | |
286 | - if (fd[nr_cpu][counter] < 0) { | |
287 | - int err = errno; | |
292 | + if (fd[nr_cpu][counter][thread_index] < 0) { | |
293 | + int err = errno; | |
288 | 294 | |
289 | - if (err == EPERM || err == EACCES) | |
290 | - die("Permission error - are you root?\n" | |
291 | - "\t Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n"); | |
292 | - else if (err == ENODEV && profile_cpu != -1) | |
293 | - die("No such device - did you specify an out-of-range profile CPU?\n"); | |
295 | + if (err == EPERM || err == EACCES) | |
296 | + die("Permission error - are you root?\n" | |
297 | + "\t Consider tweaking" | |
298 | + " /proc/sys/kernel/perf_event_paranoid.\n"); | |
299 | + else if (err == ENODEV && profile_cpu != -1) { | |
300 | + die("No such device - did you specify" | |
301 | + " an out-of-range profile CPU?\n"); | |
302 | + } | |
294 | 303 | |
295 | - /* | |
296 | - * If it's cycles then fall back to hrtimer | |
297 | - * based cpu-clock-tick sw counter, which | |
298 | - * is always available even if no PMU support: | |
299 | - */ | |
300 | - if (attr->type == PERF_TYPE_HARDWARE | |
301 | - && attr->config == PERF_COUNT_HW_CPU_CYCLES) { | |
304 | + /* | |
305 | + * If it's cycles then fall back to hrtimer | |
306 | + * based cpu-clock-tick sw counter, which | |
307 | + * is always available even if no PMU support: | |
308 | + */ | |
309 | + if (attr->type == PERF_TYPE_HARDWARE | |
310 | + && attr->config == PERF_COUNT_HW_CPU_CYCLES) { | |
302 | 311 | |
303 | - if (verbose) | |
304 | - warning(" ... trying to fall back to cpu-clock-ticks\n"); | |
305 | - attr->type = PERF_TYPE_SOFTWARE; | |
306 | - attr->config = PERF_COUNT_SW_CPU_CLOCK; | |
307 | - goto try_again; | |
308 | - } | |
309 | - printf("\n"); | |
310 | - error("perfcounter syscall returned with %d (%s)\n", | |
311 | - fd[nr_cpu][counter], strerror(err)); | |
312 | + if (verbose) | |
313 | + warning(" ... trying to fall back to cpu-clock-ticks\n"); | |
314 | + attr->type = PERF_TYPE_SOFTWARE; | |
315 | + attr->config = PERF_COUNT_SW_CPU_CLOCK; | |
316 | + goto try_again; | |
317 | + } | |
318 | + printf("\n"); | |
319 | + error("perfcounter syscall returned with %d (%s)\n", | |
320 | + fd[nr_cpu][counter][thread_index], strerror(err)); | |
312 | 321 | |
313 | 322 | #if defined(__i386__) || defined(__x86_64__) |
314 | - if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) | |
315 | - die("No hardware sampling interrupt available. No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.\n"); | |
323 | + if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) | |
324 | + die("No hardware sampling interrupt available." | |
325 | + " No APIC? If so then you can boot the kernel" | |
326 | + " with the \"lapic\" boot parameter to" | |
327 | + " force-enable it.\n"); | |
316 | 328 | #endif |
317 | 329 | |
318 | - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | |
319 | - exit(-1); | |
320 | - } | |
330 | + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | |
331 | + exit(-1); | |
332 | + } | |
321 | 333 | |
322 | - h_attr = get_header_attr(attr, counter); | |
323 | - if (h_attr == NULL) | |
324 | - die("nomem\n"); | |
334 | + h_attr = get_header_attr(attr, counter); | |
335 | + if (h_attr == NULL) | |
336 | + die("nomem\n"); | |
325 | 337 | |
326 | - if (!file_new) { | |
327 | - if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { | |
328 | - fprintf(stderr, "incompatible append\n"); | |
338 | + if (!file_new) { | |
339 | + if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { | |
340 | + fprintf(stderr, "incompatible append\n"); | |
341 | + exit(-1); | |
342 | + } | |
343 | + } | |
344 | + | |
345 | + if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) { | |
346 | + perror("Unable to read perf file descriptor\n"); | |
329 | 347 | exit(-1); |
330 | 348 | } |
331 | - } | |
332 | 349 | |
333 | - if (read(fd[nr_cpu][counter], &read_data, sizeof(read_data)) == -1) { | |
334 | - perror("Unable to read perf file descriptor\n"); | |
335 | - exit(-1); | |
336 | - } | |
350 | + if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { | |
351 | + pr_warning("Not enough memory to add id\n"); | |
352 | + exit(-1); | |
353 | + } | |
337 | 354 | |
338 | - if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { | |
339 | - pr_warning("Not enough memory to add id\n"); | |
340 | - exit(-1); | |
341 | - } | |
355 | + assert(fd[nr_cpu][counter][thread_index] >= 0); | |
356 | + fcntl(fd[nr_cpu][counter][thread_index], F_SETFL, O_NONBLOCK); | |
342 | 357 | |
343 | - assert(fd[nr_cpu][counter] >= 0); | |
344 | - fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); | |
358 | + /* | |
359 | + * First counter acts as the group leader: | |
360 | + */ | |
361 | + if (group && group_fd == -1) | |
362 | + group_fd = fd[nr_cpu][counter][thread_index]; | |
363 | + if (multiplex && multiplex_fd == -1) | |
364 | + multiplex_fd = fd[nr_cpu][counter][thread_index]; | |
345 | 365 | |
346 | - /* | |
347 | - * First counter acts as the group leader: | |
348 | - */ | |
349 | - if (group && group_fd == -1) | |
350 | - group_fd = fd[nr_cpu][counter]; | |
351 | - if (multiplex && multiplex_fd == -1) | |
352 | - multiplex_fd = fd[nr_cpu][counter]; | |
366 | + if (multiplex && fd[nr_cpu][counter][thread_index] != multiplex_fd) { | |
353 | 367 | |
354 | - if (multiplex && fd[nr_cpu][counter] != multiplex_fd) { | |
368 | + ret = ioctl(fd[nr_cpu][counter][thread_index], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd); | |
369 | + assert(ret != -1); | |
370 | + } else { | |
371 | + event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index]; | |
372 | + event_array[nr_poll].events = POLLIN; | |
373 | + nr_poll++; | |
355 | 374 | |
356 | - ret = ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd); | |
357 | - assert(ret != -1); | |
358 | - } else { | |
359 | - event_array[nr_poll].fd = fd[nr_cpu][counter]; | |
360 | - event_array[nr_poll].events = POLLIN; | |
361 | - nr_poll++; | |
362 | - | |
363 | - mmap_array[nr_cpu][counter].counter = counter; | |
364 | - mmap_array[nr_cpu][counter].prev = 0; | |
365 | - mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; | |
366 | - mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, | |
367 | - PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0); | |
368 | - if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { | |
369 | - error("failed to mmap with %d (%s)\n", errno, strerror(errno)); | |
370 | - exit(-1); | |
375 | + mmap_array[nr_cpu][counter][thread_index].counter = counter; | |
376 | + mmap_array[nr_cpu][counter][thread_index].prev = 0; | |
377 | + mmap_array[nr_cpu][counter][thread_index].mask = mmap_pages*page_size - 1; | |
378 | + mmap_array[nr_cpu][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size, | |
379 | + PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0); | |
380 | + if (mmap_array[nr_cpu][counter][thread_index].base == MAP_FAILED) { | |
381 | + error("failed to mmap with %d (%s)\n", errno, strerror(errno)); | |
382 | + exit(-1); | |
383 | + } | |
371 | 384 | } |
372 | - } | |
373 | 385 | |
374 | - if (filter != NULL) { | |
375 | - ret = ioctl(fd[nr_cpu][counter], | |
376 | - PERF_EVENT_IOC_SET_FILTER, filter); | |
377 | - if (ret) { | |
378 | - error("failed to set filter with %d (%s)\n", errno, | |
379 | - strerror(errno)); | |
380 | - exit(-1); | |
386 | + if (filter != NULL) { | |
387 | + ret = ioctl(fd[nr_cpu][counter][thread_index], | |
388 | + PERF_EVENT_IOC_SET_FILTER, filter); | |
389 | + if (ret) { | |
390 | + error("failed to set filter with %d (%s)\n", errno, | |
391 | + strerror(errno)); | |
392 | + exit(-1); | |
393 | + } | |
381 | 394 | } |
382 | 395 | } |
383 | 396 | } |
384 | 397 | |
385 | -static void open_counters(int cpu, pid_t pid) | |
398 | +static void open_counters(int cpu) | |
386 | 399 | { |
387 | 400 | int counter; |
388 | 401 | |
389 | 402 | group_fd = -1; |
390 | 403 | for (counter = 0; counter < nr_counters; counter++) |
391 | - create_counter(counter, cpu, pid); | |
404 | + create_counter(counter, cpu); | |
392 | 405 | |
393 | 406 | nr_cpu++; |
394 | 407 | } |
... | ... | @@ -529,6 +542,9 @@ |
529 | 542 | exit(-1); |
530 | 543 | } |
531 | 544 | |
545 | + if (!system_wide && target_tid == -1 && target_pid == -1) | |
546 | + all_tids[0] = child_pid; | |
547 | + | |
532 | 548 | close(child_ready_pipe[1]); |
533 | 549 | close(go_pipe[0]); |
534 | 550 | /* |
535 | 551 | |
536 | 552 | |
... | ... | @@ -541,17 +557,12 @@ |
541 | 557 | close(child_ready_pipe[0]); |
542 | 558 | } |
543 | 559 | |
544 | - if (forks && target_pid == -1 && !system_wide) | |
545 | - pid = child_pid; | |
546 | - else | |
547 | - pid = target_pid; | |
548 | - | |
549 | 560 | if ((!system_wide && !inherit) || profile_cpu != -1) { |
550 | - open_counters(profile_cpu, pid); | |
561 | + open_counters(profile_cpu); | |
551 | 562 | } else { |
552 | 563 | nr_cpus = read_cpu_map(); |
553 | 564 | for (i = 0; i < nr_cpus; i++) |
554 | - open_counters(cpumap[i], pid); | |
565 | + open_counters(cpumap[i]); | |
555 | 566 | } |
556 | 567 | |
557 | 568 | if (file_new) { |
... | ... | @@ -576,7 +587,7 @@ |
576 | 587 | } |
577 | 588 | |
578 | 589 | if (!system_wide && profile_cpu == -1) |
579 | - event__synthesize_thread(target_pid, process_synthesized_event, | |
590 | + event__synthesize_thread(target_tid, process_synthesized_event, | |
580 | 591 | session); |
581 | 592 | else |
582 | 593 | event__synthesize_threads(process_synthesized_event, session); |
583 | 594 | |
... | ... | @@ -599,11 +610,16 @@ |
599 | 610 | |
600 | 611 | for (;;) { |
601 | 612 | int hits = samples; |
613 | + int thread; | |
602 | 614 | |
603 | 615 | for (i = 0; i < nr_cpu; i++) { |
604 | 616 | for (counter = 0; counter < nr_counters; counter++) { |
605 | - if (mmap_array[i][counter].base) | |
606 | - mmap_read(&mmap_array[i][counter]); | |
617 | + for (thread = 0; | |
618 | + thread < thread_num; thread++) { | |
619 | + if (mmap_array[i][counter][thread].base) | |
620 | + mmap_read(&mmap_array[i][counter][thread]); | |
621 | + } | |
622 | + | |
607 | 623 | } |
608 | 624 | } |
609 | 625 | |
... | ... | @@ -616,8 +632,15 @@ |
616 | 632 | |
617 | 633 | if (done) { |
618 | 634 | for (i = 0; i < nr_cpu; i++) { |
619 | - for (counter = 0; counter < nr_counters; counter++) | |
620 | - ioctl(fd[i][counter], PERF_EVENT_IOC_DISABLE); | |
635 | + for (counter = 0; | |
636 | + counter < nr_counters; | |
637 | + counter++) { | |
638 | + for (thread = 0; | |
639 | + thread < thread_num; | |
640 | + thread++) | |
641 | + ioctl(fd[i][counter][thread], | |
642 | + PERF_EVENT_IOC_DISABLE); | |
643 | + } | |
621 | 644 | } |
622 | 645 | } |
623 | 646 | } |
... | ... | @@ -649,7 +672,9 @@ |
649 | 672 | OPT_CALLBACK(0, "filter", NULL, "filter", |
650 | 673 | "event filter", parse_filter), |
651 | 674 | OPT_INTEGER('p', "pid", &target_pid, |
652 | - "record events on existing pid"), | |
675 | + "record events on existing process id"), | |
676 | + OPT_INTEGER('t', "tid", &target_tid, | |
677 | + "record events on existing thread id"), | |
653 | 678 | OPT_INTEGER('r', "realtime", &realtime_prio, |
654 | 679 | "collect data with this RT SCHED_FIFO priority"), |
655 | 680 | OPT_BOOLEAN('R', "raw-samples", &raw_samples, |
656 | 681 | |
... | ... | @@ -690,10 +715,12 @@ |
690 | 715 | int cmd_record(int argc, const char **argv, const char *prefix __used) |
691 | 716 | { |
692 | 717 | int counter; |
718 | + int i,j; | |
693 | 719 | |
694 | 720 | argc = parse_options(argc, argv, options, record_usage, |
695 | 721 | PARSE_OPT_STOP_AT_NON_OPTION); |
696 | - if (!argc && target_pid == -1 && !system_wide && profile_cpu == -1) | |
722 | + if (!argc && target_pid == -1 && target_tid == -1 && | |
723 | + !system_wide && profile_cpu == -1) | |
697 | 724 | usage_with_options(record_usage, options); |
698 | 725 | |
699 | 726 | symbol__init(); |
... | ... | @@ -703,6 +730,37 @@ |
703 | 730 | attrs[0].type = PERF_TYPE_HARDWARE; |
704 | 731 | attrs[0].config = PERF_COUNT_HW_CPU_CYCLES; |
705 | 732 | } |
733 | + | |
734 | + if (target_pid != -1) { | |
735 | + target_tid = target_pid; | |
736 | + thread_num = find_all_tid(target_pid, &all_tids); | |
737 | + if (thread_num <= 0) { | |
738 | + fprintf(stderr, "Can't find all threads of pid %d\n", | |
739 | + target_pid); | |
740 | + usage_with_options(record_usage, options); | |
741 | + } | |
742 | + } else { | |
743 | + all_tids=malloc(sizeof(pid_t)); | |
744 | + if (!all_tids) | |
745 | + return -ENOMEM; | |
746 | + | |
747 | + all_tids[0] = target_tid; | |
748 | + thread_num = 1; | |
749 | + } | |
750 | + | |
751 | + for (i = 0; i < MAX_NR_CPUS; i++) { | |
752 | + for (j = 0; j < MAX_COUNTERS; j++) { | |
753 | + fd[i][j] = malloc(sizeof(int)*thread_num); | |
754 | + mmap_array[i][j] = malloc( | |
755 | + sizeof(struct mmap_data)*thread_num); | |
756 | + if (!fd[i][j] || !mmap_array[i][j]) | |
757 | + return -ENOMEM; | |
758 | + } | |
759 | + } | |
760 | + event_array = malloc( | |
761 | + sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num); | |
762 | + if (!event_array) | |
763 | + return -ENOMEM; | |
706 | 764 | |
707 | 765 | /* |
708 | 766 | * User specified count overrides default frequency. |
tools/perf/builtin-stat.c
... | ... | @@ -46,6 +46,7 @@ |
46 | 46 | #include "util/debug.h" |
47 | 47 | #include "util/header.h" |
48 | 48 | #include "util/cpumap.h" |
49 | +#include "util/thread.h" | |
49 | 50 | |
50 | 51 | #include <sys/prctl.h> |
51 | 52 | #include <math.h> |
52 | 53 | |
... | ... | @@ -74,10 +75,13 @@ |
74 | 75 | static int inherit = 1; |
75 | 76 | static int scale = 1; |
76 | 77 | static pid_t target_pid = -1; |
78 | +static pid_t target_tid = -1; | |
79 | +static pid_t *all_tids = NULL; | |
80 | +static int thread_num = 0; | |
77 | 81 | static pid_t child_pid = -1; |
78 | 82 | static int null_run = 0; |
79 | 83 | |
80 | -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | |
84 | +static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; | |
81 | 85 | |
82 | 86 | static int event_scaled[MAX_COUNTERS]; |
83 | 87 | |
84 | 88 | |
... | ... | @@ -140,9 +144,10 @@ |
140 | 144 | #define ERR_PERF_OPEN \ |
141 | 145 | "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" |
142 | 146 | |
143 | -static void create_perf_stat_counter(int counter, int pid) | |
147 | +static void create_perf_stat_counter(int counter) | |
144 | 148 | { |
145 | 149 | struct perf_event_attr *attr = attrs + counter; |
150 | + int thread; | |
146 | 151 | |
147 | 152 | if (scale) |
148 | 153 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
149 | 154 | |
... | ... | @@ -152,10 +157,11 @@ |
152 | 157 | unsigned int cpu; |
153 | 158 | |
154 | 159 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
155 | - fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); | |
156 | - if (fd[cpu][counter] < 0 && verbose) | |
160 | + fd[cpu][counter][0] = sys_perf_event_open(attr, | |
161 | + -1, cpumap[cpu], -1, 0); | |
162 | + if (fd[cpu][counter][0] < 0 && verbose) | |
157 | 163 | fprintf(stderr, ERR_PERF_OPEN, counter, |
158 | - fd[cpu][counter], strerror(errno)); | |
164 | + fd[cpu][counter][0], strerror(errno)); | |
159 | 165 | } |
160 | 166 | } else { |
161 | 167 | attr->inherit = inherit; |
... | ... | @@ -163,11 +169,14 @@ |
163 | 169 | attr->disabled = 1; |
164 | 170 | attr->enable_on_exec = 1; |
165 | 171 | } |
166 | - | |
167 | - fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0); | |
168 | - if (fd[0][counter] < 0 && verbose) | |
169 | - fprintf(stderr, ERR_PERF_OPEN, counter, | |
170 | - fd[0][counter], strerror(errno)); | |
172 | + for (thread = 0; thread < thread_num; thread++) { | |
173 | + fd[0][counter][thread] = sys_perf_event_open(attr, | |
174 | + all_tids[thread], -1, -1, 0); | |
175 | + if (fd[0][counter][thread] < 0 && verbose) | |
176 | + fprintf(stderr, ERR_PERF_OPEN, counter, | |
177 | + fd[0][counter][thread], | |
178 | + strerror(errno)); | |
179 | + } | |
171 | 180 | } |
172 | 181 | } |
173 | 182 | |
174 | 183 | |
175 | 184 | |
176 | 185 | |
177 | 186 | |
... | ... | @@ -192,25 +201,28 @@ |
192 | 201 | unsigned int cpu; |
193 | 202 | size_t res, nv; |
194 | 203 | int scaled; |
195 | - int i; | |
204 | + int i, thread; | |
196 | 205 | |
197 | 206 | count[0] = count[1] = count[2] = 0; |
198 | 207 | |
199 | 208 | nv = scale ? 3 : 1; |
200 | 209 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
201 | - if (fd[cpu][counter] < 0) | |
202 | - continue; | |
210 | + for (thread = 0; thread < thread_num; thread++) { | |
211 | + if (fd[cpu][counter][thread] < 0) | |
212 | + continue; | |
203 | 213 | |
204 | - res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); | |
205 | - assert(res == nv * sizeof(u64)); | |
214 | + res = read(fd[cpu][counter][thread], | |
215 | + single_count, nv * sizeof(u64)); | |
216 | + assert(res == nv * sizeof(u64)); | |
206 | 217 | |
207 | - close(fd[cpu][counter]); | |
208 | - fd[cpu][counter] = -1; | |
218 | + close(fd[cpu][counter][thread]); | |
219 | + fd[cpu][counter][thread] = -1; | |
209 | 220 | |
210 | - count[0] += single_count[0]; | |
211 | - if (scale) { | |
212 | - count[1] += single_count[1]; | |
213 | - count[2] += single_count[2]; | |
221 | + count[0] += single_count[0]; | |
222 | + if (scale) { | |
223 | + count[1] += single_count[1]; | |
224 | + count[2] += single_count[2]; | |
225 | + } | |
214 | 226 | } |
215 | 227 | } |
216 | 228 | |
... | ... | @@ -253,7 +265,6 @@ |
253 | 265 | unsigned long long t0, t1; |
254 | 266 | int status = 0; |
255 | 267 | int counter; |
256 | - int pid; | |
257 | 268 | int child_ready_pipe[2], go_pipe[2]; |
258 | 269 | const bool forks = (argc > 0); |
259 | 270 | char buf; |
... | ... | @@ -299,6 +310,9 @@ |
299 | 310 | exit(-1); |
300 | 311 | } |
301 | 312 | |
313 | + if (target_tid == -1 && target_pid == -1 && !system_wide) | |
314 | + all_tids[0] = child_pid; | |
315 | + | |
302 | 316 | /* |
303 | 317 | * Wait for the child to be ready to exec. |
304 | 318 | */ |
305 | 319 | |
... | ... | @@ -309,12 +323,8 @@ |
309 | 323 | close(child_ready_pipe[0]); |
310 | 324 | } |
311 | 325 | |
312 | - if (target_pid == -1) | |
313 | - pid = child_pid; | |
314 | - else | |
315 | - pid = target_pid; | |
316 | 326 | for (counter = 0; counter < nr_counters; counter++) |
317 | - create_perf_stat_counter(counter, pid); | |
327 | + create_perf_stat_counter(counter); | |
318 | 328 | |
319 | 329 | /* |
320 | 330 | * Enable counters and exec the command: |
321 | 331 | |
... | ... | @@ -433,12 +443,14 @@ |
433 | 443 | |
434 | 444 | fprintf(stderr, "\n"); |
435 | 445 | fprintf(stderr, " Performance counter stats for "); |
436 | - if(target_pid == -1) { | |
446 | + if(target_pid == -1 && target_tid == -1) { | |
437 | 447 | fprintf(stderr, "\'%s", argv[0]); |
438 | 448 | for (i = 1; i < argc; i++) |
439 | 449 | fprintf(stderr, " %s", argv[i]); |
440 | - }else | |
441 | - fprintf(stderr, "task pid \'%d", target_pid); | |
450 | + } else if (target_pid != -1) | |
451 | + fprintf(stderr, "process id \'%d", target_pid); | |
452 | + else | |
453 | + fprintf(stderr, "thread id \'%d", target_tid); | |
442 | 454 | |
443 | 455 | fprintf(stderr, "\'"); |
444 | 456 | if (run_count > 1) |
... | ... | @@ -493,7 +505,9 @@ |
493 | 505 | OPT_BOOLEAN('i', "inherit", &inherit, |
494 | 506 | "child tasks inherit counters"), |
495 | 507 | OPT_INTEGER('p', "pid", &target_pid, |
496 | - "stat events on existing pid"), | |
508 | + "stat events on existing process id"), | |
509 | + OPT_INTEGER('t', "tid", &target_tid, | |
510 | + "stat events on existing thread id"), | |
497 | 511 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
498 | 512 | "system-wide collection from all CPUs"), |
499 | 513 | OPT_BOOLEAN('c', "scale", &scale, |
500 | 514 | |
... | ... | @@ -510,10 +524,11 @@ |
510 | 524 | int cmd_stat(int argc, const char **argv, const char *prefix __used) |
511 | 525 | { |
512 | 526 | int status; |
527 | + int i,j; | |
513 | 528 | |
514 | 529 | argc = parse_options(argc, argv, options, stat_usage, |
515 | 530 | PARSE_OPT_STOP_AT_NON_OPTION); |
516 | - if (!argc && target_pid == -1) | |
531 | + if (!argc && target_pid == -1 && target_tid == -1) | |
517 | 532 | usage_with_options(stat_usage, options); |
518 | 533 | if (run_count <= 0) |
519 | 534 | usage_with_options(stat_usage, options); |
... | ... | @@ -528,6 +543,31 @@ |
528 | 543 | nr_cpus = read_cpu_map(); |
529 | 544 | else |
530 | 545 | nr_cpus = 1; |
546 | + | |
547 | + if (target_pid != -1) { | |
548 | + target_tid = target_pid; | |
549 | + thread_num = find_all_tid(target_pid, &all_tids); | |
550 | + if (thread_num <= 0) { | |
551 | + fprintf(stderr, "Can't find all threads of pid %d\n", | |
552 | + target_pid); | |
553 | + usage_with_options(stat_usage, options); | |
554 | + } | |
555 | + } else { | |
556 | + all_tids=malloc(sizeof(pid_t)); | |
557 | + if (!all_tids) | |
558 | + return -ENOMEM; | |
559 | + | |
560 | + all_tids[0] = target_tid; | |
561 | + thread_num = 1; | |
562 | + } | |
563 | + | |
564 | + for (i = 0; i < MAX_NR_CPUS; i++) { | |
565 | + for (j = 0; j < MAX_COUNTERS; j++) { | |
566 | + fd[i][j] = malloc(sizeof(int)*thread_num); | |
567 | + if (!fd[i][j]) | |
568 | + return -ENOMEM; | |
569 | + } | |
570 | + } | |
531 | 571 | |
532 | 572 | /* |
533 | 573 | * We dont want to block the signals - that would cause |
tools/perf/builtin-top.c
... | ... | @@ -55,7 +55,7 @@ |
55 | 55 | #include <linux/unistd.h> |
56 | 56 | #include <linux/types.h> |
57 | 57 | |
58 | -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | |
58 | +static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; | |
59 | 59 | |
60 | 60 | static int system_wide = 0; |
61 | 61 | |
... | ... | @@ -65,6 +65,9 @@ |
65 | 65 | static int print_entries; |
66 | 66 | |
67 | 67 | static int target_pid = -1; |
68 | +static int target_tid = -1; | |
69 | +static pid_t *all_tids = NULL; | |
70 | +static int thread_num = 0; | |
68 | 71 | static int inherit = 0; |
69 | 72 | static int profile_cpu = -1; |
70 | 73 | static int nr_cpus = 0; |
71 | 74 | |
... | ... | @@ -524,13 +527,15 @@ |
524 | 527 | |
525 | 528 | if (target_pid != -1) |
526 | 529 | printf(" (target_pid: %d", target_pid); |
530 | + else if (target_tid != -1) | |
531 | + printf(" (target_tid: %d", target_tid); | |
527 | 532 | else |
528 | 533 | printf(" (all"); |
529 | 534 | |
530 | 535 | if (profile_cpu != -1) |
531 | 536 | printf(", cpu: %d)\n", profile_cpu); |
532 | 537 | else { |
533 | - if (target_pid != -1) | |
538 | + if (target_tid != -1) | |
534 | 539 | printf(")\n"); |
535 | 540 | else |
536 | 541 | printf(", %d CPUs)\n", nr_cpus); |
537 | 542 | |
538 | 543 | |
... | ... | @@ -1129,16 +1134,21 @@ |
1129 | 1134 | md->prev = old; |
1130 | 1135 | } |
1131 | 1136 | |
1132 | -static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; | |
1133 | -static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; | |
1137 | +static struct pollfd *event_array; | |
1138 | +static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; | |
1134 | 1139 | |
1135 | 1140 | static void perf_session__mmap_read(struct perf_session *self) |
1136 | 1141 | { |
1137 | - int i, counter; | |
1142 | + int i, counter, thread_index; | |
1138 | 1143 | |
1139 | 1144 | for (i = 0; i < nr_cpus; i++) { |
1140 | 1145 | for (counter = 0; counter < nr_counters; counter++) |
1141 | - perf_session__mmap_read_counter(self, &mmap_array[i][counter]); | |
1146 | + for (thread_index = 0; | |
1147 | + thread_index < thread_num; | |
1148 | + thread_index++) { | |
1149 | + perf_session__mmap_read_counter(self, | |
1150 | + &mmap_array[i][counter][thread_index]); | |
1151 | + } | |
1142 | 1152 | } |
1143 | 1153 | } |
1144 | 1154 | |
1145 | 1155 | |
... | ... | @@ -1149,9 +1159,10 @@ |
1149 | 1159 | { |
1150 | 1160 | struct perf_event_attr *attr; |
1151 | 1161 | int cpu; |
1162 | + int thread_index; | |
1152 | 1163 | |
1153 | 1164 | cpu = profile_cpu; |
1154 | - if (target_pid == -1 && profile_cpu == -1) | |
1165 | + if (target_tid == -1 && profile_cpu == -1) | |
1155 | 1166 | cpu = cpumap[i]; |
1156 | 1167 | |
1157 | 1168 | attr = attrs + counter; |
1158 | 1169 | |
1159 | 1170 | |
1160 | 1171 | |
1161 | 1172 | |
1162 | 1173 | |
1163 | 1174 | |
1164 | 1175 | |
1165 | 1176 | |
1166 | 1177 | |
... | ... | @@ -1167,55 +1178,58 @@ |
1167 | 1178 | attr->inherit = (cpu < 0) && inherit; |
1168 | 1179 | attr->mmap = 1; |
1169 | 1180 | |
1181 | + for (thread_index = 0; thread_index < thread_num; thread_index++) { | |
1170 | 1182 | try_again: |
1171 | - fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0); | |
1183 | + fd[i][counter][thread_index] = sys_perf_event_open(attr, | |
1184 | + all_tids[thread_index], cpu, group_fd, 0); | |
1172 | 1185 | |
1173 | - if (fd[i][counter] < 0) { | |
1174 | - int err = errno; | |
1186 | + if (fd[i][counter][thread_index] < 0) { | |
1187 | + int err = errno; | |
1175 | 1188 | |
1176 | - if (err == EPERM || err == EACCES) | |
1177 | - die("No permission - are you root?\n"); | |
1178 | - /* | |
1179 | - * If it's cycles then fall back to hrtimer | |
1180 | - * based cpu-clock-tick sw counter, which | |
1181 | - * is always available even if no PMU support: | |
1182 | - */ | |
1183 | - if (attr->type == PERF_TYPE_HARDWARE | |
1184 | - && attr->config == PERF_COUNT_HW_CPU_CYCLES) { | |
1189 | + if (err == EPERM || err == EACCES) | |
1190 | + die("No permission - are you root?\n"); | |
1191 | + /* | |
1192 | + * If it's cycles then fall back to hrtimer | |
1193 | + * based cpu-clock-tick sw counter, which | |
1194 | + * is always available even if no PMU support: | |
1195 | + */ | |
1196 | + if (attr->type == PERF_TYPE_HARDWARE | |
1197 | + && attr->config == PERF_COUNT_HW_CPU_CYCLES) { | |
1185 | 1198 | |
1186 | - if (verbose) | |
1187 | - warning(" ... trying to fall back to cpu-clock-ticks\n"); | |
1199 | + if (verbose) | |
1200 | + warning(" ... trying to fall back to cpu-clock-ticks\n"); | |
1188 | 1201 | |
1189 | - attr->type = PERF_TYPE_SOFTWARE; | |
1190 | - attr->config = PERF_COUNT_SW_CPU_CLOCK; | |
1191 | - goto try_again; | |
1202 | + attr->type = PERF_TYPE_SOFTWARE; | |
1203 | + attr->config = PERF_COUNT_SW_CPU_CLOCK; | |
1204 | + goto try_again; | |
1205 | + } | |
1206 | + printf("\n"); | |
1207 | + error("perfcounter syscall returned with %d (%s)\n", | |
1208 | + fd[i][counter][thread_index], strerror(err)); | |
1209 | + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | |
1210 | + exit(-1); | |
1192 | 1211 | } |
1193 | - printf("\n"); | |
1194 | - error("perfcounter syscall returned with %d (%s)\n", | |
1195 | - fd[i][counter], strerror(err)); | |
1196 | - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | |
1197 | - exit(-1); | |
1198 | - } | |
1199 | - assert(fd[i][counter] >= 0); | |
1200 | - fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); | |
1212 | + assert(fd[i][counter][thread_index] >= 0); | |
1213 | + fcntl(fd[i][counter][thread_index], F_SETFL, O_NONBLOCK); | |
1201 | 1214 | |
1202 | - /* | |
1203 | - * First counter acts as the group leader: | |
1204 | - */ | |
1205 | - if (group && group_fd == -1) | |
1206 | - group_fd = fd[i][counter]; | |
1215 | + /* | |
1216 | + * First counter acts as the group leader: | |
1217 | + */ | |
1218 | + if (group && group_fd == -1) | |
1219 | + group_fd = fd[i][counter][thread_index]; | |
1207 | 1220 | |
1208 | - event_array[nr_poll].fd = fd[i][counter]; | |
1209 | - event_array[nr_poll].events = POLLIN; | |
1210 | - nr_poll++; | |
1221 | + event_array[nr_poll].fd = fd[i][counter][thread_index]; | |
1222 | + event_array[nr_poll].events = POLLIN; | |
1223 | + nr_poll++; | |
1211 | 1224 | |
1212 | - mmap_array[i][counter].counter = counter; | |
1213 | - mmap_array[i][counter].prev = 0; | |
1214 | - mmap_array[i][counter].mask = mmap_pages*page_size - 1; | |
1215 | - mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, | |
1216 | - PROT_READ, MAP_SHARED, fd[i][counter], 0); | |
1217 | - if (mmap_array[i][counter].base == MAP_FAILED) | |
1218 | - die("failed to mmap with %d (%s)\n", errno, strerror(errno)); | |
1225 | + mmap_array[i][counter][thread_index].counter = counter; | |
1226 | + mmap_array[i][counter][thread_index].prev = 0; | |
1227 | + mmap_array[i][counter][thread_index].mask = mmap_pages*page_size - 1; | |
1228 | + mmap_array[i][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size, | |
1229 | + PROT_READ, MAP_SHARED, fd[i][counter][thread_index], 0); | |
1230 | + if (mmap_array[i][counter][thread_index].base == MAP_FAILED) | |
1231 | + die("failed to mmap with %d (%s)\n", errno, strerror(errno)); | |
1232 | + } | |
1219 | 1233 | } |
1220 | 1234 | |
1221 | 1235 | static int __cmd_top(void) |
... | ... | @@ -1231,8 +1245,8 @@ |
1231 | 1245 | if (session == NULL) |
1232 | 1246 | return -ENOMEM; |
1233 | 1247 | |
1234 | - if (target_pid != -1) | |
1235 | - event__synthesize_thread(target_pid, event__process, session); | |
1248 | + if (target_tid != -1) | |
1249 | + event__synthesize_thread(target_tid, event__process, session); | |
1236 | 1250 | else |
1237 | 1251 | event__synthesize_threads(event__process, session); |
1238 | 1252 | |
... | ... | @@ -1243,7 +1257,7 @@ |
1243 | 1257 | } |
1244 | 1258 | |
1245 | 1259 | /* Wait for a minimal set of events before starting the snapshot */ |
1246 | - poll(event_array, nr_poll, 100); | |
1260 | + poll(&event_array[0], nr_poll, 100); | |
1247 | 1261 | |
1248 | 1262 | perf_session__mmap_read(session); |
1249 | 1263 | |
... | ... | @@ -1286,7 +1300,9 @@ |
1286 | 1300 | OPT_INTEGER('c', "count", &default_interval, |
1287 | 1301 | "event period to sample"), |
1288 | 1302 | OPT_INTEGER('p', "pid", &target_pid, |
1289 | - "profile events on existing pid"), | |
1303 | + "profile events on existing process id"), | |
1304 | + OPT_INTEGER('t', "tid", &target_tid, | |
1305 | + "profile events on existing thread id"), | |
1290 | 1306 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
1291 | 1307 | "system-wide collection from all CPUs"), |
1292 | 1308 | OPT_INTEGER('C', "CPU", &profile_cpu, |
... | ... | @@ -1327,6 +1343,7 @@ |
1327 | 1343 | int cmd_top(int argc, const char **argv, const char *prefix __used) |
1328 | 1344 | { |
1329 | 1345 | int counter; |
1346 | + int i,j; | |
1330 | 1347 | |
1331 | 1348 | page_size = sysconf(_SC_PAGE_SIZE); |
1332 | 1349 | |
1333 | 1350 | |
... | ... | @@ -1334,8 +1351,39 @@ |
1334 | 1351 | if (argc) |
1335 | 1352 | usage_with_options(top_usage, options); |
1336 | 1353 | |
1354 | + if (target_pid != -1) { | |
1355 | + target_tid = target_pid; | |
1356 | + thread_num = find_all_tid(target_pid, &all_tids); | |
1357 | + if (thread_num <= 0) { | |
1358 | + fprintf(stderr, "Can't find all threads of pid %d\n", | |
1359 | + target_pid); | |
1360 | + usage_with_options(top_usage, options); | |
1361 | + } | |
1362 | + } else { | |
1363 | + all_tids=malloc(sizeof(pid_t)); | |
1364 | + if (!all_tids) | |
1365 | + return -ENOMEM; | |
1366 | + | |
1367 | + all_tids[0] = target_tid; | |
1368 | + thread_num = 1; | |
1369 | + } | |
1370 | + | |
1371 | + for (i = 0; i < MAX_NR_CPUS; i++) { | |
1372 | + for (j = 0; j < MAX_COUNTERS; j++) { | |
1373 | + fd[i][j] = malloc(sizeof(int)*thread_num); | |
1374 | + mmap_array[i][j] = malloc( | |
1375 | + sizeof(struct mmap_data)*thread_num); | |
1376 | + if (!fd[i][j] || !mmap_array[i][j]) | |
1377 | + return -ENOMEM; | |
1378 | + } | |
1379 | + } | |
1380 | + event_array = malloc( | |
1381 | + sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num); | |
1382 | + if (!event_array) | |
1383 | + return -ENOMEM; | |
1384 | + | |
1337 | 1385 | /* CPU and PID are mutually exclusive */ |
1338 | - if (target_pid != -1 && profile_cpu != -1) { | |
1386 | + if (target_tid > 0 && profile_cpu != -1) { | |
1339 | 1387 | printf("WARNING: PID switch overriding CPU\n"); |
1340 | 1388 | sleep(1); |
1341 | 1389 | profile_cpu = -1; |
... | ... | @@ -1376,7 +1424,7 @@ |
1376 | 1424 | attrs[counter].sample_period = default_interval; |
1377 | 1425 | } |
1378 | 1426 | |
1379 | - if (target_pid != -1 || profile_cpu != -1) | |
1427 | + if (target_tid != -1 || profile_cpu != -1) | |
1380 | 1428 | nr_cpus = 1; |
1381 | 1429 | else |
1382 | 1430 | nr_cpus = read_cpu_map(); |
tools/perf/util/thread.c
... | ... | @@ -7,6 +7,37 @@ |
7 | 7 | #include "util.h" |
8 | 8 | #include "debug.h" |
9 | 9 | |
10 | +int find_all_tid(int pid, pid_t ** all_tid) | |
11 | +{ | |
12 | + char name[256]; | |
13 | + int items; | |
14 | + struct dirent **namelist = NULL; | |
15 | + int ret = 0; | |
16 | + int i; | |
17 | + | |
18 | + sprintf(name, "/proc/%d/task", pid); | |
19 | + items = scandir(name, &namelist, NULL, NULL); | |
20 | + if (items <= 0) | |
21 | + return -ENOENT; | |
22 | + *all_tid = malloc(sizeof(pid_t) * items); | |
23 | + if (!*all_tid) { | |
24 | + ret = -ENOMEM; | |
25 | + goto failure; | |
26 | + } | |
27 | + | |
28 | + for (i = 0; i < items; i++) | |
29 | + (*all_tid)[i] = atoi(namelist[i]->d_name); | |
30 | + | |
31 | + ret = items; | |
32 | + | |
33 | +failure: | |
34 | + for (i=0; i<items; i++) | |
35 | + free(namelist[i]); | |
36 | + free(namelist); | |
37 | + | |
38 | + return ret; | |
39 | +} | |
40 | + | |
10 | 41 | void map_groups__init(struct map_groups *self) |
11 | 42 | { |
12 | 43 | int i; |
tools/perf/util/thread.h