Commit e4b546a3643fbfc510d5ef7db538e4d3ab00effb
Exists in
master
and in
39 other branches
Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux-2.6 into perf/core
Showing 15 changed files Inline Diff
- tools/perf/Documentation/perf-annotate.txt
- tools/perf/Documentation/perf-buildid-list.txt
- tools/perf/Documentation/perf-diff.txt
- tools/perf/Documentation/perf-kvm.txt
- tools/perf/Documentation/perf-lock.txt
- tools/perf/Documentation/perf-probe.txt
- tools/perf/Documentation/perf-record.txt
- tools/perf/Documentation/perf-report.txt
- tools/perf/Documentation/perf-sched.txt
- tools/perf/Documentation/perf-script.txt
- tools/perf/Documentation/perf-stat.txt
- tools/perf/Documentation/perf-test.txt
- tools/perf/Documentation/perf-top.txt
- tools/perf/builtin-diff.c
- tools/perf/builtin-stat.c
tools/perf/Documentation/perf-annotate.txt
1 | perf-annotate(1) | 1 | perf-annotate(1) |
2 | ================ | 2 | ================ |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-annotate - Read perf.data (created by perf record) and display annotated code | 6 | perf-annotate - Read perf.data (created by perf record) and display annotated code |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf annotate' [-i <file> | --input=file] [symbol_name] | 11 | 'perf annotate' [-i <file> | --input=file] [symbol_name] |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | This command reads the input file and displays an annotated version of the | 15 | This command reads the input file and displays an annotated version of the |
16 | code. If the object file has debug symbols then the source code will be | 16 | code. If the object file has debug symbols then the source code will be |
17 | displayed alongside assembly code. | 17 | displayed alongside assembly code. |
18 | 18 | ||
19 | If there is no debug info in the object, then annotated assembly is displayed. | 19 | If there is no debug info in the object, then annotated assembly is displayed. |
20 | 20 | ||
21 | OPTIONS | 21 | OPTIONS |
22 | ------- | 22 | ------- |
23 | -i:: | 23 | -i:: |
24 | --input=:: | 24 | --input=:: |
25 | Input file name. (default: perf.data) | 25 | Input file name. (default: perf.data) |
26 | 26 | ||
27 | -d:: | ||
28 | --dsos=<dso[,dso...]>:: | ||
29 | Only consider symbols in these dsos. | ||
30 | -s:: | ||
31 | --symbol=<symbol>:: | ||
32 | Symbol to annotate. | ||
33 | |||
34 | -f:: | ||
35 | --force:: | ||
36 | Don't complain, do it. | ||
37 | |||
38 | -v:: | ||
39 | --verbose:: | ||
40 | Be more verbose. (Show symbol address, etc) | ||
41 | |||
42 | -D:: | ||
43 | --dump-raw-trace:: | ||
44 | Dump raw trace in ASCII. | ||
45 | |||
46 | -k:: | ||
47 | --vmlinux=<file>:: | ||
48 | vmlinux pathname. | ||
49 | |||
50 | -m:: | ||
51 | --modules:: | ||
52 | Load module symbols. WARNING: use only with -k and LIVE kernel. | ||
53 | |||
54 | -l:: | ||
55 | --print-line:: | ||
56 | Print matching source lines (may be slow). | ||
57 | |||
58 | -P:: | ||
59 | --full-paths:: | ||
60 | Don't shorten the displayed pathnames. | ||
61 | |||
27 | --stdio:: Use the stdio interface. | 62 | --stdio:: Use the stdio interface. |
28 | 63 | ||
29 | --tui:: Use the TUI interface Use of --tui requires a tty, if one is not | 64 | --tui:: Use the TUI interface Use of --tui requires a tty, if one is not |
30 | present, as when piping to other commands, the stdio interface is | 65 | present, as when piping to other commands, the stdio interface is |
31 | used. This interface starts by centering on the line with more | 66 | used. This interface starts by centering on the line with more |
32 | samples, TAB/UNTAB cycles thru the lines with more samples. | 67 | samples, TAB/UNTAB cycles through the lines with more samples. |
33 | 68 | ||
34 | SEE ALSO | 69 | SEE ALSO |
35 | -------- | 70 | -------- |
36 | linkperf:perf-record[1], linkperf:perf-report[1] | 71 | linkperf:perf-record[1], linkperf:perf-report[1] |
37 | 72 |
tools/perf/Documentation/perf-buildid-list.txt
1 | perf-buildid-list(1) | 1 | perf-buildid-list(1) |
2 | ==================== | 2 | ==================== |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-buildid-list - List the buildids in a perf.data file | 6 | perf-buildid-list - List the buildids in a perf.data file |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf buildid-list <options>' | 11 | 'perf buildid-list <options>' |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | This command displays the buildids found in a perf.data file, so that other | 15 | This command displays the buildids found in a perf.data file, so that other |
16 | tools can be used to fetch packages with matching symbol tables for use by | 16 | tools can be used to fetch packages with matching symbol tables for use by |
17 | perf report. | 17 | perf report. |
18 | 18 | ||
19 | OPTIONS | 19 | OPTIONS |
20 | ------- | 20 | ------- |
21 | -H:: | ||
22 | --with-hits:: | ||
23 | Show only DSOs with hits. | ||
21 | -i:: | 24 | -i:: |
22 | --input=:: | 25 | --input=:: |
23 | Input file name. (default: perf.data) | 26 | Input file name. (default: perf.data) |
24 | -f:: | 27 | -f:: |
25 | --force:: | 28 | --force:: |
26 | Don't do ownership validation. | 29 | Don't do ownership validation. |
27 | -v:: | 30 | -v:: |
28 | --verbose:: | 31 | --verbose:: |
29 | Be more verbose. | 32 | Be more verbose. |
30 | 33 | ||
31 | SEE ALSO | 34 | SEE ALSO |
32 | -------- | 35 | -------- |
33 | linkperf:perf-record[1], linkperf:perf-top[1], | 36 | linkperf:perf-record[1], linkperf:perf-top[1], |
34 | linkperf:perf-report[1] | 37 | linkperf:perf-report[1] |
35 | 38 |
tools/perf/Documentation/perf-diff.txt
1 | perf-diff(1) | 1 | perf-diff(1) |
2 | ============ | 2 | ============ |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-diff - Read two perf.data files and display the differential profile | 6 | perf-diff - Read two perf.data files and display the differential profile |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf diff' [oldfile] [newfile] | 11 | 'perf diff' [oldfile] [newfile] |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | This command displays the performance difference amongst two perf.data files | 15 | This command displays the performance difference amongst two perf.data files |
16 | captured via perf record. | 16 | captured via perf record. |
17 | 17 | ||
18 | If no parameters are passed it will assume perf.data.old and perf.data. | 18 | If no parameters are passed it will assume perf.data.old and perf.data. |
19 | 19 | ||
20 | OPTIONS | 20 | OPTIONS |
21 | ------- | 21 | ------- |
22 | -M:: | ||
23 | --displacement:: | ||
24 | Show position displacement relative to baseline. | ||
25 | |||
26 | -D:: | ||
27 | --dump-raw-trace:: | ||
28 | Dump raw trace in ASCII. | ||
29 | |||
30 | -m:: | ||
31 | --modules:: | ||
32 | Load module symbols. WARNING: use only with -k and LIVE kernel | ||
33 | |||
22 | -d:: | 34 | -d:: |
23 | --dsos=:: | 35 | --dsos=:: |
24 | Only consider symbols in these dsos. CSV that understands | 36 | Only consider symbols in these dsos. CSV that understands |
25 | file://filename entries. | 37 | file://filename entries. |
26 | 38 | ||
27 | -C:: | 39 | -C:: |
28 | --comms=:: | 40 | --comms=:: |
29 | Only consider symbols in these comms. CSV that understands | 41 | Only consider symbols in these comms. CSV that understands |
30 | file://filename entries. | 42 | file://filename entries. |
31 | 43 | ||
32 | -S:: | 44 | -S:: |
33 | --symbols=:: | 45 | --symbols=:: |
34 | Only consider these symbols. CSV that understands | 46 | Only consider these symbols. CSV that understands |
35 | file://filename entries. | 47 | file://filename entries. |
36 | 48 | ||
37 | -s:: | 49 | -s:: |
38 | --sort=:: | 50 | --sort=:: |
39 | Sort by key(s): pid, comm, dso, symbol. | 51 | Sort by key(s): pid, comm, dso, symbol. |
40 | 52 | ||
41 | -t:: | 53 | -t:: |
42 | --field-separator=:: | 54 | --field-separator=:: |
43 | 55 | ||
44 | Use a special separator character and don't pad with spaces, replacing | 56 | Use a special separator character and don't pad with spaces, replacing |
45 | all occurances of this separator in symbol names (and other output) | 57 | all occurrences of this separator in symbol names (and other output) |
46 | with a '.' character, that thus it's the only non valid separator. | 58 | with a '.' character, that thus it's the only non valid separator. |
47 | 59 | ||
48 | -v:: | 60 | -v:: |
49 | --verbose:: | 61 | --verbose:: |
50 | Be verbose, for instance, show the raw counts in addition to the | 62 | Be verbose, for instance, show the raw counts in addition to the |
51 | diff. | 63 | diff. |
64 | |||
65 | -f:: | ||
66 | --force:: | ||
67 | Don't complain, do it. | ||
68 | |||
52 | 69 | ||
53 | SEE ALSO | 70 | SEE ALSO |
54 | -------- | 71 | -------- |
55 | linkperf:perf-record[1] | 72 | linkperf:perf-record[1] |
56 | 73 |
tools/perf/Documentation/perf-kvm.txt
1 | perf-kvm(1) | 1 | perf-kvm(1) |
2 | =========== | 2 | =========== |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-kvm - Tool to trace/measure kvm guest os | 6 | perf-kvm - Tool to trace/measure kvm guest os |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf kvm' [--host] [--guest] [--guestmount=<path> | 11 | 'perf kvm' [--host] [--guest] [--guestmount=<path> |
12 | [--guestkallsyms=<path> --guestmodules=<path> | --guestvmlinux=<path>]] | 12 | [--guestkallsyms=<path> --guestmodules=<path> | --guestvmlinux=<path>]] |
13 | {top|record|report|diff|buildid-list} | 13 | {top|record|report|diff|buildid-list} |
14 | 'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path> | 14 | 'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path> |
15 | | --guestvmlinux=<path>] {top|record|report|diff|buildid-list} | 15 | | --guestvmlinux=<path>] {top|record|report|diff|buildid-list} |
16 | 16 | ||
17 | DESCRIPTION | 17 | DESCRIPTION |
18 | ----------- | 18 | ----------- |
19 | There are a couple of variants of perf kvm: | 19 | There are a couple of variants of perf kvm: |
20 | 20 | ||
21 | 'perf kvm [options] top <command>' to generate and display | 21 | 'perf kvm [options] top <command>' to generate and display |
22 | a performance counter profile of guest os in realtime | 22 | a performance counter profile of guest os in realtime |
23 | of an arbitrary workload. | 23 | of an arbitrary workload. |
24 | 24 | ||
25 | 'perf kvm record <command>' to record the performance couinter profile | 25 | 'perf kvm record <command>' to record the performance counter profile |
26 | of an arbitrary workload and save it into a perf data file. If both | 26 | of an arbitrary workload and save it into a perf data file. If both |
27 | --host and --guest are input, the perf data file name is perf.data.kvm. | 27 | --host and --guest are input, the perf data file name is perf.data.kvm. |
28 | If there is no --host but --guest, the file name is perf.data.guest. | 28 | If there is no --host but --guest, the file name is perf.data.guest. |
29 | If there is no --guest but --host, the file name is perf.data.host. | 29 | If there is no --guest but --host, the file name is perf.data.host. |
30 | 30 | ||
31 | 'perf kvm report' to display the performance counter profile information | 31 | 'perf kvm report' to display the performance counter profile information |
32 | recorded via perf kvm record. | 32 | recorded via perf kvm record. |
33 | 33 | ||
34 | 'perf kvm diff' to display the performance difference amongst two perf.data | 34 | 'perf kvm diff' to display the performance difference amongst two perf.data |
35 | files captured via perf record. | 35 | files captured via perf record. |
36 | 36 | ||
37 | 'perf kvm buildid-list' to display the buildids found in a perf data file, | 37 | 'perf kvm buildid-list' to display the buildids found in a perf data file, |
38 | so that other tools can be used to fetch packages with matching symbol tables | 38 | so that other tools can be used to fetch packages with matching symbol tables |
39 | for use by perf report. | 39 | for use by perf report. |
40 | 40 | ||
41 | OPTIONS | 41 | OPTIONS |
42 | ------- | 42 | ------- |
43 | -i:: | ||
44 | --input=:: | ||
45 | Input file name. | ||
46 | -o:: | ||
47 | --output:: | ||
48 | Output file name. | ||
43 | --host=:: | 49 | --host=:: |
44 | Collect host side performance profile. | 50 | Collect host side performance profile. |
45 | --guest=:: | 51 | --guest=:: |
46 | Collect guest side performance profile. | 52 | Collect guest side performance profile. |
47 | --guestmount=<path>:: | 53 | --guestmount=<path>:: |
48 | Guest os root file system mount directory. Users mount guest os | 54 | Guest os root file system mount directory. Users mount guest os |
49 | root directories under <path> by a specific filesystem access method, | 55 | root directories under <path> by a specific filesystem access method, |
50 | typically, sshfs. For example, start 2 guest os. The one's pid is 8888 | 56 | typically, sshfs. For example, start 2 guest os. The one's pid is 8888 |
51 | and the other's is 9999. | 57 | and the other's is 9999. |
52 | #mkdir ~/guestmount; cd ~/guestmount | 58 | #mkdir ~/guestmount; cd ~/guestmount |
53 | #sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/ | 59 | #sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/ |
54 | #sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/ | 60 | #sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/ |
55 | #perf kvm --host --guest --guestmount=~/guestmount top | 61 | #perf kvm --host --guest --guestmount=~/guestmount top |
56 | --guestkallsyms=<path>:: | 62 | --guestkallsyms=<path>:: |
57 | Guest os /proc/kallsyms file copy. 'perf kvm' reads it to get guest | 63 | Guest os /proc/kallsyms file copy. 'perf kvm' reads it to get guest |
58 | kernel symbols. Users copy it out from guest os. | 64 | kernel symbols. Users copy it out from guest os. |
59 | --guestmodules=<path>:: | 65 | --guestmodules=<path>:: |
60 | Guest os /proc/modules file copy. 'perf kvm' reads it to get guest | 66 | Guest os /proc/modules file copy. 'perf kvm' reads it to get guest |
61 | kernel module information. Users copy it out from guest os. | 67 | kernel module information. Users copy it out from guest os. |
62 | --guestvmlinux=<path>:: | 68 | --guestvmlinux=<path>:: |
63 | Guest os kernel vmlinux. | 69 | Guest os kernel vmlinux. |
64 | 70 | ||
65 | SEE ALSO | 71 | SEE ALSO |
66 | -------- | 72 | -------- |
67 | linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1], | 73 | linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1], |
68 | linkperf:perf-diff[1], linkperf:perf-buildid-list[1] | 74 | linkperf:perf-diff[1], linkperf:perf-buildid-list[1] |
69 | 75 |
tools/perf/Documentation/perf-lock.txt
1 | perf-lock(1) | 1 | perf-lock(1) |
2 | ============ | 2 | ============ |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-lock - Analyze lock events | 6 | perf-lock - Analyze lock events |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf lock' {record|report|trace} | 11 | 'perf lock' {record|report|trace} |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | You can analyze various lock behaviours | 15 | You can analyze various lock behaviours |
16 | and statistics with this 'perf lock' command. | 16 | and statistics with this 'perf lock' command. |
17 | 17 | ||
18 | 'perf lock record <command>' records lock events | 18 | 'perf lock record <command>' records lock events |
19 | between start and end <command>. And this command | 19 | between start and end <command>. And this command |
20 | produces the file "perf.data" which contains tracing | 20 | produces the file "perf.data" which contains tracing |
21 | results of lock events. | 21 | results of lock events. |
22 | 22 | ||
23 | 'perf lock trace' shows raw lock events. | 23 | 'perf lock trace' shows raw lock events. |
24 | 24 | ||
25 | 'perf lock report' reports statistical data. | 25 | 'perf lock report' reports statistical data. |
26 | 26 | ||
27 | OPTIONS | ||
28 | ------- | ||
29 | |||
30 | -i:: | ||
31 | --input=<file>:: | ||
32 | Input file name. | ||
33 | |||
34 | -v:: | ||
35 | --verbose:: | ||
36 | Be more verbose (show symbol address, etc). | ||
37 | |||
38 | -D:: | ||
39 | --dump-raw-trace:: | ||
40 | Dump raw trace in ASCII. | ||
41 | |||
27 | SEE ALSO | 42 | SEE ALSO |
28 | -------- | 43 | -------- |
29 | linkperf:perf[1] | 44 | linkperf:perf[1] |
30 | 45 |
tools/perf/Documentation/perf-probe.txt
1 | perf-probe(1) | 1 | perf-probe(1) |
2 | ============= | 2 | ============= |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-probe - Define new dynamic tracepoints | 6 | perf-probe - Define new dynamic tracepoints |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf probe' [options] --add='PROBE' [...] | 11 | 'perf probe' [options] --add='PROBE' [...] |
12 | or | 12 | or |
13 | 'perf probe' [options] PROBE | 13 | 'perf probe' [options] PROBE |
14 | or | 14 | or |
15 | 'perf probe' [options] --del='[GROUP:]EVENT' [...] | 15 | 'perf probe' [options] --del='[GROUP:]EVENT' [...] |
16 | or | 16 | or |
17 | 'perf probe' --list | 17 | 'perf probe' --list |
18 | or | 18 | or |
19 | 'perf probe' [options] --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]' | 19 | 'perf probe' [options] --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]' |
20 | or | 20 | or |
21 | 'perf probe' [options] --vars='PROBEPOINT' | 21 | 'perf probe' [options] --vars='PROBEPOINT' |
22 | 22 | ||
23 | DESCRIPTION | 23 | DESCRIPTION |
24 | ----------- | 24 | ----------- |
25 | This command defines dynamic tracepoint events, by symbol and registers | 25 | This command defines dynamic tracepoint events, by symbol and registers |
26 | without debuginfo, or by C expressions (C line numbers, C function names, | 26 | without debuginfo, or by C expressions (C line numbers, C function names, |
27 | and C local variables) with debuginfo. | 27 | and C local variables) with debuginfo. |
28 | 28 | ||
29 | 29 | ||
30 | OPTIONS | 30 | OPTIONS |
31 | ------- | 31 | ------- |
32 | -k:: | 32 | -k:: |
33 | --vmlinux=PATH:: | 33 | --vmlinux=PATH:: |
34 | Specify vmlinux path which has debuginfo (Dwarf binary). | 34 | Specify vmlinux path which has debuginfo (Dwarf binary). |
35 | 35 | ||
36 | -m:: | 36 | -m:: |
37 | --module=MODNAME:: | 37 | --module=MODNAME:: |
38 | Specify module name in which perf-probe searches probe points | 38 | Specify module name in which perf-probe searches probe points |
39 | or lines. | 39 | or lines. |
40 | 40 | ||
41 | -s:: | 41 | -s:: |
42 | --source=PATH:: | 42 | --source=PATH:: |
43 | Specify path to kernel source. | 43 | Specify path to kernel source. |
44 | 44 | ||
45 | -v:: | 45 | -v:: |
46 | --verbose:: | 46 | --verbose:: |
47 | Be more verbose (show parsed arguments, etc). | 47 | Be more verbose (show parsed arguments, etc). |
48 | 48 | ||
49 | -a:: | 49 | -a:: |
50 | --add=:: | 50 | --add=:: |
51 | Define a probe event (see PROBE SYNTAX for detail). | 51 | Define a probe event (see PROBE SYNTAX for detail). |
52 | 52 | ||
53 | -d:: | 53 | -d:: |
54 | --del=:: | 54 | --del=:: |
55 | Delete probe events. This accepts glob wildcards('*', '?') and character | 55 | Delete probe events. This accepts glob wildcards('*', '?') and character |
56 | classes(e.g. [a-z], [!A-Z]). | 56 | classes(e.g. [a-z], [!A-Z]). |
57 | 57 | ||
58 | -l:: | 58 | -l:: |
59 | --list:: | 59 | --list:: |
60 | List up current probe events. | 60 | List up current probe events. |
61 | 61 | ||
62 | -L:: | 62 | -L:: |
63 | --line=:: | 63 | --line=:: |
64 | Show source code lines which can be probed. This needs an argument | 64 | Show source code lines which can be probed. This needs an argument |
65 | which specifies a range of the source code. (see LINE SYNTAX for detail) | 65 | which specifies a range of the source code. (see LINE SYNTAX for detail) |
66 | 66 | ||
67 | -V:: | 67 | -V:: |
68 | --vars=:: | 68 | --vars=:: |
69 | Show available local variables at given probe point. The argument | 69 | Show available local variables at given probe point. The argument |
70 | syntax is same as PROBE SYNTAX, but NO ARGs. | 70 | syntax is same as PROBE SYNTAX, but NO ARGs. |
71 | 71 | ||
72 | --externs:: | 72 | --externs:: |
73 | (Only for --vars) Show external defined variables in addition to local | 73 | (Only for --vars) Show external defined variables in addition to local |
74 | variables. | 74 | variables. |
75 | 75 | ||
76 | -f:: | 76 | -f:: |
77 | --force:: | 77 | --force:: |
78 | Forcibly add events with existing name. | 78 | Forcibly add events with existing name. |
79 | 79 | ||
80 | -n:: | 80 | -n:: |
81 | --dry-run:: | 81 | --dry-run:: |
82 | Dry run. With this option, --add and --del don't execute actual | 82 | Dry run. With this option, --add and --del don't execute actual |
83 | adding and removal operations. | 83 | adding and removal operations. |
84 | 84 | ||
85 | --max-probes:: | 85 | --max-probes:: |
86 | Set the maximum number of probe points for an event. Default is 128. | 86 | Set the maximum number of probe points for an event. Default is 128. |
87 | 87 | ||
88 | PROBE SYNTAX | 88 | PROBE SYNTAX |
89 | ------------ | 89 | ------------ |
90 | Probe points are defined by following syntax. | 90 | Probe points are defined by following syntax. |
91 | 91 | ||
92 | 1) Define event based on function name | 92 | 1) Define event based on function name |
93 | [EVENT=]FUNC[@SRC][:RLN|+OFFS|%return|;PTN] [ARG ...] | 93 | [EVENT=]FUNC[@SRC][:RLN|+OFFS|%return|;PTN] [ARG ...] |
94 | 94 | ||
95 | 2) Define event based on source file with line number | 95 | 2) Define event based on source file with line number |
96 | [EVENT=]SRC:ALN [ARG ...] | 96 | [EVENT=]SRC:ALN [ARG ...] |
97 | 97 | ||
98 | 3) Define event based on source file with lazy pattern | 98 | 3) Define event based on source file with lazy pattern |
99 | [EVENT=]SRC;PTN [ARG ...] | 99 | [EVENT=]SRC;PTN [ARG ...] |
100 | 100 | ||
101 | 101 | ||
102 | 'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function. Currently, event group name is set as 'probe'. | 102 | 'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function. Currently, event group name is set as 'probe'. |
103 | 'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function. | 103 | 'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function. |
104 | It is also possible to specify a probe point by the source line number or lazy matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file path, ':ALN' is the line number and ';PTN' is the lazy matching pattern. | 104 | It is also possible to specify a probe point by the source line number or lazy matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file path, ':ALN' is the line number and ';PTN' is the lazy matching pattern. |
105 | 'ARG' specifies the arguments of this probe point, (see PROBE ARGUMENT). | 105 | 'ARG' specifies the arguments of this probe point, (see PROBE ARGUMENT). |
106 | 106 | ||
107 | PROBE ARGUMENT | 107 | PROBE ARGUMENT |
108 | -------------- | 108 | -------------- |
109 | Each probe argument follows below syntax. | 109 | Each probe argument follows below syntax. |
110 | 110 | ||
111 | [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE] | 111 | [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE] |
112 | 112 | ||
113 | 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) | 113 | 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) |
114 | 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. | 114 | 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. |
115 | 115 | ||
116 | LINE SYNTAX | 116 | LINE SYNTAX |
117 | ----------- | 117 | ----------- |
118 | Line range is descripted by following syntax. | 118 | Line range is described by following syntax. |
119 | 119 | ||
120 | "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]" | 120 | "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]" |
121 | 121 | ||
122 | FUNC specifies the function name of showing lines. 'RLN' is the start line | 122 | FUNC specifies the function name of showing lines. 'RLN' is the start line |
123 | number from function entry line, and 'RLN2' is the end line number. As same as | 123 | number from function entry line, and 'RLN2' is the end line number. As same as |
124 | probe syntax, 'SRC' means the source file path, 'ALN' is start line number, | 124 | probe syntax, 'SRC' means the source file path, 'ALN' is start line number, |
125 | and 'ALN2' is end line number in the file. It is also possible to specify how | 125 | and 'ALN2' is end line number in the file. It is also possible to specify how |
126 | many lines to show by using 'NUM'. | 126 | many lines to show by using 'NUM'. |
127 | So, "source.c:100-120" shows lines between 100th to 120th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function. | 127 | So, "source.c:100-120" shows lines between 100th to 120th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function. |
128 | 128 | ||
129 | LAZY MATCHING | 129 | LAZY MATCHING |
130 | ------------- | 130 | ------------- |
131 | The lazy line matching is similar to glob matching but ignoring spaces in both of pattern and target. So this accepts wildcards('*', '?') and character classes(e.g. [a-z], [!A-Z]). | 131 | The lazy line matching is similar to glob matching but ignoring spaces in both of pattern and target. So this accepts wildcards('*', '?') and character classes(e.g. [a-z], [!A-Z]). |
132 | 132 | ||
133 | e.g. | 133 | e.g. |
134 | 'a=*' can match 'a=b', 'a = b', 'a == b' and so on. | 134 | 'a=*' can match 'a=b', 'a = b', 'a == b' and so on. |
135 | 135 | ||
136 | This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function. | 136 | This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function. |
137 | 137 | ||
138 | 138 | ||
139 | EXAMPLES | 139 | EXAMPLES |
140 | -------- | 140 | -------- |
141 | Display which lines in schedule() can be probed: | 141 | Display which lines in schedule() can be probed: |
142 | 142 | ||
143 | ./perf probe --line schedule | 143 | ./perf probe --line schedule |
144 | 144 | ||
145 | Add a probe on schedule() function 12th line with recording cpu local variable: | 145 | Add a probe on schedule() function 12th line with recording cpu local variable: |
146 | 146 | ||
147 | ./perf probe schedule:12 cpu | 147 | ./perf probe schedule:12 cpu |
148 | or | 148 | or |
149 | ./perf probe --add='schedule:12 cpu' | 149 | ./perf probe --add='schedule:12 cpu' |
150 | 150 | ||
151 | this will add one or more probes which has the name start with "schedule". | 151 | this will add one or more probes which has the name start with "schedule". |
152 | 152 | ||
153 | Add probes on lines in schedule() function which calls update_rq_clock(). | 153 | Add probes on lines in schedule() function which calls update_rq_clock(). |
154 | 154 | ||
155 | ./perf probe 'schedule;update_rq_clock*' | 155 | ./perf probe 'schedule;update_rq_clock*' |
156 | or | 156 | or |
157 | ./perf probe --add='schedule;update_rq_clock*' | 157 | ./perf probe --add='schedule;update_rq_clock*' |
158 | 158 | ||
159 | Delete all probes on schedule(). | 159 | Delete all probes on schedule(). |
160 | 160 | ||
161 | ./perf probe --del='schedule*' | 161 | ./perf probe --del='schedule*' |
162 | 162 | ||
163 | 163 | ||
164 | SEE ALSO | 164 | SEE ALSO |
165 | -------- | 165 | -------- |
166 | linkperf:perf-trace[1], linkperf:perf-record[1] | 166 | linkperf:perf-trace[1], linkperf:perf-record[1] |
167 | 167 |
tools/perf/Documentation/perf-record.txt
1 | perf-record(1) | 1 | perf-record(1) |
2 | ============== | 2 | ============== |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-record - Run a command and record its profile into perf.data | 6 | perf-record - Run a command and record its profile into perf.data |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> | 11 | 'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> |
12 | 'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>] | 12 | 'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>] |
13 | 13 | ||
14 | DESCRIPTION | 14 | DESCRIPTION |
15 | ----------- | 15 | ----------- |
16 | This command runs a command and gathers a performance counter profile | 16 | This command runs a command and gathers a performance counter profile |
17 | from it, into perf.data - without displaying anything. | 17 | from it, into perf.data - without displaying anything. |
18 | 18 | ||
19 | This file can then be inspected later on, using 'perf report'. | 19 | This file can then be inspected later on, using 'perf report'. |
20 | 20 | ||
21 | 21 | ||
22 | OPTIONS | 22 | OPTIONS |
23 | ------- | 23 | ------- |
24 | <command>...:: | 24 | <command>...:: |
25 | Any command you can specify in a shell. | 25 | Any command you can specify in a shell. |
26 | 26 | ||
27 | -e:: | 27 | -e:: |
28 | --event=:: | 28 | --event=:: |
29 | Select the PMU event. Selection can be: | 29 | Select the PMU event. Selection can be: |
30 | 30 | ||
31 | - a symbolic event name (use 'perf list' to list all events) | 31 | - a symbolic event name (use 'perf list' to list all events) |
32 | 32 | ||
33 | - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a | 33 | - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a |
34 | hexadecimal event descriptor. | 34 | hexadecimal event descriptor. |
35 | 35 | ||
36 | - a hardware breakpoint event in the form of '\mem:addr[:access]' | 36 | - a hardware breakpoint event in the form of '\mem:addr[:access]' |
37 | where addr is the address in memory you want to break in. | 37 | where addr is the address in memory you want to break in. |
38 | Access is the memory access type (read, write, execute) it can | 38 | Access is the memory access type (read, write, execute) it can |
39 | be passed as follows: '\mem:addr[:[r][w][x]]'. | 39 | be passed as follows: '\mem:addr[:[r][w][x]]'. |
40 | If you want to profile read-write accesses in 0x1000, just set | 40 | If you want to profile read-write accesses in 0x1000, just set |
41 | 'mem:0x1000:rw'. | 41 | 'mem:0x1000:rw'. |
42 | |||
43 | --filter=<filter>:: | ||
44 | Event filter. | ||
45 | |||
42 | -a:: | 46 | -a:: |
43 | System-wide collection. | 47 | --all-cpus:: |
48 | System-wide collection from all CPUs. | ||
44 | 49 | ||
45 | -l:: | 50 | -l:: |
46 | Scale counter values. | 51 | Scale counter values. |
47 | 52 | ||
48 | -p:: | 53 | -p:: |
49 | --pid=:: | 54 | --pid=:: |
50 | Record events on existing pid. | 55 | Record events on existing process ID. |
51 | 56 | ||
57 | -t:: | ||
58 | --tid=:: | ||
59 | Record events on existing thread ID. | ||
60 | |||
52 | -r:: | 61 | -r:: |
53 | --realtime=:: | 62 | --realtime=:: |
54 | Collect data with this RT SCHED_FIFO priority. | 63 | Collect data with this RT SCHED_FIFO priority. |
55 | -A:: | 64 | -A:: |
56 | --append:: | 65 | --append:: |
57 | Append to the output file to do incremental profiling. | 66 | Append to the output file to do incremental profiling. |
58 | 67 | ||
59 | -f:: | 68 | -f:: |
60 | --force:: | 69 | --force:: |
61 | Overwrite existing data file. (deprecated) | 70 | Overwrite existing data file. (deprecated) |
62 | 71 | ||
63 | -c:: | 72 | -c:: |
64 | --count=:: | 73 | --count=:: |
65 | Event period to sample. | 74 | Event period to sample. |
66 | 75 | ||
67 | -o:: | 76 | -o:: |
68 | --output=:: | 77 | --output=:: |
69 | Output file name. | 78 | Output file name. |
70 | 79 | ||
71 | -i:: | 80 | -i:: |
72 | --no-inherit:: | 81 | --no-inherit:: |
73 | Child tasks do not inherit counters. | 82 | Child tasks do not inherit counters. |
74 | -F:: | 83 | -F:: |
75 | --freq=:: | 84 | --freq=:: |
76 | Profile at this frequency. | 85 | Profile at this frequency. |
77 | 86 | ||
78 | -m:: | 87 | -m:: |
79 | --mmap-pages=:: | 88 | --mmap-pages=:: |
80 | Number of mmap data pages. | 89 | Number of mmap data pages. |
81 | 90 | ||
82 | -g:: | 91 | -g:: |
83 | --call-graph:: | 92 | --call-graph:: |
84 | Do call-graph (stack chain/backtrace) recording. | 93 | Do call-graph (stack chain/backtrace) recording. |
85 | 94 | ||
86 | -q:: | 95 | -q:: |
87 | --quiet:: | 96 | --quiet:: |
88 | Don't print any message, useful for scripting. | 97 | Don't print any message, useful for scripting. |
89 | 98 | ||
90 | -v:: | 99 | -v:: |
91 | --verbose:: | 100 | --verbose:: |
92 | Be more verbose (show counter open errors, etc). | 101 | Be more verbose (show counter open errors, etc). |
93 | 102 | ||
94 | -s:: | 103 | -s:: |
95 | --stat:: | 104 | --stat:: |
96 | Per thread counts. | 105 | Per thread counts. |
97 | 106 | ||
98 | -d:: | 107 | -d:: |
99 | --data:: | 108 | --data:: |
100 | Sample addresses. | 109 | Sample addresses. |
101 | 110 | ||
102 | -n:: | 111 | -n:: |
103 | --no-samples:: | 112 | --no-samples:: |
104 | Don't sample. | 113 | Don't sample. |
105 | 114 | ||
106 | -R:: | 115 | -R:: |
107 | --raw-samples:: | 116 | --raw-samples:: |
108 | Collect raw sample records from all opened counters (default for tracepoint counters). | 117 | Collect raw sample records from all opened counters (default for tracepoint counters). |
109 | 118 | ||
110 | -C:: | 119 | -C:: |
111 | --cpu:: | 120 | --cpu:: |
112 | Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a | 121 | Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a |
113 | comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. | 122 | comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. |
114 | In per-thread mode with inheritance mode on (default), samples are captured only when | 123 | In per-thread mode with inheritance mode on (default), samples are captured only when |
115 | the thread executes on the designated CPUs. Default is to monitor all CPUs. | 124 | the thread executes on the designated CPUs. Default is to monitor all CPUs. |
116 | 125 | ||
117 | -N:: | 126 | -N:: |
118 | --no-buildid-cache:: | 127 | --no-buildid-cache:: |
119 | Do not update the buildid cache. This saves some overhead in situations | 128 | Do not update the buildid cache. This saves some overhead in situations |
120 | where the information in the perf.data file (which includes buildids) | 129 | where the information in the perf.data file (which includes buildids) |
121 | is sufficient. | 130 | is sufficient. |
122 | 131 | ||
123 | SEE ALSO | 132 | SEE ALSO |
124 | -------- | 133 | -------- |
125 | linkperf:perf-stat[1], linkperf:perf-list[1] | 134 | linkperf:perf-stat[1], linkperf:perf-list[1] |
126 | 135 |
tools/perf/Documentation/perf-report.txt
1 | perf-report(1) | 1 | perf-report(1) |
2 | ============== | 2 | ============== |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-report - Read perf.data (created by perf record) and display the profile | 6 | perf-report - Read perf.data (created by perf record) and display the profile |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf report' [-i <file> | --input=file] | 11 | 'perf report' [-i <file> | --input=file] |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | This command displays the performance counter profile information recorded | 15 | This command displays the performance counter profile information recorded |
16 | via perf record. | 16 | via perf record. |
17 | 17 | ||
18 | OPTIONS | 18 | OPTIONS |
19 | ------- | 19 | ------- |
20 | -i:: | 20 | -i:: |
21 | --input=:: | 21 | --input=:: |
22 | Input file name. (default: perf.data) | 22 | Input file name. (default: perf.data) |
23 | |||
24 | -v:: | ||
25 | --verbose:: | ||
26 | Be more verbose. (show symbol address, etc) | ||
27 | |||
23 | -d:: | 28 | -d:: |
24 | --dsos=:: | 29 | --dsos=:: |
25 | Only consider symbols in these dsos. CSV that understands | 30 | Only consider symbols in these dsos. CSV that understands |
26 | file://filename entries. | 31 | file://filename entries. |
27 | -n:: | 32 | -n:: |
28 | --show-nr-samples:: | 33 | --show-nr-samples:: |
29 | Show the number of samples for each symbol | 34 | Show the number of samples for each symbol |
35 | |||
36 | --showcpuutilization:: | ||
37 | Show sample percentage for different cpu modes. | ||
38 | |||
30 | -T:: | 39 | -T:: |
31 | --threads:: | 40 | --threads:: |
32 | Show per-thread event counters | 41 | Show per-thread event counters |
33 | -C:: | 42 | -C:: |
34 | --comms=:: | 43 | --comms=:: |
35 | Only consider symbols in these comms. CSV that understands | 44 | Only consider symbols in these comms. CSV that understands |
36 | file://filename entries. | 45 | file://filename entries. |
37 | -S:: | 46 | -S:: |
38 | --symbols=:: | 47 | --symbols=:: |
39 | Only consider these symbols. CSV that understands | 48 | Only consider these symbols. CSV that understands |
40 | file://filename entries. | 49 | file://filename entries. |
41 | 50 | ||
51 | -U:: | ||
52 | --hide-unresolved:: | ||
53 | Only display entries resolved to a symbol. | ||
54 | |||
42 | -s:: | 55 | -s:: |
43 | --sort=:: | 56 | --sort=:: |
44 | Sort by key(s): pid, comm, dso, symbol, parent. | 57 | Sort by key(s): pid, comm, dso, symbol, parent. |
45 | 58 | ||
59 | -p:: | ||
60 | --parent=<regex>:: | ||
61 | regex filter to identify parent, see: '--sort parent' | ||
62 | |||
63 | -x:: | ||
64 | --exclude-other:: | ||
65 | Only display entries with parent-match. | ||
66 | |||
46 | -w:: | 67 | -w:: |
47 | --field-width=:: | 68 | --column-widths=<width[,width...]>:: |
48 | Force each column width to the provided list, for large terminal | 69 | Force each column width to the provided list, for large terminal |
49 | readability. | 70 | readability. |
50 | 71 | ||
51 | -t:: | 72 | -t:: |
52 | --field-separator=:: | 73 | --field-separator=:: |
53 | 74 | ||
54 | Use a special separator character and don't pad with spaces, replacing | 75 | Use a special separator character and don't pad with spaces, replacing |
55 | all occurances of this separator in symbol names (and other output) | 76 | all occurrences of this separator in symbol names (and other output) |
56 | with a '.' character, which is thus the only invalid separator. | 77 | with a '.' character, which is thus the only invalid separator. |
57 | 78 | ||
79 | -D:: | ||
80 | --dump-raw-trace:: | ||
81 | Dump raw trace in ASCII. | ||
82 | |||
58 | -g [type,min]:: | 83 | -g [type,min]:: |
59 | --call-graph:: | 84 | --call-graph:: |
60 | Display callchains using type and min percent threshold. | 85 | Display call chains using type and min percent threshold. |
61 | type can be either: | 86 | type can be either: |
62 | - flat: single column, linear exposure of callchains. | 87 | - flat: single column, linear exposure of call chains. |
63 | - graph: use a graph tree, displaying absolute overhead rates. | 88 | - graph: use a graph tree, displaying absolute overhead rates. |
64 | - fractal: like graph, but displays relative rates. Each branch of | 89 | - fractal: like graph, but displays relative rates. Each branch of |
65 | the tree is considered as a new profiled object. + | 90 | the tree is considered as a new profiled object. + |
66 | Default: fractal,0.5. | 91 | Default: fractal,0.5. |
67 | 92 | ||
93 | --pretty=<key>:: | ||
94 | Pretty printing style. key: normal, raw | ||
95 | |||
68 | --stdio:: Use the stdio interface. | 96 | --stdio:: Use the stdio interface. |
69 | 97 | ||
70 | --tui:: Use the TUI interface, that is integrated with annotate and allows | 98 | --tui:: Use the TUI interface, that is integrated with annotate and allows |
71 | zooming into DSOs or threads, among other features. Use of --tui | 99 | zooming into DSOs or threads, among other features. Use of --tui |
72 | requires a tty, if one is not present, as when piping to other | 100 | requires a tty, if one is not present, as when piping to other |
73 | commands, the stdio interface is used. | 101 | commands, the stdio interface is used. |
102 | |||
103 | -k:: | ||
104 | --vmlinux=<file>:: | ||
105 | vmlinux pathname | ||
106 | |||
107 | -m:: | ||
108 | --modules:: | ||
109 | Load module symbols. WARNING: This should only be used with -k and | ||
110 | a LIVE kernel. | ||
111 | |||
112 | -f:: | ||
113 | --force:: | ||
114 | Don't complain, do it. | ||
74 | 115 | ||
75 | SEE ALSO | 116 | SEE ALSO |
76 | -------- | 117 | -------- |
77 | linkperf:perf-stat[1] | 118 | linkperf:perf-stat[1] |
78 | 119 |
tools/perf/Documentation/perf-sched.txt
1 | perf-sched(1) | 1 | perf-sched(1) |
2 | ============== | 2 | ============== |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-sched - Tool to trace/measure scheduler properties (latencies) | 6 | perf-sched - Tool to trace/measure scheduler properties (latencies) |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf sched' {record|latency|replay|trace} | 11 | 'perf sched' {record|latency|map|replay|trace} |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | There are four variants of perf sched: | 15 | There are five variants of perf sched: |
16 | 16 | ||
17 | 'perf sched record <command>' to record the scheduling events | 17 | 'perf sched record <command>' to record the scheduling events |
18 | of an arbitrary workload. | 18 | of an arbitrary workload. |
19 | 19 | ||
20 | 'perf sched latency' to report the per task scheduling latencies | 20 | 'perf sched latency' to report the per task scheduling latencies |
21 | and other scheduling properties of the workload. | 21 | and other scheduling properties of the workload. |
22 | 22 | ||
23 | 'perf sched trace' to see a detailed trace of the workload that | 23 | 'perf sched trace' to see a detailed trace of the workload that |
24 | was recorded. | 24 | was recorded. |
25 | 25 | ||
26 | 'perf sched replay' to simulate the workload that was recorded | 26 | 'perf sched replay' to simulate the workload that was recorded |
27 | via perf sched record. (this is done by starting up mockup threads | 27 | via perf sched record. (this is done by starting up mockup threads |
28 | that mimic the workload based on the events in the trace. These | 28 | that mimic the workload based on the events in the trace. These |
29 | threads can then replay the timings (CPU runtime and sleep patterns) | 29 | threads can then replay the timings (CPU runtime and sleep patterns) |
30 | of the workload as it occurred when it was recorded - and can repeat | 30 | of the workload as it occurred when it was recorded - and can repeat |
31 | it a number of times, measuring its performance.) | 31 | it a number of times, measuring its performance.) |
32 | 32 | ||
33 | 'perf sched map' to print a textual context-switching outline of | ||
34 | workload captured via perf sched record. Columns stand for | ||
35 | individual CPUs, and the two-letter shortcuts stand for tasks that | ||
36 | are running on a CPU. A '*' denotes the CPU that had the event, and | ||
37 | a dot signals an idle CPU. | ||
38 | |||
33 | OPTIONS | 39 | OPTIONS |
34 | ------- | 40 | ------- |
41 | -i:: | ||
42 | --input=<file>:: | ||
43 | Input file name. (default: perf.data) | ||
44 | |||
45 | -v:: | ||
46 | --verbose:: | ||
47 | Be more verbose. (show symbol address, etc) | ||
48 | |||
35 | -D:: | 49 | -D:: |
36 | --dump-raw-trace=:: | 50 | --dump-raw-trace=:: |
37 | Display verbose dump of the sched data. | 51 | Display verbose dump of the sched data. |
38 | 52 | ||
39 | SEE ALSO | 53 | SEE ALSO |
40 | -------- | 54 | -------- |
41 | linkperf:perf-record[1] | 55 | linkperf:perf-record[1] |
42 | 56 |
tools/perf/Documentation/perf-script.txt
1 | perf-script(1) | 1 | perf-script(1) |
2 | ============= | 2 | ============= |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-script - Read perf.data (created by perf record) and display trace output | 6 | perf-script - Read perf.data (created by perf record) and display trace output |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf script' [<options>] | 11 | 'perf script' [<options>] |
12 | 'perf script' [<options>] record <script> [<record-options>] <command> | 12 | 'perf script' [<options>] record <script> [<record-options>] <command> |
13 | 'perf script' [<options>] report <script> [script-args] | 13 | 'perf script' [<options>] report <script> [script-args] |
14 | 'perf script' [<options>] <script> <required-script-args> [<record-options>] <command> | 14 | 'perf script' [<options>] <script> <required-script-args> [<record-options>] <command> |
15 | 'perf script' [<options>] <top-script> [script-args] | 15 | 'perf script' [<options>] <top-script> [script-args] |
16 | 16 | ||
17 | DESCRIPTION | 17 | DESCRIPTION |
18 | ----------- | 18 | ----------- |
19 | This command reads the input file and displays the trace recorded. | 19 | This command reads the input file and displays the trace recorded. |
20 | 20 | ||
21 | There are several variants of perf script: | 21 | There are several variants of perf script: |
22 | 22 | ||
23 | 'perf script' to see a detailed trace of the workload that was | 23 | 'perf script' to see a detailed trace of the workload that was |
24 | recorded. | 24 | recorded. |
25 | 25 | ||
26 | You can also run a set of pre-canned scripts that aggregate and | 26 | You can also run a set of pre-canned scripts that aggregate and |
27 | summarize the raw trace data in various ways (the list of scripts is | 27 | summarize the raw trace data in various ways (the list of scripts is |
28 | available via 'perf script -l'). The following variants allow you to | 28 | available via 'perf script -l'). The following variants allow you to |
29 | record and run those scripts: | 29 | record and run those scripts: |
30 | 30 | ||
31 | 'perf script record <script> <command>' to record the events required | 31 | 'perf script record <script> <command>' to record the events required |
32 | for 'perf script report'. <script> is the name displayed in the | 32 | for 'perf script report'. <script> is the name displayed in the |
33 | output of 'perf script --list' i.e. the actual script name minus any | 33 | output of 'perf script --list' i.e. the actual script name minus any |
34 | language extension. If <command> is not specified, the events are | 34 | language extension. If <command> is not specified, the events are |
35 | recorded using the -a (system-wide) 'perf record' option. | 35 | recorded using the -a (system-wide) 'perf record' option. |
36 | 36 | ||
37 | 'perf script report <script> [args]' to run and display the results | 37 | 'perf script report <script> [args]' to run and display the results |
38 | of <script>. <script> is the name displayed in the output of 'perf | 38 | of <script>. <script> is the name displayed in the output of 'perf |
39 | script --list' i.e. the actual script name minus any language | 39 | script --list' i.e. the actual script name minus any language |
40 | extension. The perf.data output from a previous run of 'perf script | 40 | extension. The perf.data output from a previous run of 'perf script |
41 | record <script>' is used and should be present for this command to | 41 | record <script>' is used and should be present for this command to |
42 | succeed. [args] refers to the (mainly optional) args expected by | 42 | succeed. [args] refers to the (mainly optional) args expected by |
43 | the script. | 43 | the script. |
44 | 44 | ||
45 | 'perf script <script> <required-script-args> <command>' to both | 45 | 'perf script <script> <required-script-args> <command>' to both |
46 | record the events required for <script> and to run the <script> | 46 | record the events required for <script> and to run the <script> |
47 | using 'live-mode' i.e. without writing anything to disk. <script> | 47 | using 'live-mode' i.e. without writing anything to disk. <script> |
48 | is the name displayed in the output of 'perf script --list' i.e. the | 48 | is the name displayed in the output of 'perf script --list' i.e. the |
49 | actual script name minus any language extension. If <command> is | 49 | actual script name minus any language extension. If <command> is |
50 | not specified, the events are recorded using the -a (system-wide) | 50 | not specified, the events are recorded using the -a (system-wide) |
51 | 'perf record' option. If <script> has any required args, they | 51 | 'perf record' option. If <script> has any required args, they |
52 | should be specified before <command>. This mode doesn't allow for | 52 | should be specified before <command>. This mode doesn't allow for |
53 | optional script args to be specified; if optional script args are | 53 | optional script args to be specified; if optional script args are |
54 | desired, they can be specified using separate 'perf script record' | 54 | desired, they can be specified using separate 'perf script record' |
55 | and 'perf script report' commands, with the stdout of the record step | 55 | and 'perf script report' commands, with the stdout of the record step |
56 | piped to the stdin of the report script, using the '-o -' and '-i -' | 56 | piped to the stdin of the report script, using the '-o -' and '-i -' |
57 | options of the corresponding commands. | 57 | options of the corresponding commands. |
58 | 58 | ||
59 | 'perf script <top-script>' to both record the events required for | 59 | 'perf script <top-script>' to both record the events required for |
60 | <top-script> and to run the <top-script> using 'live-mode' | 60 | <top-script> and to run the <top-script> using 'live-mode' |
61 | i.e. without writing anything to disk. <top-script> is the name | 61 | i.e. without writing anything to disk. <top-script> is the name |
62 | displayed in the output of 'perf script --list' i.e. the actual | 62 | displayed in the output of 'perf script --list' i.e. the actual |
63 | script name minus any language extension; a <top-script> is defined | 63 | script name minus any language extension; a <top-script> is defined |
64 | as any script name ending with the string 'top'. | 64 | as any script name ending with the string 'top'. |
65 | 65 | ||
66 | [<record-options>] can be passed to the record steps of 'perf script | 66 | [<record-options>] can be passed to the record steps of 'perf script |
67 | record' and 'live-mode' variants; this isn't possible however for | 67 | record' and 'live-mode' variants; this isn't possible however for |
68 | <top-script> 'live-mode' or 'perf script report' variants. | 68 | <top-script> 'live-mode' or 'perf script report' variants. |
69 | 69 | ||
70 | See the 'SEE ALSO' section for links to language-specific | 70 | See the 'SEE ALSO' section for links to language-specific |
71 | information on how to write and run your own trace scripts. | 71 | information on how to write and run your own trace scripts. |
72 | 72 | ||
73 | OPTIONS | 73 | OPTIONS |
74 | ------- | 74 | ------- |
75 | <command>...:: | 75 | <command>...:: |
76 | Any command you can specify in a shell. | 76 | Any command you can specify in a shell. |
77 | 77 | ||
78 | -D:: | 78 | -D:: |
79 | --dump-raw-trace=:: | 79 | --dump-raw-trace=:: |
80 | Display verbose dump of the trace data. | 80 | Display verbose dump of the trace data. |
81 | 81 | ||
82 | -L:: | 82 | -L:: |
83 | --Latency=:: | 83 | --Latency=:: |
84 | Show latency attributes (irqs/preemption disabled, etc). | 84 | Show latency attributes (irqs/preemption disabled, etc). |
85 | 85 | ||
86 | -l:: | 86 | -l:: |
87 | --list=:: | 87 | --list=:: |
88 | Display a list of available trace scripts. | 88 | Display a list of available trace scripts. |
89 | 89 | ||
90 | -s ['lang']:: | 90 | -s ['lang']:: |
91 | --script=:: | 91 | --script=:: |
92 | Process trace data with the given script ([lang]:script[.ext]). | 92 | Process trace data with the given script ([lang]:script[.ext]). |
93 | If the string 'lang' is specified in place of a script name, a | 93 | If the string 'lang' is specified in place of a script name, a |
94 | list of supported languages will be displayed instead. | 94 | list of supported languages will be displayed instead. |
95 | 95 | ||
96 | -g:: | 96 | -g:: |
97 | --gen-script=:: | 97 | --gen-script=:: |
98 | Generate perf-script.[ext] starter script for given language, | 98 | Generate perf-script.[ext] starter script for given language, |
99 | using current perf.data. | 99 | using current perf.data. |
100 | 100 | ||
101 | -a:: | 101 | -a:: |
102 | Force system-wide collection. Scripts run without a <command> | 102 | Force system-wide collection. Scripts run without a <command> |
103 | normally use -a by default, while scripts run with a <command> | 103 | normally use -a by default, while scripts run with a <command> |
104 | normally don't - this option allows the latter to be run in | 104 | normally don't - this option allows the latter to be run in |
105 | system-wide mode. | 105 | system-wide mode. |
106 | 106 | ||
107 | -i:: | ||
108 | --input=:: | ||
109 | Input file name. | ||
110 | |||
111 | -d:: | ||
112 | --debug-mode:: | ||
113 | Do various checks like samples ordering and lost events. | ||
107 | 114 | ||
108 | SEE ALSO | 115 | SEE ALSO |
109 | -------- | 116 | -------- |
110 | linkperf:perf-record[1], linkperf:perf-script-perl[1], | 117 | linkperf:perf-record[1], linkperf:perf-script-perl[1], |
111 | linkperf:perf-script-python[1] | 118 | linkperf:perf-script-python[1] |
112 | 119 |
tools/perf/Documentation/perf-stat.txt
1 | perf-stat(1) | 1 | perf-stat(1) |
2 | ============ | 2 | ============ |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-stat - Run a command and gather performance counter statistics | 6 | perf-stat - Run a command and gather performance counter statistics |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command> | 11 | 'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command> |
12 | 'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>] | 12 | 'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>] |
13 | 13 | ||
14 | DESCRIPTION | 14 | DESCRIPTION |
15 | ----------- | 15 | ----------- |
16 | This command runs a command and gathers performance counter statistics | 16 | This command runs a command and gathers performance counter statistics |
17 | from it. | 17 | from it. |
18 | 18 | ||
19 | 19 | ||
20 | OPTIONS | 20 | OPTIONS |
21 | ------- | 21 | ------- |
22 | <command>...:: | 22 | <command>...:: |
23 | Any command you can specify in a shell. | 23 | Any command you can specify in a shell. |
24 | 24 | ||
25 | 25 | ||
26 | -e:: | 26 | -e:: |
27 | --event=:: | 27 | --event=:: |
28 | Select the PMU event. Selection can be a symbolic event name | 28 | Select the PMU event. Selection can be a symbolic event name |
29 | (use 'perf list' to list all events) or a raw PMU | 29 | (use 'perf list' to list all events) or a raw PMU |
30 | event (eventsel+umask) in the form of rNNN where NNN is a | 30 | event (eventsel+umask) in the form of rNNN where NNN is a |
31 | hexadecimal event descriptor. | 31 | hexadecimal event descriptor. |
32 | 32 | ||
33 | -i:: | 33 | -i:: |
34 | --no-inherit:: | 34 | --no-inherit:: |
35 | child tasks do not inherit counters | 35 | child tasks do not inherit counters |
36 | -p:: | 36 | -p:: |
37 | --pid=<pid>:: | 37 | --pid=<pid>:: |
38 | stat events on existing pid | 38 | stat events on existing process id |
39 | 39 | ||
40 | -t:: | ||
41 | --tid=<tid>:: | ||
42 | stat events on existing thread id | ||
43 | |||
44 | |||
40 | -a:: | 45 | -a:: |
41 | system-wide collection | 46 | --all-cpus:: |
47 | system-wide collection from all CPUs | ||
42 | 48 | ||
43 | -c:: | 49 | -c:: |
44 | scale counter values | 50 | --scale:: |
51 | scale/normalize counter values | ||
45 | 52 | ||
53 | -r:: | ||
54 | --repeat=<n>:: | ||
55 | repeat command and print average + stddev (max: 100) | ||
56 | |||
46 | -B:: | 57 | -B:: |
58 | --big-num:: | ||
47 | print large numbers with thousands' separators according to locale | 59 | print large numbers with thousands' separators according to locale |
48 | 60 | ||
49 | -C:: | 61 | -C:: |
50 | --cpu=:: | 62 | --cpu=:: |
51 | Count only on the list of cpus provided. Multiple CPUs can be provided as a | 63 | Count only on the list of CPUs provided. Multiple CPUs can be provided as a |
52 | comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. | 64 | comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. |
53 | In per-thread mode, this option is ignored. The -a option is still necessary | 65 | In per-thread mode, this option is ignored. The -a option is still necessary |
54 | to activate system-wide monitoring. Default is to count on all CPUs. | 66 | to activate system-wide monitoring. Default is to count on all CPUs. |
55 | 67 | ||
56 | -A:: | 68 | -A:: |
57 | --no-aggr:: | 69 | --no-aggr:: |
58 | Do not aggregate counts across all monitored CPUs in system-wide mode (-a). | 70 | Do not aggregate counts across all monitored CPUs in system-wide mode (-a). |
59 | This option is only valid in system-wide mode. | 71 | This option is only valid in system-wide mode. |
72 | |||
73 | -n:: | ||
74 | --null:: | ||
75 | null run - don't start any counters | ||
76 | |||
77 | -v:: | ||
78 | --verbose:: | ||
79 | be more verbose (show counter open errors, etc) | ||
80 | |||
81 | -x SEP:: | ||
82 | --field-separator SEP:: | ||
83 | print counts using a CSV-style output to make it easy to import directly into | ||
84 | spreadsheets. Columns are separated by the string specified in SEP. | ||
60 | 85 | ||
61 | EXAMPLES | 86 | EXAMPLES |
62 | -------- | 87 | -------- |
63 | 88 | ||
64 | $ perf stat -- make -j | 89 | $ perf stat -- make -j |
65 | 90 | ||
66 | Performance counter stats for 'make -j': | 91 | Performance counter stats for 'make -j': |
67 | 92 | ||
68 | 8117.370256 task clock ticks # 11.281 CPU utilization factor | 93 | 8117.370256 task clock ticks # 11.281 CPU utilization factor |
69 | 678 context switches # 0.000 M/sec | 94 | 678 context switches # 0.000 M/sec |
70 | 133 CPU migrations # 0.000 M/sec | 95 | 133 CPU migrations # 0.000 M/sec |
71 | 235724 pagefaults # 0.029 M/sec | 96 | 235724 pagefaults # 0.029 M/sec |
72 | 24821162526 CPU cycles # 3057.784 M/sec | 97 | 24821162526 CPU cycles # 3057.784 M/sec |
73 | 18687303457 instructions # 2302.138 M/sec | 98 | 18687303457 instructions # 2302.138 M/sec |
74 | 172158895 cache references # 21.209 M/sec | 99 | 172158895 cache references # 21.209 M/sec |
75 | 27075259 cache misses # 3.335 M/sec | 100 | 27075259 cache misses # 3.335 M/sec |
76 | 101 | ||
77 | Wall-clock time elapsed: 719.554352 msecs | 102 | Wall-clock time elapsed: 719.554352 msecs |
78 | 103 | ||
79 | SEE ALSO | 104 | SEE ALSO |
80 | -------- | 105 | -------- |
81 | linkperf:perf-top[1], linkperf:perf-list[1] | 106 | linkperf:perf-top[1], linkperf:perf-list[1] |
82 | 107 |
tools/perf/Documentation/perf-test.txt
1 | perf-test(1) | 1 | perf-test(1) |
2 | ============ | 2 | ============ |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-test - Runs sanity tests. | 6 | perf-test - Runs sanity tests. |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf test <options>' | 11 | 'perf test <options>' |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | This command does assorted sanity tests, initially thru linked routines but | 15 | This command does assorted sanity tests, initially through linked routines but |
16 | also will look for a directory with more tests in the form of scripts. | 16 | also will look for a directory with more tests in the form of scripts. |
17 | 17 | ||
18 | OPTIONS | 18 | OPTIONS |
19 | ------- | 19 | ------- |
20 | -v:: | 20 | -v:: |
21 | --verbose:: | 21 | --verbose:: |
22 | Be more verbose. | 22 | Be more verbose. |
23 | 23 |
tools/perf/Documentation/perf-top.txt
1 | perf-top(1) | 1 | perf-top(1) |
2 | =========== | 2 | =========== |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-top - System profiling tool. | 6 | perf-top - System profiling tool. |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf top' [-e <EVENT> | --event=EVENT] [<options>] | 11 | 'perf top' [-e <EVENT> | --event=EVENT] [<options>] |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | This command generates and displays a performance counter profile in realtime. | 15 | This command generates and displays a performance counter profile in real time. |
16 | 16 | ||
17 | 17 | ||
18 | OPTIONS | 18 | OPTIONS |
19 | ------- | 19 | ------- |
20 | -a:: | 20 | -a:: |
21 | --all-cpus:: | 21 | --all-cpus:: |
22 | System-wide collection. (default) | 22 | System-wide collection. (default) |
23 | 23 | ||
24 | -c <count>:: | 24 | -c <count>:: |
25 | --count=<count>:: | 25 | --count=<count>:: |
26 | Event period to sample. | 26 | Event period to sample. |
27 | 27 | ||
28 | -C <cpu-list>:: | 28 | -C <cpu-list>:: |
29 | --cpu=<cpu>:: | 29 | --cpu=<cpu>:: |
30 | Monitor only on the list of cpus provided. Multiple CPUs can be provided as a | 30 | Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a |
31 | comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. | 31 | comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. |
32 | Default is to monitor all CPUs. | 32 | Default is to monitor all CPUs. |
33 | 33 | ||
34 | -d <seconds>:: | 34 | -d <seconds>:: |
35 | --delay=<seconds>:: | 35 | --delay=<seconds>:: |
36 | Number of seconds to delay between refreshes. | 36 | Number of seconds to delay between refreshes. |
37 | 37 | ||
38 | -e <event>:: | 38 | -e <event>:: |
39 | --event=<event>:: | 39 | --event=<event>:: |
40 | Select the PMU event. Selection can be a symbolic event name | 40 | Select the PMU event. Selection can be a symbolic event name |
41 | (use 'perf list' to list all events) or a raw PMU | 41 | (use 'perf list' to list all events) or a raw PMU |
42 | event (eventsel+umask) in the form of rNNN where NNN is a | 42 | event (eventsel+umask) in the form of rNNN where NNN is a |
43 | hexadecimal event descriptor. | 43 | hexadecimal event descriptor. |
44 | 44 | ||
45 | -E <entries>:: | 45 | -E <entries>:: |
46 | --entries=<entries>:: | 46 | --entries=<entries>:: |
47 | Display this many functions. | 47 | Display this many functions. |
48 | 48 | ||
49 | -f <count>:: | 49 | -f <count>:: |
50 | --count-filter=<count>:: | 50 | --count-filter=<count>:: |
51 | Only display functions with more events than this. | 51 | Only display functions with more events than this. |
52 | 52 | ||
53 | -g:: | ||
54 | --group:: | ||
55 | Put the counters into a counter group. | ||
56 | |||
53 | -F <freq>:: | 57 | -F <freq>:: |
54 | --freq=<freq>:: | 58 | --freq=<freq>:: |
55 | Profile at this frequency. | 59 | Profile at this frequency. |
56 | 60 | ||
57 | -i:: | 61 | -i:: |
58 | --inherit:: | 62 | --inherit:: |
59 | Child tasks inherit counters, only makes sense with -p option. | 63 | Child tasks inherit counters, only makes sense with -p option. |
60 | 64 | ||
61 | -k <path>:: | 65 | -k <path>:: |
62 | --vmlinux=<path>:: | 66 | --vmlinux=<path>:: |
63 | Path to vmlinux. Required for annotation functionality. | 67 | Path to vmlinux. Required for annotation functionality. |
64 | 68 | ||
65 | -m <pages>:: | 69 | -m <pages>:: |
66 | --mmap-pages=<pages>:: | 70 | --mmap-pages=<pages>:: |
67 | Number of mmapped data pages. | 71 | Number of mmapped data pages. |
68 | 72 | ||
69 | -p <pid>:: | 73 | -p <pid>:: |
70 | --pid=<pid>:: | 74 | --pid=<pid>:: |
71 | Profile events on existing pid. | 75 | Profile events on existing Process ID. |
72 | 76 | ||
77 | -t <tid>:: | ||
78 | --tid=<tid>:: | ||
79 | Profile events on existing thread ID. | ||
80 | |||
73 | -r <priority>:: | 81 | -r <priority>:: |
74 | --realtime=<priority>:: | 82 | --realtime=<priority>:: |
75 | Collect data with this RT SCHED_FIFO priority. | 83 | Collect data with this RT SCHED_FIFO priority. |
76 | 84 | ||
77 | -s <symbol>:: | 85 | -s <symbol>:: |
78 | --sym-annotate=<symbol>:: | 86 | --sym-annotate=<symbol>:: |
79 | Annotate this symbol. | 87 | Annotate this symbol. |
88 | |||
89 | -K:: | ||
90 | --hide_kernel_symbols:: | ||
91 | Hide kernel symbols. | ||
92 | |||
93 | -U:: | ||
94 | --hide_user_symbols:: | ||
95 | Hide user symbols. | ||
96 | |||
97 | -D:: | ||
98 | --dump-symtab:: | ||
99 | Dump the symbol table used for profiling. | ||
80 | 100 | ||
81 | -v:: | 101 | -v:: |
82 | --verbose:: | 102 | --verbose:: |
83 | Be more verbose (show counter open errors, etc). | 103 | Be more verbose (show counter open errors, etc). |
84 | 104 | ||
85 | -z:: | 105 | -z:: |
86 | --zero:: | 106 | --zero:: |
87 | Zero history across display updates. | 107 | Zero history across display updates. |
88 | 108 | ||
89 | INTERACTIVE PROMPTING KEYS | 109 | INTERACTIVE PROMPTING KEYS |
90 | -------------------------- | 110 | -------------------------- |
91 | 111 | ||
92 | [d]:: | 112 | [d]:: |
93 | Display refresh delay. | 113 | Display refresh delay. |
94 | 114 | ||
95 | [e]:: | 115 | [e]:: |
96 | Number of entries to display. | 116 | Number of entries to display. |
97 | 117 | ||
98 | [E]:: | 118 | [E]:: |
99 | Event to display when multiple counters are active. | 119 | Event to display when multiple counters are active. |
100 | 120 | ||
101 | [f]:: | 121 | [f]:: |
102 | Profile display filter (>= hit count). | 122 | Profile display filter (>= hit count). |
103 | 123 | ||
104 | [F]:: | 124 | [F]:: |
105 | Annotation display filter (>= % of total). | 125 | Annotation display filter (>= % of total). |
106 | 126 | ||
107 | [s]:: | 127 | [s]:: |
108 | Annotate symbol. | 128 | Annotate symbol. |
109 | 129 | ||
110 | [S]:: | 130 | [S]:: |
111 | Stop annotation, return to full profile display. | 131 | Stop annotation, return to full profile display. |
112 | 132 | ||
113 | [w]:: | 133 | [w]:: |
114 | Toggle between weighted sum and individual count[E]r profile. | 134 | Toggle between weighted sum and individual count[E]r profile. |
115 | 135 | ||
116 | [z]:: | 136 | [z]:: |
117 | Toggle event count zeroing across display updates. | 137 | Toggle event count zeroing across display updates. |
118 | 138 | ||
119 | [qQ]:: | 139 | [qQ]:: |
120 | Quit. | 140 | Quit. |
121 | 141 | ||
122 | Pressing any unmapped key displays a menu, and prompts for input. | 142 | Pressing any unmapped key displays a menu, and prompts for input. |
123 | 143 | ||
124 | 144 | ||
125 | SEE ALSO | 145 | SEE ALSO |
126 | -------- | 146 | -------- |
127 | linkperf:perf-stat[1], linkperf:perf-list[1] | 147 | linkperf:perf-stat[1], linkperf:perf-list[1] |
128 | 148 |
tools/perf/builtin-diff.c
1 | /* | 1 | /* |
2 | * builtin-diff.c | 2 | * builtin-diff.c |
3 | * | 3 | * |
4 | * Builtin diff command: Analyze two perf.data input files, look up and read | 4 | * Builtin diff command: Analyze two perf.data input files, look up and read |
5 | * DSOs and symbol information, sort them and produce a diff. | 5 | * DSOs and symbol information, sort them and produce a diff. |
6 | */ | 6 | */ |
7 | #include "builtin.h" | 7 | #include "builtin.h" |
8 | 8 | ||
9 | #include "util/debug.h" | 9 | #include "util/debug.h" |
10 | #include "util/event.h" | 10 | #include "util/event.h" |
11 | #include "util/hist.h" | 11 | #include "util/hist.h" |
12 | #include "util/session.h" | 12 | #include "util/session.h" |
13 | #include "util/sort.h" | 13 | #include "util/sort.h" |
14 | #include "util/symbol.h" | 14 | #include "util/symbol.h" |
15 | #include "util/util.h" | 15 | #include "util/util.h" |
16 | 16 | ||
17 | #include <stdlib.h> | 17 | #include <stdlib.h> |
18 | 18 | ||
19 | static char const *input_old = "perf.data.old", | 19 | static char const *input_old = "perf.data.old", |
20 | *input_new = "perf.data"; | 20 | *input_new = "perf.data"; |
21 | static char diff__default_sort_order[] = "dso,symbol"; | 21 | static char diff__default_sort_order[] = "dso,symbol"; |
22 | static bool force; | 22 | static bool force; |
23 | static bool show_displacement; | 23 | static bool show_displacement; |
24 | 24 | ||
25 | static int hists__add_entry(struct hists *self, | 25 | static int hists__add_entry(struct hists *self, |
26 | struct addr_location *al, u64 period) | 26 | struct addr_location *al, u64 period) |
27 | { | 27 | { |
28 | if (__hists__add_entry(self, al, NULL, period) != NULL) | 28 | if (__hists__add_entry(self, al, NULL, period) != NULL) |
29 | return 0; | 29 | return 0; |
30 | return -ENOMEM; | 30 | return -ENOMEM; |
31 | } | 31 | } |
32 | 32 | ||
33 | static int diff__process_sample_event(event_t *event, struct perf_session *session) | 33 | static int diff__process_sample_event(event_t *event, struct perf_session *session) |
34 | { | 34 | { |
35 | struct addr_location al; | 35 | struct addr_location al; |
36 | struct sample_data data = { .period = 1, }; | 36 | struct sample_data data = { .period = 1, }; |
37 | 37 | ||
38 | if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { | 38 | if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { |
39 | pr_warning("problem processing %d event, skipping it.\n", | 39 | pr_warning("problem processing %d event, skipping it.\n", |
40 | event->header.type); | 40 | event->header.type); |
41 | return -1; | 41 | return -1; |
42 | } | 42 | } |
43 | 43 | ||
44 | if (al.filtered || al.sym == NULL) | 44 | if (al.filtered || al.sym == NULL) |
45 | return 0; | 45 | return 0; |
46 | 46 | ||
47 | if (hists__add_entry(&session->hists, &al, data.period)) { | 47 | if (hists__add_entry(&session->hists, &al, data.period)) { |
48 | pr_warning("problem incrementing symbol period, skipping event\n"); | 48 | pr_warning("problem incrementing symbol period, skipping event\n"); |
49 | return -1; | 49 | return -1; |
50 | } | 50 | } |
51 | 51 | ||
52 | session->hists.stats.total_period += data.period; | 52 | session->hists.stats.total_period += data.period; |
53 | return 0; | 53 | return 0; |
54 | } | 54 | } |
55 | 55 | ||
56 | static struct perf_event_ops event_ops = { | 56 | static struct perf_event_ops event_ops = { |
57 | .sample = diff__process_sample_event, | 57 | .sample = diff__process_sample_event, |
58 | .mmap = event__process_mmap, | 58 | .mmap = event__process_mmap, |
59 | .comm = event__process_comm, | 59 | .comm = event__process_comm, |
60 | .exit = event__process_task, | 60 | .exit = event__process_task, |
61 | .fork = event__process_task, | 61 | .fork = event__process_task, |
62 | .lost = event__process_lost, | 62 | .lost = event__process_lost, |
63 | }; | 63 | }; |
64 | 64 | ||
65 | static void perf_session__insert_hist_entry_by_name(struct rb_root *root, | 65 | static void perf_session__insert_hist_entry_by_name(struct rb_root *root, |
66 | struct hist_entry *he) | 66 | struct hist_entry *he) |
67 | { | 67 | { |
68 | struct rb_node **p = &root->rb_node; | 68 | struct rb_node **p = &root->rb_node; |
69 | struct rb_node *parent = NULL; | 69 | struct rb_node *parent = NULL; |
70 | struct hist_entry *iter; | 70 | struct hist_entry *iter; |
71 | 71 | ||
72 | while (*p != NULL) { | 72 | while (*p != NULL) { |
73 | parent = *p; | 73 | parent = *p; |
74 | iter = rb_entry(parent, struct hist_entry, rb_node); | 74 | iter = rb_entry(parent, struct hist_entry, rb_node); |
75 | if (hist_entry__cmp(he, iter) < 0) | 75 | if (hist_entry__cmp(he, iter) < 0) |
76 | p = &(*p)->rb_left; | 76 | p = &(*p)->rb_left; |
77 | else | 77 | else |
78 | p = &(*p)->rb_right; | 78 | p = &(*p)->rb_right; |
79 | } | 79 | } |
80 | 80 | ||
81 | rb_link_node(&he->rb_node, parent, p); | 81 | rb_link_node(&he->rb_node, parent, p); |
82 | rb_insert_color(&he->rb_node, root); | 82 | rb_insert_color(&he->rb_node, root); |
83 | } | 83 | } |
84 | 84 | ||
85 | static void hists__resort_entries(struct hists *self) | 85 | static void hists__resort_entries(struct hists *self) |
86 | { | 86 | { |
87 | unsigned long position = 1; | 87 | unsigned long position = 1; |
88 | struct rb_root tmp = RB_ROOT; | 88 | struct rb_root tmp = RB_ROOT; |
89 | struct rb_node *next = rb_first(&self->entries); | 89 | struct rb_node *next = rb_first(&self->entries); |
90 | 90 | ||
91 | while (next != NULL) { | 91 | while (next != NULL) { |
92 | struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node); | 92 | struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node); |
93 | 93 | ||
94 | next = rb_next(&n->rb_node); | 94 | next = rb_next(&n->rb_node); |
95 | rb_erase(&n->rb_node, &self->entries); | 95 | rb_erase(&n->rb_node, &self->entries); |
96 | n->position = position++; | 96 | n->position = position++; |
97 | perf_session__insert_hist_entry_by_name(&tmp, n); | 97 | perf_session__insert_hist_entry_by_name(&tmp, n); |
98 | } | 98 | } |
99 | 99 | ||
100 | self->entries = tmp; | 100 | self->entries = tmp; |
101 | } | 101 | } |
102 | 102 | ||
103 | static void hists__set_positions(struct hists *self) | 103 | static void hists__set_positions(struct hists *self) |
104 | { | 104 | { |
105 | hists__output_resort(self); | 105 | hists__output_resort(self); |
106 | hists__resort_entries(self); | 106 | hists__resort_entries(self); |
107 | } | 107 | } |
108 | 108 | ||
109 | static struct hist_entry *hists__find_entry(struct hists *self, | 109 | static struct hist_entry *hists__find_entry(struct hists *self, |
110 | struct hist_entry *he) | 110 | struct hist_entry *he) |
111 | { | 111 | { |
112 | struct rb_node *n = self->entries.rb_node; | 112 | struct rb_node *n = self->entries.rb_node; |
113 | 113 | ||
114 | while (n) { | 114 | while (n) { |
115 | struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node); | 115 | struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node); |
116 | int64_t cmp = hist_entry__cmp(he, iter); | 116 | int64_t cmp = hist_entry__cmp(he, iter); |
117 | 117 | ||
118 | if (cmp < 0) | 118 | if (cmp < 0) |
119 | n = n->rb_left; | 119 | n = n->rb_left; |
120 | else if (cmp > 0) | 120 | else if (cmp > 0) |
121 | n = n->rb_right; | 121 | n = n->rb_right; |
122 | else | 122 | else |
123 | return iter; | 123 | return iter; |
124 | } | 124 | } |
125 | 125 | ||
126 | return NULL; | 126 | return NULL; |
127 | } | 127 | } |
128 | 128 | ||
129 | static void hists__match(struct hists *older, struct hists *newer) | 129 | static void hists__match(struct hists *older, struct hists *newer) |
130 | { | 130 | { |
131 | struct rb_node *nd; | 131 | struct rb_node *nd; |
132 | 132 | ||
133 | for (nd = rb_first(&newer->entries); nd; nd = rb_next(nd)) { | 133 | for (nd = rb_first(&newer->entries); nd; nd = rb_next(nd)) { |
134 | struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node); | 134 | struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node); |
135 | pos->pair = hists__find_entry(older, pos); | 135 | pos->pair = hists__find_entry(older, pos); |
136 | } | 136 | } |
137 | } | 137 | } |
138 | 138 | ||
139 | static int __cmd_diff(void) | 139 | static int __cmd_diff(void) |
140 | { | 140 | { |
141 | int ret, i; | 141 | int ret, i; |
142 | struct perf_session *session[2]; | 142 | struct perf_session *session[2]; |
143 | 143 | ||
144 | session[0] = perf_session__new(input_old, O_RDONLY, force, false); | 144 | session[0] = perf_session__new(input_old, O_RDONLY, force, false); |
145 | session[1] = perf_session__new(input_new, O_RDONLY, force, false); | 145 | session[1] = perf_session__new(input_new, O_RDONLY, force, false); |
146 | if (session[0] == NULL || session[1] == NULL) | 146 | if (session[0] == NULL || session[1] == NULL) |
147 | return -ENOMEM; | 147 | return -ENOMEM; |
148 | 148 | ||
149 | for (i = 0; i < 2; ++i) { | 149 | for (i = 0; i < 2; ++i) { |
150 | ret = perf_session__process_events(session[i], &event_ops); | 150 | ret = perf_session__process_events(session[i], &event_ops); |
151 | if (ret) | 151 | if (ret) |
152 | goto out_delete; | 152 | goto out_delete; |
153 | } | 153 | } |
154 | 154 | ||
155 | hists__output_resort(&session[1]->hists); | 155 | hists__output_resort(&session[1]->hists); |
156 | if (show_displacement) | 156 | if (show_displacement) |
157 | hists__set_positions(&session[0]->hists); | 157 | hists__set_positions(&session[0]->hists); |
158 | 158 | ||
159 | hists__match(&session[0]->hists, &session[1]->hists); | 159 | hists__match(&session[0]->hists, &session[1]->hists); |
160 | hists__fprintf(&session[1]->hists, &session[0]->hists, | 160 | hists__fprintf(&session[1]->hists, &session[0]->hists, |
161 | show_displacement, stdout); | 161 | show_displacement, stdout); |
162 | out_delete: | 162 | out_delete: |
163 | for (i = 0; i < 2; ++i) | 163 | for (i = 0; i < 2; ++i) |
164 | perf_session__delete(session[i]); | 164 | perf_session__delete(session[i]); |
165 | return ret; | 165 | return ret; |
166 | } | 166 | } |
167 | 167 | ||
168 | static const char * const diff_usage[] = { | 168 | static const char * const diff_usage[] = { |
169 | "perf diff [<options>] [old_file] [new_file]", | 169 | "perf diff [<options>] [old_file] [new_file]", |
170 | NULL, | 170 | NULL, |
171 | }; | 171 | }; |
172 | 172 | ||
173 | static const struct option options[] = { | 173 | static const struct option options[] = { |
174 | OPT_INCR('v', "verbose", &verbose, | 174 | OPT_INCR('v', "verbose", &verbose, |
175 | "be more verbose (show symbol address, etc)"), | 175 | "be more verbose (show symbol address, etc)"), |
176 | OPT_BOOLEAN('m', "displacement", &show_displacement, | 176 | OPT_BOOLEAN('M', "displacement", &show_displacement, |
177 | "Show position displacement relative to baseline"), | 177 | "Show position displacement relative to baseline"), |
178 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | 178 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, |
179 | "dump raw trace in ASCII"), | 179 | "dump raw trace in ASCII"), |
180 | OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), | 180 | OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), |
181 | OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, | 181 | OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, |
182 | "load module symbols - WARNING: use only with -k and LIVE kernel"), | 182 | "load module symbols - WARNING: use only with -k and LIVE kernel"), |
183 | OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", | 183 | OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", |
184 | "only consider symbols in these dsos"), | 184 | "only consider symbols in these dsos"), |
185 | OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", | 185 | OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", |
186 | "only consider symbols in these comms"), | 186 | "only consider symbols in these comms"), |
187 | OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", | 187 | OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", |
188 | "only consider these symbols"), | 188 | "only consider these symbols"), |
189 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", | 189 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", |
190 | "sort by key(s): pid, comm, dso, symbol, parent"), | 190 | "sort by key(s): pid, comm, dso, symbol, parent"), |
191 | OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", | 191 | OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", |
192 | "separator for columns, no spaces will be added between " | 192 | "separator for columns, no spaces will be added between " |
193 | "columns '.' is reserved."), | 193 | "columns '.' is reserved."), |
194 | OPT_END() | 194 | OPT_END() |
195 | }; | 195 | }; |
196 | 196 | ||
197 | int cmd_diff(int argc, const char **argv, const char *prefix __used) | 197 | int cmd_diff(int argc, const char **argv, const char *prefix __used) |
198 | { | 198 | { |
199 | sort_order = diff__default_sort_order; | 199 | sort_order = diff__default_sort_order; |
200 | argc = parse_options(argc, argv, options, diff_usage, 0); | 200 | argc = parse_options(argc, argv, options, diff_usage, 0); |
201 | if (argc) { | 201 | if (argc) { |
202 | if (argc > 2) | 202 | if (argc > 2) |
203 | usage_with_options(diff_usage, options); | 203 | usage_with_options(diff_usage, options); |
204 | if (argc == 2) { | 204 | if (argc == 2) { |
205 | input_old = argv[0]; | 205 | input_old = argv[0]; |
206 | input_new = argv[1]; | 206 | input_new = argv[1]; |
207 | } else | 207 | } else |
208 | input_new = argv[0]; | 208 | input_new = argv[0]; |
209 | } else if (symbol_conf.default_guest_vmlinux_name || | 209 | } else if (symbol_conf.default_guest_vmlinux_name || |
210 | symbol_conf.default_guest_kallsyms) { | 210 | symbol_conf.default_guest_kallsyms) { |
211 | input_old = "perf.data.host"; | 211 | input_old = "perf.data.host"; |
212 | input_new = "perf.data.guest"; | 212 | input_new = "perf.data.guest"; |
213 | } | 213 | } |
214 | 214 | ||
215 | symbol_conf.exclude_other = false; | 215 | symbol_conf.exclude_other = false; |
216 | if (symbol__init() < 0) | 216 | if (symbol__init() < 0) |
217 | return -1; | 217 | return -1; |
218 | 218 | ||
219 | setup_sorting(diff_usage, options); | 219 | setup_sorting(diff_usage, options); |
220 | setup_pager(); | 220 | setup_pager(); |
221 | 221 | ||
222 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL); | 222 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL); |
223 | sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", NULL); | 223 | sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", NULL); |
224 | sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", NULL); | 224 | sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", NULL); |
225 | 225 | ||
226 | return __cmd_diff(); | 226 | return __cmd_diff(); |
227 | } | 227 | } |
228 | 228 |
tools/perf/builtin-stat.c
1 | /* | 1 | /* |
2 | * builtin-stat.c | 2 | * builtin-stat.c |
3 | * | 3 | * |
4 | * Builtin stat command: Give a precise performance counters summary | 4 | * Builtin stat command: Give a precise performance counters summary |
5 | * overview about any workload, CPU or specific PID. | 5 | * overview about any workload, CPU or specific PID. |
6 | * | 6 | * |
7 | * Sample output: | 7 | * Sample output: |
8 | 8 | ||
9 | $ perf stat ~/hackbench 10 | 9 | $ perf stat ~/hackbench 10 |
10 | Time: 0.104 | 10 | Time: 0.104 |
11 | 11 | ||
12 | Performance counter stats for '/home/mingo/hackbench': | 12 | Performance counter stats for '/home/mingo/hackbench': |
13 | 13 | ||
14 | 1255.538611 task clock ticks # 10.143 CPU utilization factor | 14 | 1255.538611 task clock ticks # 10.143 CPU utilization factor |
15 | 54011 context switches # 0.043 M/sec | 15 | 54011 context switches # 0.043 M/sec |
16 | 385 CPU migrations # 0.000 M/sec | 16 | 385 CPU migrations # 0.000 M/sec |
17 | 17755 pagefaults # 0.014 M/sec | 17 | 17755 pagefaults # 0.014 M/sec |
18 | 3808323185 CPU cycles # 3033.219 M/sec | 18 | 3808323185 CPU cycles # 3033.219 M/sec |
19 | 1575111190 instructions # 1254.530 M/sec | 19 | 1575111190 instructions # 1254.530 M/sec |
20 | 17367895 cache references # 13.833 M/sec | 20 | 17367895 cache references # 13.833 M/sec |
21 | 7674421 cache misses # 6.112 M/sec | 21 | 7674421 cache misses # 6.112 M/sec |
22 | 22 | ||
23 | Wall-clock time elapsed: 123.786620 msecs | 23 | Wall-clock time elapsed: 123.786620 msecs |
24 | 24 | ||
25 | * | 25 | * |
26 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | 26 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> |
27 | * | 27 | * |
28 | * Improvements and fixes by: | 28 | * Improvements and fixes by: |
29 | * | 29 | * |
30 | * Arjan van de Ven <arjan@linux.intel.com> | 30 | * Arjan van de Ven <arjan@linux.intel.com> |
31 | * Yanmin Zhang <yanmin.zhang@intel.com> | 31 | * Yanmin Zhang <yanmin.zhang@intel.com> |
32 | * Wu Fengguang <fengguang.wu@intel.com> | 32 | * Wu Fengguang <fengguang.wu@intel.com> |
33 | * Mike Galbraith <efault@gmx.de> | 33 | * Mike Galbraith <efault@gmx.de> |
34 | * Paul Mackerras <paulus@samba.org> | 34 | * Paul Mackerras <paulus@samba.org> |
35 | * Jaswinder Singh Rajput <jaswinder@kernel.org> | 35 | * Jaswinder Singh Rajput <jaswinder@kernel.org> |
36 | * | 36 | * |
37 | * Released under the GPL v2. (and only v2, not any later version) | 37 | * Released under the GPL v2. (and only v2, not any later version) |
38 | */ | 38 | */ |
39 | 39 | ||
40 | #include "perf.h" | 40 | #include "perf.h" |
41 | #include "builtin.h" | 41 | #include "builtin.h" |
42 | #include "util/util.h" | 42 | #include "util/util.h" |
43 | #include "util/parse-options.h" | 43 | #include "util/parse-options.h" |
44 | #include "util/parse-events.h" | 44 | #include "util/parse-events.h" |
45 | #include "util/event.h" | 45 | #include "util/event.h" |
46 | #include "util/debug.h" | 46 | #include "util/debug.h" |
47 | #include "util/header.h" | 47 | #include "util/header.h" |
48 | #include "util/cpumap.h" | 48 | #include "util/cpumap.h" |
49 | #include "util/thread.h" | 49 | #include "util/thread.h" |
50 | 50 | ||
51 | #include <sys/prctl.h> | 51 | #include <sys/prctl.h> |
52 | #include <math.h> | 52 | #include <math.h> |
53 | #include <locale.h> | 53 | #include <locale.h> |
54 | 54 | ||
55 | #define DEFAULT_SEPARATOR " " | ||
56 | |||
55 | static struct perf_event_attr default_attrs[] = { | 57 | static struct perf_event_attr default_attrs[] = { |
56 | 58 | ||
57 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, | 59 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, |
58 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, | 60 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, |
59 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, | 61 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, |
60 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, | 62 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, |
61 | 63 | ||
62 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, | 64 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, |
63 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, | 65 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, |
64 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | 66 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, |
65 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, | 67 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, |
66 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, | 68 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, |
67 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, | 69 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, |
68 | 70 | ||
69 | }; | 71 | }; |
70 | 72 | ||
71 | static bool system_wide = false; | 73 | static bool system_wide = false; |
72 | static int nr_cpus = 0; | 74 | static int nr_cpus = 0; |
73 | static int run_idx = 0; | 75 | static int run_idx = 0; |
74 | 76 | ||
75 | static int run_count = 1; | 77 | static int run_count = 1; |
76 | static bool no_inherit = false; | 78 | static bool no_inherit = false; |
77 | static bool scale = true; | 79 | static bool scale = true; |
78 | static bool no_aggr = false; | 80 | static bool no_aggr = false; |
79 | static pid_t target_pid = -1; | 81 | static pid_t target_pid = -1; |
80 | static pid_t target_tid = -1; | 82 | static pid_t target_tid = -1; |
81 | static pid_t *all_tids = NULL; | 83 | static pid_t *all_tids = NULL; |
82 | static int thread_num = 0; | 84 | static int thread_num = 0; |
83 | static pid_t child_pid = -1; | 85 | static pid_t child_pid = -1; |
84 | static bool null_run = false; | 86 | static bool null_run = false; |
85 | static bool big_num = false; | 87 | static bool big_num = true; |
88 | static int big_num_opt = -1; | ||
86 | static const char *cpu_list; | 89 | static const char *cpu_list; |
90 | static const char *csv_sep = NULL; | ||
91 | static bool csv_output = false; | ||
87 | 92 | ||
88 | 93 | ||
89 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; | 94 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; |
90 | 95 | ||
91 | static int event_scaled[MAX_COUNTERS]; | 96 | static int event_scaled[MAX_COUNTERS]; |
92 | 97 | ||
93 | static struct { | 98 | static struct { |
94 | u64 val; | 99 | u64 val; |
95 | u64 ena; | 100 | u64 ena; |
96 | u64 run; | 101 | u64 run; |
97 | } cpu_counts[MAX_NR_CPUS][MAX_COUNTERS]; | 102 | } cpu_counts[MAX_NR_CPUS][MAX_COUNTERS]; |
98 | 103 | ||
99 | static volatile int done = 0; | 104 | static volatile int done = 0; |
100 | 105 | ||
101 | struct stats | 106 | struct stats |
102 | { | 107 | { |
103 | double n, mean, M2; | 108 | double n, mean, M2; |
104 | }; | 109 | }; |
105 | 110 | ||
106 | static void update_stats(struct stats *stats, u64 val) | 111 | static void update_stats(struct stats *stats, u64 val) |
107 | { | 112 | { |
108 | double delta; | 113 | double delta; |
109 | 114 | ||
110 | stats->n++; | 115 | stats->n++; |
111 | delta = val - stats->mean; | 116 | delta = val - stats->mean; |
112 | stats->mean += delta / stats->n; | 117 | stats->mean += delta / stats->n; |
113 | stats->M2 += delta*(val - stats->mean); | 118 | stats->M2 += delta*(val - stats->mean); |
114 | } | 119 | } |
115 | 120 | ||
116 | static double avg_stats(struct stats *stats) | 121 | static double avg_stats(struct stats *stats) |
117 | { | 122 | { |
118 | return stats->mean; | 123 | return stats->mean; |
119 | } | 124 | } |
120 | 125 | ||
121 | /* | 126 | /* |
122 | * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance | 127 | * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance |
123 | * | 128 | * |
124 | * (\Sum n_i^2) - ((\Sum n_i)^2)/n | 129 | * (\Sum n_i^2) - ((\Sum n_i)^2)/n |
125 | * s^2 = ------------------------------- | 130 | * s^2 = ------------------------------- |
126 | * n - 1 | 131 | * n - 1 |
127 | * | 132 | * |
128 | * http://en.wikipedia.org/wiki/Stddev | 133 | * http://en.wikipedia.org/wiki/Stddev |
129 | * | 134 | * |
130 | * The std dev of the mean is related to the std dev by: | 135 | * The std dev of the mean is related to the std dev by: |
131 | * | 136 | * |
132 | * s | 137 | * s |
133 | * s_mean = ------- | 138 | * s_mean = ------- |
134 | * sqrt(n) | 139 | * sqrt(n) |
135 | * | 140 | * |
136 | */ | 141 | */ |
137 | static double stddev_stats(struct stats *stats) | 142 | static double stddev_stats(struct stats *stats) |
138 | { | 143 | { |
139 | double variance = stats->M2 / (stats->n - 1); | 144 | double variance = stats->M2 / (stats->n - 1); |
140 | double variance_mean = variance / stats->n; | 145 | double variance_mean = variance / stats->n; |
141 | 146 | ||
142 | return sqrt(variance_mean); | 147 | return sqrt(variance_mean); |
143 | } | 148 | } |
144 | 149 | ||
145 | struct stats event_res_stats[MAX_COUNTERS][3]; | 150 | struct stats event_res_stats[MAX_COUNTERS][3]; |
146 | struct stats runtime_nsecs_stats[MAX_NR_CPUS]; | 151 | struct stats runtime_nsecs_stats[MAX_NR_CPUS]; |
147 | struct stats runtime_cycles_stats[MAX_NR_CPUS]; | 152 | struct stats runtime_cycles_stats[MAX_NR_CPUS]; |
148 | struct stats runtime_branches_stats[MAX_NR_CPUS]; | 153 | struct stats runtime_branches_stats[MAX_NR_CPUS]; |
149 | struct stats walltime_nsecs_stats; | 154 | struct stats walltime_nsecs_stats; |
150 | 155 | ||
151 | #define MATCH_EVENT(t, c, counter) \ | 156 | #define MATCH_EVENT(t, c, counter) \ |
152 | (attrs[counter].type == PERF_TYPE_##t && \ | 157 | (attrs[counter].type == PERF_TYPE_##t && \ |
153 | attrs[counter].config == PERF_COUNT_##c) | 158 | attrs[counter].config == PERF_COUNT_##c) |
154 | 159 | ||
155 | #define ERR_PERF_OPEN \ | 160 | #define ERR_PERF_OPEN \ |
156 | "counter %d, sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information." | 161 | "counter %d, sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information." |
157 | 162 | ||
158 | static int create_perf_stat_counter(int counter, bool *perm_err) | 163 | static int create_perf_stat_counter(int counter, bool *perm_err) |
159 | { | 164 | { |
160 | struct perf_event_attr *attr = attrs + counter; | 165 | struct perf_event_attr *attr = attrs + counter; |
161 | int thread; | 166 | int thread; |
162 | int ncreated = 0; | 167 | int ncreated = 0; |
163 | 168 | ||
164 | if (scale) | 169 | if (scale) |
165 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 170 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
166 | PERF_FORMAT_TOTAL_TIME_RUNNING; | 171 | PERF_FORMAT_TOTAL_TIME_RUNNING; |
167 | 172 | ||
168 | if (system_wide) { | 173 | if (system_wide) { |
169 | int cpu; | 174 | int cpu; |
170 | 175 | ||
171 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 176 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
172 | fd[cpu][counter][0] = sys_perf_event_open(attr, | 177 | fd[cpu][counter][0] = sys_perf_event_open(attr, |
173 | -1, cpumap[cpu], -1, 0); | 178 | -1, cpumap[cpu], -1, 0); |
174 | if (fd[cpu][counter][0] < 0) { | 179 | if (fd[cpu][counter][0] < 0) { |
175 | if (errno == EPERM || errno == EACCES) | 180 | if (errno == EPERM || errno == EACCES) |
176 | *perm_err = true; | 181 | *perm_err = true; |
177 | error(ERR_PERF_OPEN, counter, | 182 | error(ERR_PERF_OPEN, counter, |
178 | fd[cpu][counter][0], strerror(errno)); | 183 | fd[cpu][counter][0], strerror(errno)); |
179 | } else { | 184 | } else { |
180 | ++ncreated; | 185 | ++ncreated; |
181 | } | 186 | } |
182 | } | 187 | } |
183 | } else { | 188 | } else { |
184 | attr->inherit = !no_inherit; | 189 | attr->inherit = !no_inherit; |
185 | if (target_pid == -1 && target_tid == -1) { | 190 | if (target_pid == -1 && target_tid == -1) { |
186 | attr->disabled = 1; | 191 | attr->disabled = 1; |
187 | attr->enable_on_exec = 1; | 192 | attr->enable_on_exec = 1; |
188 | } | 193 | } |
189 | for (thread = 0; thread < thread_num; thread++) { | 194 | for (thread = 0; thread < thread_num; thread++) { |
190 | fd[0][counter][thread] = sys_perf_event_open(attr, | 195 | fd[0][counter][thread] = sys_perf_event_open(attr, |
191 | all_tids[thread], -1, -1, 0); | 196 | all_tids[thread], -1, -1, 0); |
192 | if (fd[0][counter][thread] < 0) { | 197 | if (fd[0][counter][thread] < 0) { |
193 | if (errno == EPERM || errno == EACCES) | 198 | if (errno == EPERM || errno == EACCES) |
194 | *perm_err = true; | 199 | *perm_err = true; |
195 | error(ERR_PERF_OPEN, counter, | 200 | error(ERR_PERF_OPEN, counter, |
196 | fd[0][counter][thread], | 201 | fd[0][counter][thread], |
197 | strerror(errno)); | 202 | strerror(errno)); |
198 | } else { | 203 | } else { |
199 | ++ncreated; | 204 | ++ncreated; |
200 | } | 205 | } |
201 | } | 206 | } |
202 | } | 207 | } |
203 | 208 | ||
204 | return ncreated; | 209 | return ncreated; |
205 | } | 210 | } |
206 | 211 | ||
207 | /* | 212 | /* |
208 | * Does the counter have nsecs as a unit? | 213 | * Does the counter have nsecs as a unit? |
209 | */ | 214 | */ |
210 | static inline int nsec_counter(int counter) | 215 | static inline int nsec_counter(int counter) |
211 | { | 216 | { |
212 | if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) || | 217 | if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) || |
213 | MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) | 218 | MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) |
214 | return 1; | 219 | return 1; |
215 | 220 | ||
216 | return 0; | 221 | return 0; |
217 | } | 222 | } |
218 | 223 | ||
219 | /* | 224 | /* |
220 | * Read out the results of a single counter: | 225 | * Read out the results of a single counter: |
221 | * aggregate counts across CPUs in system-wide mode | 226 | * aggregate counts across CPUs in system-wide mode |
222 | */ | 227 | */ |
223 | static void read_counter_aggr(int counter) | 228 | static void read_counter_aggr(int counter) |
224 | { | 229 | { |
225 | u64 count[3], single_count[3]; | 230 | u64 count[3], single_count[3]; |
226 | int cpu; | 231 | int cpu; |
227 | size_t res, nv; | 232 | size_t res, nv; |
228 | int scaled; | 233 | int scaled; |
229 | int i, thread; | 234 | int i, thread; |
230 | 235 | ||
231 | count[0] = count[1] = count[2] = 0; | 236 | count[0] = count[1] = count[2] = 0; |
232 | 237 | ||
233 | nv = scale ? 3 : 1; | 238 | nv = scale ? 3 : 1; |
234 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 239 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
235 | for (thread = 0; thread < thread_num; thread++) { | 240 | for (thread = 0; thread < thread_num; thread++) { |
236 | if (fd[cpu][counter][thread] < 0) | 241 | if (fd[cpu][counter][thread] < 0) |
237 | continue; | 242 | continue; |
238 | 243 | ||
239 | res = read(fd[cpu][counter][thread], | 244 | res = read(fd[cpu][counter][thread], |
240 | single_count, nv * sizeof(u64)); | 245 | single_count, nv * sizeof(u64)); |
241 | assert(res == nv * sizeof(u64)); | 246 | assert(res == nv * sizeof(u64)); |
242 | 247 | ||
243 | close(fd[cpu][counter][thread]); | 248 | close(fd[cpu][counter][thread]); |
244 | fd[cpu][counter][thread] = -1; | 249 | fd[cpu][counter][thread] = -1; |
245 | 250 | ||
246 | count[0] += single_count[0]; | 251 | count[0] += single_count[0]; |
247 | if (scale) { | 252 | if (scale) { |
248 | count[1] += single_count[1]; | 253 | count[1] += single_count[1]; |
249 | count[2] += single_count[2]; | 254 | count[2] += single_count[2]; |
250 | } | 255 | } |
251 | } | 256 | } |
252 | } | 257 | } |
253 | 258 | ||
254 | scaled = 0; | 259 | scaled = 0; |
255 | if (scale) { | 260 | if (scale) { |
256 | if (count[2] == 0) { | 261 | if (count[2] == 0) { |
257 | event_scaled[counter] = -1; | 262 | event_scaled[counter] = -1; |
258 | count[0] = 0; | 263 | count[0] = 0; |
259 | return; | 264 | return; |
260 | } | 265 | } |
261 | 266 | ||
262 | if (count[2] < count[1]) { | 267 | if (count[2] < count[1]) { |
263 | event_scaled[counter] = 1; | 268 | event_scaled[counter] = 1; |
264 | count[0] = (unsigned long long) | 269 | count[0] = (unsigned long long) |
265 | ((double)count[0] * count[1] / count[2] + 0.5); | 270 | ((double)count[0] * count[1] / count[2] + 0.5); |
266 | } | 271 | } |
267 | } | 272 | } |
268 | 273 | ||
269 | for (i = 0; i < 3; i++) | 274 | for (i = 0; i < 3; i++) |
270 | update_stats(&event_res_stats[counter][i], count[i]); | 275 | update_stats(&event_res_stats[counter][i], count[i]); |
271 | 276 | ||
272 | if (verbose) { | 277 | if (verbose) { |
273 | fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter), | 278 | fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter), |
274 | count[0], count[1], count[2]); | 279 | count[0], count[1], count[2]); |
275 | } | 280 | } |
276 | 281 | ||
277 | /* | 282 | /* |
278 | * Save the full runtime - to allow normalization during printout: | 283 | * Save the full runtime - to allow normalization during printout: |
279 | */ | 284 | */ |
280 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) | 285 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) |
281 | update_stats(&runtime_nsecs_stats[0], count[0]); | 286 | update_stats(&runtime_nsecs_stats[0], count[0]); |
282 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) | 287 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) |
283 | update_stats(&runtime_cycles_stats[0], count[0]); | 288 | update_stats(&runtime_cycles_stats[0], count[0]); |
284 | if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) | 289 | if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) |
285 | update_stats(&runtime_branches_stats[0], count[0]); | 290 | update_stats(&runtime_branches_stats[0], count[0]); |
286 | } | 291 | } |
287 | 292 | ||
288 | /* | 293 | /* |
289 | * Read out the results of a single counter: | 294 | * Read out the results of a single counter: |
290 | * do not aggregate counts across CPUs in system-wide mode | 295 | * do not aggregate counts across CPUs in system-wide mode |
291 | */ | 296 | */ |
292 | static void read_counter(int counter) | 297 | static void read_counter(int counter) |
293 | { | 298 | { |
294 | u64 count[3]; | 299 | u64 count[3]; |
295 | int cpu; | 300 | int cpu; |
296 | size_t res, nv; | 301 | size_t res, nv; |
297 | 302 | ||
298 | count[0] = count[1] = count[2] = 0; | 303 | count[0] = count[1] = count[2] = 0; |
299 | 304 | ||
300 | nv = scale ? 3 : 1; | 305 | nv = scale ? 3 : 1; |
301 | 306 | ||
302 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 307 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
303 | 308 | ||
304 | if (fd[cpu][counter][0] < 0) | 309 | if (fd[cpu][counter][0] < 0) |
305 | continue; | 310 | continue; |
306 | 311 | ||
307 | res = read(fd[cpu][counter][0], count, nv * sizeof(u64)); | 312 | res = read(fd[cpu][counter][0], count, nv * sizeof(u64)); |
308 | 313 | ||
309 | assert(res == nv * sizeof(u64)); | 314 | assert(res == nv * sizeof(u64)); |
310 | 315 | ||
311 | close(fd[cpu][counter][0]); | 316 | close(fd[cpu][counter][0]); |
312 | fd[cpu][counter][0] = -1; | 317 | fd[cpu][counter][0] = -1; |
313 | 318 | ||
314 | if (scale) { | 319 | if (scale) { |
315 | if (count[2] == 0) { | 320 | if (count[2] == 0) { |
316 | count[0] = 0; | 321 | count[0] = 0; |
317 | } else if (count[2] < count[1]) { | 322 | } else if (count[2] < count[1]) { |
318 | count[0] = (unsigned long long) | 323 | count[0] = (unsigned long long) |
319 | ((double)count[0] * count[1] / count[2] + 0.5); | 324 | ((double)count[0] * count[1] / count[2] + 0.5); |
320 | } | 325 | } |
321 | } | 326 | } |
322 | cpu_counts[cpu][counter].val = count[0]; /* scaled count */ | 327 | cpu_counts[cpu][counter].val = count[0]; /* scaled count */ |
323 | cpu_counts[cpu][counter].ena = count[1]; | 328 | cpu_counts[cpu][counter].ena = count[1]; |
324 | cpu_counts[cpu][counter].run = count[2]; | 329 | cpu_counts[cpu][counter].run = count[2]; |
325 | 330 | ||
326 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) | 331 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) |
327 | update_stats(&runtime_nsecs_stats[cpu], count[0]); | 332 | update_stats(&runtime_nsecs_stats[cpu], count[0]); |
328 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) | 333 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) |
329 | update_stats(&runtime_cycles_stats[cpu], count[0]); | 334 | update_stats(&runtime_cycles_stats[cpu], count[0]); |
330 | if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) | 335 | if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) |
331 | update_stats(&runtime_branches_stats[cpu], count[0]); | 336 | update_stats(&runtime_branches_stats[cpu], count[0]); |
332 | } | 337 | } |
333 | } | 338 | } |
334 | 339 | ||
335 | static int run_perf_stat(int argc __used, const char **argv) | 340 | static int run_perf_stat(int argc __used, const char **argv) |
336 | { | 341 | { |
337 | unsigned long long t0, t1; | 342 | unsigned long long t0, t1; |
338 | int status = 0; | 343 | int status = 0; |
339 | int counter, ncreated = 0; | 344 | int counter, ncreated = 0; |
340 | int child_ready_pipe[2], go_pipe[2]; | 345 | int child_ready_pipe[2], go_pipe[2]; |
341 | bool perm_err = false; | 346 | bool perm_err = false; |
342 | const bool forks = (argc > 0); | 347 | const bool forks = (argc > 0); |
343 | char buf; | 348 | char buf; |
344 | 349 | ||
345 | if (!system_wide) | 350 | if (!system_wide) |
346 | nr_cpus = 1; | 351 | nr_cpus = 1; |
347 | 352 | ||
348 | if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { | 353 | if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { |
349 | perror("failed to create pipes"); | 354 | perror("failed to create pipes"); |
350 | exit(1); | 355 | exit(1); |
351 | } | 356 | } |
352 | 357 | ||
353 | if (forks) { | 358 | if (forks) { |
354 | if ((child_pid = fork()) < 0) | 359 | if ((child_pid = fork()) < 0) |
355 | perror("failed to fork"); | 360 | perror("failed to fork"); |
356 | 361 | ||
357 | if (!child_pid) { | 362 | if (!child_pid) { |
358 | close(child_ready_pipe[0]); | 363 | close(child_ready_pipe[0]); |
359 | close(go_pipe[1]); | 364 | close(go_pipe[1]); |
360 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); | 365 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); |
361 | 366 | ||
362 | /* | 367 | /* |
363 | * Do a dummy execvp to get the PLT entry resolved, | 368 | * Do a dummy execvp to get the PLT entry resolved, |
364 | * so we avoid the resolver overhead on the real | 369 | * so we avoid the resolver overhead on the real |
365 | * execvp call. | 370 | * execvp call. |
366 | */ | 371 | */ |
367 | execvp("", (char **)argv); | 372 | execvp("", (char **)argv); |
368 | 373 | ||
369 | /* | 374 | /* |
370 | * Tell the parent we're ready to go | 375 | * Tell the parent we're ready to go |
371 | */ | 376 | */ |
372 | close(child_ready_pipe[1]); | 377 | close(child_ready_pipe[1]); |
373 | 378 | ||
374 | /* | 379 | /* |
375 | * Wait until the parent tells us to go. | 380 | * Wait until the parent tells us to go. |
376 | */ | 381 | */ |
377 | if (read(go_pipe[0], &buf, 1) == -1) | 382 | if (read(go_pipe[0], &buf, 1) == -1) |
378 | perror("unable to read pipe"); | 383 | perror("unable to read pipe"); |
379 | 384 | ||
380 | execvp(argv[0], (char **)argv); | 385 | execvp(argv[0], (char **)argv); |
381 | 386 | ||
382 | perror(argv[0]); | 387 | perror(argv[0]); |
383 | exit(-1); | 388 | exit(-1); |
384 | } | 389 | } |
385 | 390 | ||
386 | if (target_tid == -1 && target_pid == -1 && !system_wide) | 391 | if (target_tid == -1 && target_pid == -1 && !system_wide) |
387 | all_tids[0] = child_pid; | 392 | all_tids[0] = child_pid; |
388 | 393 | ||
389 | /* | 394 | /* |
390 | * Wait for the child to be ready to exec. | 395 | * Wait for the child to be ready to exec. |
391 | */ | 396 | */ |
392 | close(child_ready_pipe[1]); | 397 | close(child_ready_pipe[1]); |
393 | close(go_pipe[0]); | 398 | close(go_pipe[0]); |
394 | if (read(child_ready_pipe[0], &buf, 1) == -1) | 399 | if (read(child_ready_pipe[0], &buf, 1) == -1) |
395 | perror("unable to read pipe"); | 400 | perror("unable to read pipe"); |
396 | close(child_ready_pipe[0]); | 401 | close(child_ready_pipe[0]); |
397 | } | 402 | } |
398 | 403 | ||
399 | for (counter = 0; counter < nr_counters; counter++) | 404 | for (counter = 0; counter < nr_counters; counter++) |
400 | ncreated += create_perf_stat_counter(counter, &perm_err); | 405 | ncreated += create_perf_stat_counter(counter, &perm_err); |
401 | 406 | ||
402 | if (ncreated < nr_counters) { | 407 | if (ncreated < nr_counters) { |
403 | if (perm_err) | 408 | if (perm_err) |
404 | error("You may not have permission to collect %sstats.\n" | 409 | error("You may not have permission to collect %sstats.\n" |
405 | "\t Consider tweaking" | 410 | "\t Consider tweaking" |
406 | " /proc/sys/kernel/perf_event_paranoid or running as root.", | 411 | " /proc/sys/kernel/perf_event_paranoid or running as root.", |
407 | system_wide ? "system-wide " : ""); | 412 | system_wide ? "system-wide " : ""); |
408 | die("Not all events could be opened.\n"); | 413 | die("Not all events could be opened.\n"); |
409 | if (child_pid != -1) | 414 | if (child_pid != -1) |
410 | kill(child_pid, SIGTERM); | 415 | kill(child_pid, SIGTERM); |
411 | return -1; | 416 | return -1; |
412 | } | 417 | } |
413 | 418 | ||
414 | /* | 419 | /* |
415 | * Enable counters and exec the command: | 420 | * Enable counters and exec the command: |
416 | */ | 421 | */ |
417 | t0 = rdclock(); | 422 | t0 = rdclock(); |
418 | 423 | ||
419 | if (forks) { | 424 | if (forks) { |
420 | close(go_pipe[1]); | 425 | close(go_pipe[1]); |
421 | wait(&status); | 426 | wait(&status); |
422 | } else { | 427 | } else { |
423 | while(!done) sleep(1); | 428 | while(!done) sleep(1); |
424 | } | 429 | } |
425 | 430 | ||
426 | t1 = rdclock(); | 431 | t1 = rdclock(); |
427 | 432 | ||
428 | update_stats(&walltime_nsecs_stats, t1 - t0); | 433 | update_stats(&walltime_nsecs_stats, t1 - t0); |
429 | 434 | ||
430 | if (no_aggr) { | 435 | if (no_aggr) { |
431 | for (counter = 0; counter < nr_counters; counter++) | 436 | for (counter = 0; counter < nr_counters; counter++) |
432 | read_counter(counter); | 437 | read_counter(counter); |
433 | } else { | 438 | } else { |
434 | for (counter = 0; counter < nr_counters; counter++) | 439 | for (counter = 0; counter < nr_counters; counter++) |
435 | read_counter_aggr(counter); | 440 | read_counter_aggr(counter); |
436 | } | 441 | } |
437 | return WEXITSTATUS(status); | 442 | return WEXITSTATUS(status); |
438 | } | 443 | } |
439 | 444 | ||
440 | static void print_noise(int counter, double avg) | 445 | static void print_noise(int counter, double avg) |
441 | { | 446 | { |
442 | if (run_count == 1) | 447 | if (run_count == 1) |
443 | return; | 448 | return; |
444 | 449 | ||
445 | fprintf(stderr, " ( +- %7.3f%% )", | 450 | fprintf(stderr, " ( +- %7.3f%% )", |
446 | 100 * stddev_stats(&event_res_stats[counter][0]) / avg); | 451 | 100 * stddev_stats(&event_res_stats[counter][0]) / avg); |
447 | } | 452 | } |
448 | 453 | ||
449 | static void nsec_printout(int cpu, int counter, double avg) | 454 | static void nsec_printout(int cpu, int counter, double avg) |
450 | { | 455 | { |
451 | double msecs = avg / 1e6; | 456 | double msecs = avg / 1e6; |
457 | char cpustr[16] = { '\0', }; | ||
458 | const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s"; | ||
452 | 459 | ||
453 | if (no_aggr) | 460 | if (no_aggr) |
454 | fprintf(stderr, "CPU%-4d %18.6f %-24s", | 461 | sprintf(cpustr, "CPU%*d%s", |
455 | cpumap[cpu], msecs, event_name(counter)); | 462 | csv_output ? 0 : -4, |
456 | else | 463 | cpumap[cpu], csv_sep); |
457 | fprintf(stderr, " %18.6f %-24s", msecs, event_name(counter)); | ||
458 | 464 | ||
465 | fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(counter)); | ||
466 | |||
467 | if (csv_output) | ||
468 | return; | ||
469 | |||
459 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { | 470 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { |
460 | fprintf(stderr, " # %10.3f CPUs ", | 471 | fprintf(stderr, " # %10.3f CPUs ", |
461 | avg / avg_stats(&walltime_nsecs_stats)); | 472 | avg / avg_stats(&walltime_nsecs_stats)); |
462 | } | 473 | } |
463 | } | 474 | } |
464 | 475 | ||
465 | static void abs_printout(int cpu, int counter, double avg) | 476 | static void abs_printout(int cpu, int counter, double avg) |
466 | { | 477 | { |
467 | double total, ratio = 0.0; | 478 | double total, ratio = 0.0; |
468 | char cpustr[16] = { '\0', }; | 479 | char cpustr[16] = { '\0', }; |
480 | const char *fmt; | ||
469 | 481 | ||
482 | if (csv_output) | ||
483 | fmt = "%s%.0f%s%s"; | ||
484 | else if (big_num) | ||
485 | fmt = "%s%'18.0f%s%-24s"; | ||
486 | else | ||
487 | fmt = "%s%18.0f%s%-24s"; | ||
488 | |||
470 | if (no_aggr) | 489 | if (no_aggr) |
471 | sprintf(cpustr, "CPU%-4d", cpumap[cpu]); | 490 | sprintf(cpustr, "CPU%*d%s", |
491 | csv_output ? 0 : -4, | ||
492 | cpumap[cpu], csv_sep); | ||
472 | else | 493 | else |
473 | cpu = 0; | 494 | cpu = 0; |
474 | 495 | ||
475 | if (big_num) | 496 | fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(counter)); |
476 | fprintf(stderr, "%s %'18.0f %-24s", | ||
477 | cpustr, avg, event_name(counter)); | ||
478 | else | ||
479 | fprintf(stderr, "%s %18.0f %-24s", | ||
480 | cpustr, avg, event_name(counter)); | ||
481 | 497 | ||
498 | if (csv_output) | ||
499 | return; | ||
500 | |||
482 | if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { | 501 | if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { |
483 | total = avg_stats(&runtime_cycles_stats[cpu]); | 502 | total = avg_stats(&runtime_cycles_stats[cpu]); |
484 | 503 | ||
485 | if (total) | 504 | if (total) |
486 | ratio = avg / total; | 505 | ratio = avg / total; |
487 | 506 | ||
488 | fprintf(stderr, " # %10.3f IPC ", ratio); | 507 | fprintf(stderr, " # %10.3f IPC ", ratio); |
489 | } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && | 508 | } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && |
490 | runtime_branches_stats[cpu].n != 0) { | 509 | runtime_branches_stats[cpu].n != 0) { |
491 | total = avg_stats(&runtime_branches_stats[cpu]); | 510 | total = avg_stats(&runtime_branches_stats[cpu]); |
492 | 511 | ||
493 | if (total) | 512 | if (total) |
494 | ratio = avg * 100 / total; | 513 | ratio = avg * 100 / total; |
495 | 514 | ||
496 | fprintf(stderr, " # %10.3f %% ", ratio); | 515 | fprintf(stderr, " # %10.3f %% ", ratio); |
497 | 516 | ||
498 | } else if (runtime_nsecs_stats[cpu].n != 0) { | 517 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
499 | total = avg_stats(&runtime_nsecs_stats[cpu]); | 518 | total = avg_stats(&runtime_nsecs_stats[cpu]); |
500 | 519 | ||
501 | if (total) | 520 | if (total) |
502 | ratio = 1000.0 * avg / total; | 521 | ratio = 1000.0 * avg / total; |
503 | 522 | ||
504 | fprintf(stderr, " # %10.3f M/sec", ratio); | 523 | fprintf(stderr, " # %10.3f M/sec", ratio); |
505 | } | 524 | } |
506 | } | 525 | } |
507 | 526 | ||
508 | /* | 527 | /* |
509 | * Print out the results of a single counter: | 528 | * Print out the results of a single counter: |
510 | * aggregated counts in system-wide mode | 529 | * aggregated counts in system-wide mode |
511 | */ | 530 | */ |
512 | static void print_counter_aggr(int counter) | 531 | static void print_counter_aggr(int counter) |
513 | { | 532 | { |
514 | double avg = avg_stats(&event_res_stats[counter][0]); | 533 | double avg = avg_stats(&event_res_stats[counter][0]); |
515 | int scaled = event_scaled[counter]; | 534 | int scaled = event_scaled[counter]; |
516 | 535 | ||
517 | if (scaled == -1) { | 536 | if (scaled == -1) { |
518 | fprintf(stderr, " %18s %-24s\n", | 537 | fprintf(stderr, "%*s%s%-24s\n", |
519 | "<not counted>", event_name(counter)); | 538 | csv_output ? 0 : 18, |
539 | "<not counted>", csv_sep, event_name(counter)); | ||
520 | return; | 540 | return; |
521 | } | 541 | } |
522 | 542 | ||
523 | if (nsec_counter(counter)) | 543 | if (nsec_counter(counter)) |
524 | nsec_printout(-1, counter, avg); | 544 | nsec_printout(-1, counter, avg); |
525 | else | 545 | else |
526 | abs_printout(-1, counter, avg); | 546 | abs_printout(-1, counter, avg); |
527 | 547 | ||
548 | if (csv_output) { | ||
549 | fputc('\n', stderr); | ||
550 | return; | ||
551 | } | ||
552 | |||
528 | print_noise(counter, avg); | 553 | print_noise(counter, avg); |
529 | 554 | ||
530 | if (scaled) { | 555 | if (scaled) { |
531 | double avg_enabled, avg_running; | 556 | double avg_enabled, avg_running; |
532 | 557 | ||
533 | avg_enabled = avg_stats(&event_res_stats[counter][1]); | 558 | avg_enabled = avg_stats(&event_res_stats[counter][1]); |
534 | avg_running = avg_stats(&event_res_stats[counter][2]); | 559 | avg_running = avg_stats(&event_res_stats[counter][2]); |
535 | 560 | ||
536 | fprintf(stderr, " (scaled from %.2f%%)", | 561 | fprintf(stderr, " (scaled from %.2f%%)", |
537 | 100 * avg_running / avg_enabled); | 562 | 100 * avg_running / avg_enabled); |
538 | } | 563 | } |
539 | 564 | ||
540 | fprintf(stderr, "\n"); | 565 | fprintf(stderr, "\n"); |
541 | } | 566 | } |
542 | 567 | ||
543 | /* | 568 | /* |
544 | * Print out the results of a single counter: | 569 | * Print out the results of a single counter: |
545 | * does not use aggregated count in system-wide | 570 | * does not use aggregated count in system-wide |
546 | */ | 571 | */ |
547 | static void print_counter(int counter) | 572 | static void print_counter(int counter) |
548 | { | 573 | { |
549 | u64 ena, run, val; | 574 | u64 ena, run, val; |
550 | int cpu; | 575 | int cpu; |
551 | 576 | ||
552 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 577 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
553 | val = cpu_counts[cpu][counter].val; | 578 | val = cpu_counts[cpu][counter].val; |
554 | ena = cpu_counts[cpu][counter].ena; | 579 | ena = cpu_counts[cpu][counter].ena; |
555 | run = cpu_counts[cpu][counter].run; | 580 | run = cpu_counts[cpu][counter].run; |
556 | if (run == 0 || ena == 0) { | 581 | if (run == 0 || ena == 0) { |
557 | fprintf(stderr, "CPU%-4d %18s %-24s", cpumap[cpu], | 582 | fprintf(stderr, "CPU%*d%s%*s%s%-24s", |
558 | "<not counted>", event_name(counter)); | 583 | csv_output ? 0 : -4, |
584 | cpumap[cpu], csv_sep, | ||
585 | csv_output ? 0 : 18, | ||
586 | "<not counted>", csv_sep, | ||
587 | event_name(counter)); | ||
559 | 588 | ||
560 | fprintf(stderr, "\n"); | 589 | fprintf(stderr, "\n"); |
561 | continue; | 590 | continue; |
562 | } | 591 | } |
563 | 592 | ||
564 | if (nsec_counter(counter)) | 593 | if (nsec_counter(counter)) |
565 | nsec_printout(cpu, counter, val); | 594 | nsec_printout(cpu, counter, val); |
566 | else | 595 | else |
567 | abs_printout(cpu, counter, val); | 596 | abs_printout(cpu, counter, val); |
568 | 597 | ||
569 | print_noise(counter, 1.0); | 598 | if (!csv_output) { |
599 | print_noise(counter, 1.0); | ||
570 | 600 | ||
571 | if (run != ena) { | 601 | if (run != ena) { |
572 | fprintf(stderr, " (scaled from %.2f%%)", | 602 | fprintf(stderr, " (scaled from %.2f%%)", |
573 | 100.0 * run / ena); | 603 | 100.0 * run / ena); |
604 | } | ||
574 | } | 605 | } |
575 | fprintf(stderr, "\n"); | 606 | fprintf(stderr, "\n"); |
576 | } | 607 | } |
577 | } | 608 | } |
578 | 609 | ||
579 | static void print_stat(int argc, const char **argv) | 610 | static void print_stat(int argc, const char **argv) |
580 | { | 611 | { |
581 | int i, counter; | 612 | int i, counter; |
582 | 613 | ||
583 | fflush(stdout); | 614 | fflush(stdout); |
584 | 615 | ||
585 | fprintf(stderr, "\n"); | 616 | if (!csv_output) { |
586 | fprintf(stderr, " Performance counter stats for "); | 617 | fprintf(stderr, "\n"); |
587 | if(target_pid == -1 && target_tid == -1) { | 618 | fprintf(stderr, " Performance counter stats for "); |
588 | fprintf(stderr, "\'%s", argv[0]); | 619 | if(target_pid == -1 && target_tid == -1) { |
589 | for (i = 1; i < argc; i++) | 620 | fprintf(stderr, "\'%s", argv[0]); |
590 | fprintf(stderr, " %s", argv[i]); | 621 | for (i = 1; i < argc; i++) |
591 | } else if (target_pid != -1) | 622 | fprintf(stderr, " %s", argv[i]); |
592 | fprintf(stderr, "process id \'%d", target_pid); | 623 | } else if (target_pid != -1) |
593 | else | 624 | fprintf(stderr, "process id \'%d", target_pid); |
594 | fprintf(stderr, "thread id \'%d", target_tid); | 625 | else |
626 | fprintf(stderr, "thread id \'%d", target_tid); | ||
595 | 627 | ||
596 | fprintf(stderr, "\'"); | 628 | fprintf(stderr, "\'"); |
597 | if (run_count > 1) | 629 | if (run_count > 1) |
598 | fprintf(stderr, " (%d runs)", run_count); | 630 | fprintf(stderr, " (%d runs)", run_count); |
599 | fprintf(stderr, ":\n\n"); | 631 | fprintf(stderr, ":\n\n"); |
632 | } | ||
600 | 633 | ||
601 | if (no_aggr) { | 634 | if (no_aggr) { |
602 | for (counter = 0; counter < nr_counters; counter++) | 635 | for (counter = 0; counter < nr_counters; counter++) |
603 | print_counter(counter); | 636 | print_counter(counter); |
604 | } else { | 637 | } else { |
605 | for (counter = 0; counter < nr_counters; counter++) | 638 | for (counter = 0; counter < nr_counters; counter++) |
606 | print_counter_aggr(counter); | 639 | print_counter_aggr(counter); |
607 | } | 640 | } |
608 | 641 | ||
609 | fprintf(stderr, "\n"); | 642 | if (!csv_output) { |
610 | fprintf(stderr, " %18.9f seconds time elapsed", | 643 | fprintf(stderr, "\n"); |
611 | avg_stats(&walltime_nsecs_stats)/1e9); | 644 | fprintf(stderr, " %18.9f seconds time elapsed", |
612 | if (run_count > 1) { | 645 | avg_stats(&walltime_nsecs_stats)/1e9); |
613 | fprintf(stderr, " ( +- %7.3f%% )", | 646 | if (run_count > 1) { |
647 | fprintf(stderr, " ( +- %7.3f%% )", | ||
614 | 100*stddev_stats(&walltime_nsecs_stats) / | 648 | 100*stddev_stats(&walltime_nsecs_stats) / |
615 | avg_stats(&walltime_nsecs_stats)); | 649 | avg_stats(&walltime_nsecs_stats)); |
650 | } | ||
651 | fprintf(stderr, "\n\n"); | ||
616 | } | 652 | } |
617 | fprintf(stderr, "\n\n"); | ||
618 | } | 653 | } |
619 | 654 | ||
620 | static volatile int signr = -1; | 655 | static volatile int signr = -1; |
621 | 656 | ||
622 | static void skip_signal(int signo) | 657 | static void skip_signal(int signo) |
623 | { | 658 | { |
624 | if(child_pid == -1) | 659 | if(child_pid == -1) |
625 | done = 1; | 660 | done = 1; |
626 | 661 | ||
627 | signr = signo; | 662 | signr = signo; |
628 | } | 663 | } |
629 | 664 | ||
630 | static void sig_atexit(void) | 665 | static void sig_atexit(void) |
631 | { | 666 | { |
632 | if (child_pid != -1) | 667 | if (child_pid != -1) |
633 | kill(child_pid, SIGTERM); | 668 | kill(child_pid, SIGTERM); |
634 | 669 | ||
635 | if (signr == -1) | 670 | if (signr == -1) |
636 | return; | 671 | return; |
637 | 672 | ||
638 | signal(signr, SIG_DFL); | 673 | signal(signr, SIG_DFL); |
639 | kill(getpid(), signr); | 674 | kill(getpid(), signr); |
640 | } | 675 | } |
641 | 676 | ||
642 | static const char * const stat_usage[] = { | 677 | static const char * const stat_usage[] = { |
643 | "perf stat [<options>] [<command>]", | 678 | "perf stat [<options>] [<command>]", |
644 | NULL | 679 | NULL |
645 | }; | 680 | }; |
646 | 681 | ||
682 | static int stat__set_big_num(const struct option *opt __used, | ||
683 | const char *s __used, int unset) | ||
684 | { | ||
685 | big_num_opt = unset ? 0 : 1; | ||
686 | return 0; | ||
687 | } | ||
688 | |||
647 | static const struct option options[] = { | 689 | static const struct option options[] = { |
648 | OPT_CALLBACK('e', "event", NULL, "event", | 690 | OPT_CALLBACK('e', "event", NULL, "event", |
649 | "event selector. use 'perf list' to list available events", | 691 | "event selector. use 'perf list' to list available events", |
650 | parse_events), | 692 | parse_events), |
651 | OPT_BOOLEAN('i', "no-inherit", &no_inherit, | 693 | OPT_BOOLEAN('i', "no-inherit", &no_inherit, |
652 | "child tasks do not inherit counters"), | 694 | "child tasks do not inherit counters"), |
653 | OPT_INTEGER('p', "pid", &target_pid, | 695 | OPT_INTEGER('p', "pid", &target_pid, |
654 | "stat events on existing process id"), | 696 | "stat events on existing process id"), |
655 | OPT_INTEGER('t', "tid", &target_tid, | 697 | OPT_INTEGER('t', "tid", &target_tid, |
656 | "stat events on existing thread id"), | 698 | "stat events on existing thread id"), |
657 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 699 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
658 | "system-wide collection from all CPUs"), | 700 | "system-wide collection from all CPUs"), |
659 | OPT_BOOLEAN('c', "scale", &scale, | 701 | OPT_BOOLEAN('c', "scale", &scale, |
660 | "scale/normalize counters"), | 702 | "scale/normalize counters"), |
661 | OPT_INCR('v', "verbose", &verbose, | 703 | OPT_INCR('v', "verbose", &verbose, |
662 | "be more verbose (show counter open errors, etc)"), | 704 | "be more verbose (show counter open errors, etc)"), |
663 | OPT_INTEGER('r', "repeat", &run_count, | 705 | OPT_INTEGER('r', "repeat", &run_count, |
664 | "repeat command and print average + stddev (max: 100)"), | 706 | "repeat command and print average + stddev (max: 100)"), |
665 | OPT_BOOLEAN('n', "null", &null_run, | 707 | OPT_BOOLEAN('n', "null", &null_run, |
666 | "null run - dont start any counters"), | 708 | "null run - dont start any counters"), |
667 | OPT_BOOLEAN('B', "big-num", &big_num, | 709 | OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, |
668 | "print large numbers with thousands\' separators"), | 710 | "print large numbers with thousands\' separators", |
711 | stat__set_big_num), | ||
669 | OPT_STRING('C', "cpu", &cpu_list, "cpu", | 712 | OPT_STRING('C', "cpu", &cpu_list, "cpu", |
670 | "list of cpus to monitor in system-wide"), | 713 | "list of cpus to monitor in system-wide"), |
671 | OPT_BOOLEAN('A', "no-aggr", &no_aggr, | 714 | OPT_BOOLEAN('A', "no-aggr", &no_aggr, |
672 | "disable CPU count aggregation"), | 715 | "disable CPU count aggregation"), |
716 | OPT_STRING('x', "field-separator", &csv_sep, "separator", | ||
717 | "print counts with custom separator"), | ||
673 | OPT_END() | 718 | OPT_END() |
674 | }; | 719 | }; |
675 | 720 | ||
676 | int cmd_stat(int argc, const char **argv, const char *prefix __used) | 721 | int cmd_stat(int argc, const char **argv, const char *prefix __used) |
677 | { | 722 | { |
678 | int status; | 723 | int status; |
679 | int i,j; | 724 | int i,j; |
680 | 725 | ||
681 | setlocale(LC_ALL, ""); | 726 | setlocale(LC_ALL, ""); |
682 | 727 | ||
683 | argc = parse_options(argc, argv, options, stat_usage, | 728 | argc = parse_options(argc, argv, options, stat_usage, |
684 | PARSE_OPT_STOP_AT_NON_OPTION); | 729 | PARSE_OPT_STOP_AT_NON_OPTION); |
730 | |||
731 | if (csv_sep) | ||
732 | csv_output = true; | ||
733 | else | ||
734 | csv_sep = DEFAULT_SEPARATOR; | ||
735 | |||
736 | /* | ||
737 | * let the spreadsheet do the pretty-printing | ||
738 | */ | ||
739 | if (csv_output) { | ||
740 | /* User explicitly passed -B? */ | ||
741 | if (big_num_opt == 1) { | ||
742 | fprintf(stderr, "-B option not supported with -x\n"); | ||
743 | usage_with_options(stat_usage, options); | ||
744 | } else /* Nope, so disable big number formatting */ | ||
745 | big_num = false; | ||
746 | } else if (big_num_opt == 0) /* User passed --no-big-num */ | ||
747 | big_num = false; | ||
748 | |||
685 | if (!argc && target_pid == -1 && target_tid == -1) | 749 | if (!argc && target_pid == -1 && target_tid == -1) |
686 | usage_with_options(stat_usage, options); | 750 | usage_with_options(stat_usage, options); |
687 | if (run_count <= 0) | 751 | if (run_count <= 0) |
688 | usage_with_options(stat_usage, options); | 752 | usage_with_options(stat_usage, options); |
689 | 753 | ||
690 | /* no_aggr is for system-wide only */ | 754 | /* no_aggr is for system-wide only */ |
691 | if (no_aggr && !system_wide) | 755 | if (no_aggr && !system_wide) |
692 | usage_with_options(stat_usage, options); | 756 | usage_with_options(stat_usage, options); |
693 | 757 | ||
694 | /* Set attrs and nr_counters if no event is selected and !null_run */ | 758 | /* Set attrs and nr_counters if no event is selected and !null_run */ |
695 | if (!null_run && !nr_counters) { | 759 | if (!null_run && !nr_counters) { |
696 | memcpy(attrs, default_attrs, sizeof(default_attrs)); | 760 | memcpy(attrs, default_attrs, sizeof(default_attrs)); |
697 | nr_counters = ARRAY_SIZE(default_attrs); | 761 | nr_counters = ARRAY_SIZE(default_attrs); |
698 | } | 762 | } |
699 | 763 | ||
700 | if (system_wide) | 764 | if (system_wide) |
701 | nr_cpus = read_cpu_map(cpu_list); | 765 | nr_cpus = read_cpu_map(cpu_list); |
702 | else | 766 | else |
703 | nr_cpus = 1; | 767 | nr_cpus = 1; |
704 | 768 | ||
705 | if (nr_cpus < 1) | 769 | if (nr_cpus < 1) |
706 | usage_with_options(stat_usage, options); | 770 | usage_with_options(stat_usage, options); |
707 | 771 | ||
708 | if (target_pid != -1) { | 772 | if (target_pid != -1) { |
709 | target_tid = target_pid; | 773 | target_tid = target_pid; |
710 | thread_num = find_all_tid(target_pid, &all_tids); | 774 | thread_num = find_all_tid(target_pid, &all_tids); |
711 | if (thread_num <= 0) { | 775 | if (thread_num <= 0) { |
712 | fprintf(stderr, "Can't find all threads of pid %d\n", | 776 | fprintf(stderr, "Can't find all threads of pid %d\n", |
713 | target_pid); | 777 | target_pid); |
714 | usage_with_options(stat_usage, options); | 778 | usage_with_options(stat_usage, options); |
715 | } | 779 | } |
716 | } else { | 780 | } else { |
717 | all_tids=malloc(sizeof(pid_t)); | 781 | all_tids=malloc(sizeof(pid_t)); |
718 | if (!all_tids) | 782 | if (!all_tids) |
719 | return -ENOMEM; | 783 | return -ENOMEM; |
720 | 784 | ||
721 | all_tids[0] = target_tid; | 785 | all_tids[0] = target_tid; |
722 | thread_num = 1; | 786 | thread_num = 1; |
723 | } | 787 | } |
724 | 788 | ||
725 | for (i = 0; i < MAX_NR_CPUS; i++) { | 789 | for (i = 0; i < MAX_NR_CPUS; i++) { |
726 | for (j = 0; j < MAX_COUNTERS; j++) { | 790 | for (j = 0; j < MAX_COUNTERS; j++) { |
727 | fd[i][j] = malloc(sizeof(int)*thread_num); | 791 | fd[i][j] = malloc(sizeof(int)*thread_num); |
728 | if (!fd[i][j]) | 792 | if (!fd[i][j]) |
729 | return -ENOMEM; | 793 | return -ENOMEM; |
730 | } | 794 | } |
731 | } | 795 | } |
732 | 796 | ||
733 | /* | 797 | /* |
734 | * We don't want to block the signals - that would cause | 798 | * We don't want to block the signals - that would cause |
735 | * child tasks to inherit that and Ctrl-C would not work. | 799 | * child tasks to inherit that and Ctrl-C would not work. |
736 | * What we want is for Ctrl-C to work in the exec()-ed | 800 | * What we want is for Ctrl-C to work in the exec()-ed |
737 | * task, but being ignored by perf stat itself: | 801 | * task, but being ignored by perf stat itself: |
738 | */ | 802 | */ |
739 | atexit(sig_atexit); | 803 | atexit(sig_atexit); |
740 | signal(SIGINT, skip_signal); | 804 | signal(SIGINT, skip_signal); |
741 | signal(SIGALRM, skip_signal); | 805 | signal(SIGALRM, skip_signal); |
742 | signal(SIGABRT, skip_signal); | 806 | signal(SIGABRT, skip_signal); |
743 | 807 | ||
744 | status = 0; | 808 | status = 0; |
745 | for (run_idx = 0; run_idx < run_count; run_idx++) { | 809 | for (run_idx = 0; run_idx < run_count; run_idx++) { |
746 | if (run_count != 1 && verbose) | 810 | if (run_count != 1 && verbose) |
747 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); | 811 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); |
748 | status = run_perf_stat(argc, argv); | 812 | status = run_perf_stat(argc, argv); |
749 | } | 813 | } |