Commit e4b546a3643fbfc510d5ef7db538e4d3ab00effb

Authored by Ingo Molnar

Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/l…

…inux-2.6 into perf/core

Showing 15 changed files Inline Diff

tools/perf/Documentation/perf-annotate.txt
1 perf-annotate(1) 1 perf-annotate(1)
2 ================ 2 ================
3 3
4 NAME 4 NAME
5 ---- 5 ----
6 perf-annotate - Read perf.data (created by perf record) and display annotated code 6 perf-annotate - Read perf.data (created by perf record) and display annotated code
7 7
8 SYNOPSIS 8 SYNOPSIS
9 -------- 9 --------
10 [verse] 10 [verse]
11 'perf annotate' [-i <file> | --input=file] [symbol_name] 11 'perf annotate' [-i <file> | --input=file] [symbol_name]
12 12
13 DESCRIPTION 13 DESCRIPTION
14 ----------- 14 -----------
15 This command reads the input file and displays an annotated version of the 15 This command reads the input file and displays an annotated version of the
16 code. If the object file has debug symbols then the source code will be 16 code. If the object file has debug symbols then the source code will be
17 displayed alongside assembly code. 17 displayed alongside assembly code.
18 18
19 If there is no debug info in the object, then annotated assembly is displayed. 19 If there is no debug info in the object, then annotated assembly is displayed.
20 20
21 OPTIONS 21 OPTIONS
22 ------- 22 -------
23 -i:: 23 -i::
24 --input=:: 24 --input=::
25 Input file name. (default: perf.data) 25 Input file name. (default: perf.data)
26 26
27 -d::
28 --dsos=<dso[,dso...]>::
29 Only consider symbols in these dsos.
30 -s::
31 --symbol=<symbol>::
32 Symbol to annotate.
33
34 -f::
35 --force::
36 Don't complain, do it.
37
38 -v::
39 --verbose::
40 Be more verbose. (Show symbol address, etc)
41
42 -D::
43 --dump-raw-trace::
44 Dump raw trace in ASCII.
45
46 -k::
47 --vmlinux=<file>::
48 vmlinux pathname.
49
50 -m::
51 --modules::
52 Load module symbols. WARNING: use only with -k and LIVE kernel.
53
54 -l::
55 --print-line::
56 Print matching source lines (may be slow).
57
58 -P::
59 --full-paths::
60 Don't shorten the displayed pathnames.
61
27 --stdio:: Use the stdio interface. 62 --stdio:: Use the stdio interface.
28 63
29 --tui:: Use the TUI interface. Use of --tui requires a tty, if one is not 64 --tui:: Use the TUI interface. Use of --tui requires a tty, if one is not
30 present, as when piping to other commands, the stdio interface is 65 present, as when piping to other commands, the stdio interface is
31 used. This interface starts by centering on the line with more 66 used. This interface starts by centering on the line with more
32 samples, TAB/UNTAB cycles thru the lines with more samples. 67 samples, TAB/UNTAB cycles through the lines with more samples.
33 68
34 SEE ALSO 69 SEE ALSO
35 -------- 70 --------
36 linkperf:perf-record[1], linkperf:perf-report[1] 71 linkperf:perf-record[1], linkperf:perf-report[1]
37 72
tools/perf/Documentation/perf-buildid-list.txt
1 perf-buildid-list(1) 1 perf-buildid-list(1)
2 ==================== 2 ====================
3 3
4 NAME 4 NAME
5 ---- 5 ----
6 perf-buildid-list - List the buildids in a perf.data file 6 perf-buildid-list - List the buildids in a perf.data file
7 7
8 SYNOPSIS 8 SYNOPSIS
9 -------- 9 --------
10 [verse] 10 [verse]
11 'perf buildid-list <options>' 11 'perf buildid-list <options>'
12 12
13 DESCRIPTION 13 DESCRIPTION
14 ----------- 14 -----------
15 This command displays the buildids found in a perf.data file, so that other 15 This command displays the buildids found in a perf.data file, so that other
16 tools can be used to fetch packages with matching symbol tables for use by 16 tools can be used to fetch packages with matching symbol tables for use by
17 perf report. 17 perf report.
18 18
19 OPTIONS 19 OPTIONS
20 ------- 20 -------
21 -H::
22 --with-hits::
23 Show only DSOs with hits.
21 -i:: 24 -i::
22 --input=:: 25 --input=::
23 Input file name. (default: perf.data) 26 Input file name. (default: perf.data)
24 -f:: 27 -f::
25 --force:: 28 --force::
26 Don't do ownership validation. 29 Don't do ownership validation.
27 -v:: 30 -v::
28 --verbose:: 31 --verbose::
29 Be more verbose. 32 Be more verbose.
30 33
31 SEE ALSO 34 SEE ALSO
32 -------- 35 --------
33 linkperf:perf-record[1], linkperf:perf-top[1], 36 linkperf:perf-record[1], linkperf:perf-top[1],
34 linkperf:perf-report[1] 37 linkperf:perf-report[1]
35 38
tools/perf/Documentation/perf-diff.txt
1 perf-diff(1) 1 perf-diff(1)
2 ============ 2 ============
3 3
4 NAME 4 NAME
5 ---- 5 ----
6 perf-diff - Read two perf.data files and display the differential profile 6 perf-diff - Read two perf.data files and display the differential profile
7 7
8 SYNOPSIS 8 SYNOPSIS
9 -------- 9 --------
10 [verse] 10 [verse]
11 'perf diff' [oldfile] [newfile] 11 'perf diff' [oldfile] [newfile]
12 12
13 DESCRIPTION 13 DESCRIPTION
14 ----------- 14 -----------
15 This command displays the performance difference amongst two perf.data files 15 This command displays the performance difference amongst two perf.data files
16 captured via perf record. 16 captured via perf record.
17 17
18 If no parameters are passed it will assume perf.data.old and perf.data. 18 If no parameters are passed it will assume perf.data.old and perf.data.
19 19
20 OPTIONS 20 OPTIONS
21 ------- 21 -------
22 -M::
23 --displacement::
24 Show position displacement relative to baseline.
25
26 -D::
27 --dump-raw-trace::
28 Dump raw trace in ASCII.
29
30 -m::
31 --modules::
32 Load module symbols. WARNING: use only with -k and LIVE kernel.
33
22 -d:: 34 -d::
23 --dsos=:: 35 --dsos=::
24 Only consider symbols in these dsos. CSV that understands 36 Only consider symbols in these dsos. CSV that understands
25 file://filename entries. 37 file://filename entries.
26 38
27 -C:: 39 -C::
28 --comms=:: 40 --comms=::
29 Only consider symbols in these comms. CSV that understands 41 Only consider symbols in these comms. CSV that understands
30 file://filename entries. 42 file://filename entries.
31 43
32 -S:: 44 -S::
33 --symbols=:: 45 --symbols=::
34 Only consider these symbols. CSV that understands 46 Only consider these symbols. CSV that understands
35 file://filename entries. 47 file://filename entries.
36 48
37 -s:: 49 -s::
38 --sort=:: 50 --sort=::
39 Sort by key(s): pid, comm, dso, symbol. 51 Sort by key(s): pid, comm, dso, symbol.
40 52
41 -t:: 53 -t::
42 --field-separator=:: 54 --field-separator=::
43 55
44 Use a special separator character and don't pad with spaces, replacing 56 Use a special separator character and don't pad with spaces, replacing
45 all occurances of this separator in symbol names (and other output) 57 all occurrences of this separator in symbol names (and other output)
46 with a '.' character, which is thus the only non-valid separator. 58 with a '.' character, which is thus the only non-valid separator.
47 59
48 -v:: 60 -v::
49 --verbose:: 61 --verbose::
50 Be verbose, for instance, show the raw counts in addition to the 62 Be verbose, for instance, show the raw counts in addition to the
51 diff. 63 diff.
64
65 -f::
66 --force::
67 Don't complain, do it.
68
52 69
53 SEE ALSO 70 SEE ALSO
54 -------- 71 --------
55 linkperf:perf-record[1] 72 linkperf:perf-record[1]
56 73
tools/perf/Documentation/perf-kvm.txt
1 perf-kvm(1) 1 perf-kvm(1)
2 =========== 2 ===========
3 3
4 NAME 4 NAME
5 ---- 5 ----
6 perf-kvm - Tool to trace/measure kvm guest os 6 perf-kvm - Tool to trace/measure kvm guest os
7 7
8 SYNOPSIS 8 SYNOPSIS
9 -------- 9 --------
10 [verse] 10 [verse]
11 'perf kvm' [--host] [--guest] [--guestmount=<path> 11 'perf kvm' [--host] [--guest] [--guestmount=<path>
12 [--guestkallsyms=<path> --guestmodules=<path> | --guestvmlinux=<path>]] 12 [--guestkallsyms=<path> --guestmodules=<path> | --guestvmlinux=<path>]]
13 {top|record|report|diff|buildid-list} 13 {top|record|report|diff|buildid-list}
14 'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path> 14 'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path>
15 | --guestvmlinux=<path>] {top|record|report|diff|buildid-list} 15 | --guestvmlinux=<path>] {top|record|report|diff|buildid-list}
16 16
17 DESCRIPTION 17 DESCRIPTION
18 ----------- 18 -----------
19 There are a couple of variants of perf kvm: 19 There are a couple of variants of perf kvm:
20 20
21 'perf kvm [options] top <command>' to generate and display 21 'perf kvm [options] top <command>' to generate and display
22 a performance counter profile of guest os in realtime 22 a performance counter profile of guest os in realtime
23 of an arbitrary workload. 23 of an arbitrary workload.
24 24
25 'perf kvm record <command>' to record the performance couinter profile 25 'perf kvm record <command>' to record the performance counter profile
26 of an arbitrary workload and save it into a perf data file. If both 26 of an arbitrary workload and save it into a perf data file. If both
27 --host and --guest are input, the perf data file name is perf.data.kvm. 27 --host and --guest are input, the perf data file name is perf.data.kvm.
28 If there is no --host but --guest, the file name is perf.data.guest. 28 If there is no --host but --guest, the file name is perf.data.guest.
29 If there is no --guest but --host, the file name is perf.data.host. 29 If there is no --guest but --host, the file name is perf.data.host.
30 30
31 'perf kvm report' to display the performance counter profile information 31 'perf kvm report' to display the performance counter profile information
32 recorded via perf kvm record. 32 recorded via perf kvm record.
33 33
34 'perf kvm diff' to display the performance difference amongst two perf.data 34 'perf kvm diff' to display the performance difference amongst two perf.data
35 files captured via perf record. 35 files captured via perf record.
36 36
37 'perf kvm buildid-list' to display the buildids found in a perf data file, 37 'perf kvm buildid-list' to display the buildids found in a perf data file,
38 so that other tools can be used to fetch packages with matching symbol tables 38 so that other tools can be used to fetch packages with matching symbol tables
39 for use by perf report. 39 for use by perf report.
40 40
41 OPTIONS 41 OPTIONS
42 ------- 42 -------
43 -i::
44 --input=::
45 Input file name.
46 -o::
47 --output::
48 Output file name.
43 --host=:: 49 --host=::
44 Collect host side performance profile. 50 Collect host side performance profile.
45 --guest=:: 51 --guest=::
46 Collect guest side performance profile. 52 Collect guest side performance profile.
47 --guestmount=<path>:: 53 --guestmount=<path>::
48 Guest os root file system mount directory. Users mount guest os 54 Guest os root file system mount directory. Users mount guest os
49 root directories under <path> by a specific filesystem access method, 55 root directories under <path> by a specific filesystem access method,
50 typically, sshfs. For example, start 2 guest os. One's pid is 8888 56 typically, sshfs. For example, start 2 guest os. One's pid is 8888
51 and the other's is 9999. 57 and the other's is 9999.
52 #mkdir ~/guestmount; cd ~/guestmount 58 #mkdir ~/guestmount; cd ~/guestmount
53 #sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/ 59 #sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/
54 #sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/ 60 #sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/
55 #perf kvm --host --guest --guestmount=~/guestmount top 61 #perf kvm --host --guest --guestmount=~/guestmount top
56 --guestkallsyms=<path>:: 62 --guestkallsyms=<path>::
57 Guest os /proc/kallsyms file copy. 'perf kvm' reads it to get guest 63 Guest os /proc/kallsyms file copy. 'perf kvm' reads it to get guest
58 kernel symbols. Users copy it out from guest os. 64 kernel symbols. Users copy it out from guest os.
59 --guestmodules=<path>:: 65 --guestmodules=<path>::
60 Guest os /proc/modules file copy. 'perf kvm' reads it to get guest 66 Guest os /proc/modules file copy. 'perf kvm' reads it to get guest
61 kernel module information. Users copy it out from guest os. 67 kernel module information. Users copy it out from guest os.
62 --guestvmlinux=<path>:: 68 --guestvmlinux=<path>::
63 Guest os kernel vmlinux. 69 Guest os kernel vmlinux.
64 70
65 SEE ALSO 71 SEE ALSO
66 -------- 72 --------
67 linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1], 73 linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1],
68 linkperf:perf-diff[1], linkperf:perf-buildid-list[1] 74 linkperf:perf-diff[1], linkperf:perf-buildid-list[1]
69 75
tools/perf/Documentation/perf-lock.txt
1 perf-lock(1) 1 perf-lock(1)
2 ============ 2 ============
3 3
4 NAME 4 NAME
5 ---- 5 ----
6 perf-lock - Analyze lock events 6 perf-lock - Analyze lock events
7 7
8 SYNOPSIS 8 SYNOPSIS
9 -------- 9 --------
10 [verse] 10 [verse]
11 'perf lock' {record|report|trace} 11 'perf lock' {record|report|trace}
12 12
13 DESCRIPTION 13 DESCRIPTION
14 ----------- 14 -----------
15 You can analyze various lock behaviours 15 You can analyze various lock behaviours
16 and statistics with this 'perf lock' command. 16 and statistics with this 'perf lock' command.
17 17
18 'perf lock record <command>' records lock events 18 'perf lock record <command>' records lock events
19 between start and end <command>. And this command 19 between start and end <command>. And this command
20 produces the file "perf.data" which contains tracing 20 produces the file "perf.data" which contains tracing
21 results of lock events. 21 results of lock events.
22 22
23 'perf lock trace' shows raw lock events. 23 'perf lock trace' shows raw lock events.
24 24
25 'perf lock report' reports statistical data. 25 'perf lock report' reports statistical data.
26 26
27 OPTIONS
28 -------
29
30 -i::
31 --input=<file>::
32 Input file name.
33
34 -v::
35 --verbose::
36 Be more verbose (show symbol address, etc).
37
38 -D::
39 --dump-raw-trace::
40 Dump raw trace in ASCII.
41
27 SEE ALSO 42 SEE ALSO
28 -------- 43 --------
29 linkperf:perf[1] 44 linkperf:perf[1]
30 45
tools/perf/Documentation/perf-probe.txt
1 perf-probe(1) 1 perf-probe(1)
2 ============= 2 =============
3 3
4 NAME 4 NAME
5 ---- 5 ----
6 perf-probe - Define new dynamic tracepoints 6 perf-probe - Define new dynamic tracepoints
7 7
8 SYNOPSIS 8 SYNOPSIS
9 -------- 9 --------
10 [verse] 10 [verse]
11 'perf probe' [options] --add='PROBE' [...] 11 'perf probe' [options] --add='PROBE' [...]
12 or 12 or
13 'perf probe' [options] PROBE 13 'perf probe' [options] PROBE
14 or 14 or
15 'perf probe' [options] --del='[GROUP:]EVENT' [...] 15 'perf probe' [options] --del='[GROUP:]EVENT' [...]
16 or 16 or
17 'perf probe' --list 17 'perf probe' --list
18 or 18 or
19 'perf probe' [options] --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]' 19 'perf probe' [options] --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
20 or 20 or
21 'perf probe' [options] --vars='PROBEPOINT' 21 'perf probe' [options] --vars='PROBEPOINT'
22 22
23 DESCRIPTION 23 DESCRIPTION
24 ----------- 24 -----------
25 This command defines dynamic tracepoint events, by symbol and registers 25 This command defines dynamic tracepoint events, by symbol and registers
26 without debuginfo, or by C expressions (C line numbers, C function names, 26 without debuginfo, or by C expressions (C line numbers, C function names,
27 and C local variables) with debuginfo. 27 and C local variables) with debuginfo.
28 28
29 29
30 OPTIONS 30 OPTIONS
31 ------- 31 -------
32 -k:: 32 -k::
33 --vmlinux=PATH:: 33 --vmlinux=PATH::
34 Specify vmlinux path which has debuginfo (Dwarf binary). 34 Specify vmlinux path which has debuginfo (Dwarf binary).
35 35
36 -m:: 36 -m::
37 --module=MODNAME:: 37 --module=MODNAME::
38 Specify module name in which perf-probe searches probe points 38 Specify module name in which perf-probe searches probe points
39 or lines. 39 or lines.
40 40
41 -s:: 41 -s::
42 --source=PATH:: 42 --source=PATH::
43 Specify path to kernel source. 43 Specify path to kernel source.
44 44
45 -v:: 45 -v::
46 --verbose:: 46 --verbose::
47 Be more verbose (show parsed arguments, etc). 47 Be more verbose (show parsed arguments, etc).
48 48
49 -a:: 49 -a::
50 --add=:: 50 --add=::
51 Define a probe event (see PROBE SYNTAX for detail). 51 Define a probe event (see PROBE SYNTAX for detail).
52 52
53 -d:: 53 -d::
54 --del=:: 54 --del=::
55 Delete probe events. This accepts glob wildcards('*', '?') and character 55 Delete probe events. This accepts glob wildcards('*', '?') and character
56 classes(e.g. [a-z], [!A-Z]). 56 classes(e.g. [a-z], [!A-Z]).
57 57
58 -l:: 58 -l::
59 --list:: 59 --list::
60 List up current probe events. 60 List up current probe events.
61 61
62 -L:: 62 -L::
63 --line=:: 63 --line=::
64 Show source code lines which can be probed. This needs an argument 64 Show source code lines which can be probed. This needs an argument
65 which specifies a range of the source code. (see LINE SYNTAX for detail) 65 which specifies a range of the source code. (see LINE SYNTAX for detail)
66 66
67 -V:: 67 -V::
68 --vars=:: 68 --vars=::
69 Show available local variables at given probe point. The argument 69 Show available local variables at given probe point. The argument
70 syntax is same as PROBE SYNTAX, but NO ARGs. 70 syntax is same as PROBE SYNTAX, but NO ARGs.
71 71
72 --externs:: 72 --externs::
73 (Only for --vars) Show external defined variables in addition to local 73 (Only for --vars) Show external defined variables in addition to local
74 variables. 74 variables.
75 75
76 -f:: 76 -f::
77 --force:: 77 --force::
78 Forcibly add events with existing name. 78 Forcibly add events with existing name.
79 79
80 -n:: 80 -n::
81 --dry-run:: 81 --dry-run::
82 Dry run. With this option, --add and --del don't execute actual 82 Dry run. With this option, --add and --del don't execute actual
83 adding and removal operations. 83 adding and removal operations.
84 84
85 --max-probes:: 85 --max-probes::
86 Set the maximum number of probe points for an event. Default is 128. 86 Set the maximum number of probe points for an event. Default is 128.
87 87
88 PROBE SYNTAX 88 PROBE SYNTAX
89 ------------ 89 ------------
90 Probe points are defined by following syntax. 90 Probe points are defined by following syntax.
91 91
92 1) Define event based on function name 92 1) Define event based on function name
93 [EVENT=]FUNC[@SRC][:RLN|+OFFS|%return|;PTN] [ARG ...] 93 [EVENT=]FUNC[@SRC][:RLN|+OFFS|%return|;PTN] [ARG ...]
94 94
95 2) Define event based on source file with line number 95 2) Define event based on source file with line number
96 [EVENT=]SRC:ALN [ARG ...] 96 [EVENT=]SRC:ALN [ARG ...]
97 97
98 3) Define event based on source file with lazy pattern 98 3) Define event based on source file with lazy pattern
99 [EVENT=]SRC;PTN [ARG ...] 99 [EVENT=]SRC;PTN [ARG ...]
100 100
101 101
102 'EVENT' specifies the name of new event, if omitted, it will be set to the name of the probed function. Currently, event group name is set as 'probe'. 102 'EVENT' specifies the name of new event, if omitted, it will be set to the name of the probed function. Currently, event group name is set as 'probe'.
103 'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function. 103 'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function.
104 It is also possible to specify a probe point by the source line number or lazy matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file path, ':ALN' is the line number and ';PTN' is the lazy matching pattern. 104 It is also possible to specify a probe point by the source line number or lazy matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file path, ':ALN' is the line number and ';PTN' is the lazy matching pattern.
105 'ARG' specifies the arguments of this probe point, (see PROBE ARGUMENT). 105 'ARG' specifies the arguments of this probe point, (see PROBE ARGUMENT).
106 106
107 PROBE ARGUMENT 107 PROBE ARGUMENT
108 -------------- 108 --------------
109 Each probe argument follows below syntax. 109 Each probe argument follows below syntax.
110 110
111 [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE] 111 [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE]
112 112
113 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) 113 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
114 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically sets the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. 114 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically sets the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
115 115
116 LINE SYNTAX 116 LINE SYNTAX
117 ----------- 117 -----------
118 Line range is descripted by following syntax. 118 Line range is described by following syntax.
119 119
120 "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]" 120 "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]"
121 121
122 FUNC specifies the function name of showing lines. 'RLN' is the start line 122 FUNC specifies the function name of showing lines. 'RLN' is the start line
123 number from function entry line, and 'RLN2' is the end line number. As same as 123 number from function entry line, and 'RLN2' is the end line number. As same as
124 probe syntax, 'SRC' means the source file path, 'ALN' is start line number, 124 probe syntax, 'SRC' means the source file path, 'ALN' is start line number,
125 and 'ALN2' is end line number in the file. It is also possible to specify how 125 and 'ALN2' is end line number in the file. It is also possible to specify how
126 many lines to show by using 'NUM'. 126 many lines to show by using 'NUM'.
127 So, "source.c:100-120" shows lines from 100th to 120th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function. 127 So, "source.c:100-120" shows lines from 100th to 120th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function.
128 128
129 LAZY MATCHING 129 LAZY MATCHING
130 ------------- 130 -------------
131 The lazy line matching is similar to glob matching but ignoring spaces in both of pattern and target. So this accepts wildcards('*', '?') and character classes(e.g. [a-z], [!A-Z]). 131 The lazy line matching is similar to glob matching but ignoring spaces in both of pattern and target. So this accepts wildcards('*', '?') and character classes(e.g. [a-z], [!A-Z]).
132 132
133 e.g. 133 e.g.
134 'a=*' can match 'a=b', 'a = b', 'a == b' and so on. 134 'a=*' can match 'a=b', 'a = b', 'a == b' and so on.
135 135
136 This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function. 136 This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function.
137 137
138 138
139 EXAMPLES 139 EXAMPLES
140 -------- 140 --------
141 Display which lines in schedule() can be probed: 141 Display which lines in schedule() can be probed:
142 142
143 ./perf probe --line schedule 143 ./perf probe --line schedule
144 144
145 Add a probe on schedule() function 12th line with recording cpu local variable: 145 Add a probe on schedule() function 12th line with recording cpu local variable:
146 146
147 ./perf probe schedule:12 cpu 147 ./perf probe schedule:12 cpu
148 or 148 or
149 ./perf probe --add='schedule:12 cpu' 149 ./perf probe --add='schedule:12 cpu'
150 150
151 This will add one or more probes whose names start with "schedule". 151 This will add one or more probes whose names start with "schedule".
152 152
153 Add probes on lines in schedule() function which calls update_rq_clock(). 153 Add probes on lines in schedule() function which calls update_rq_clock().
154 154
155 ./perf probe 'schedule;update_rq_clock*' 155 ./perf probe 'schedule;update_rq_clock*'
156 or 156 or
157 ./perf probe --add='schedule;update_rq_clock*' 157 ./perf probe --add='schedule;update_rq_clock*'
158 158
159 Delete all probes on schedule(). 159 Delete all probes on schedule().
160 160
161 ./perf probe --del='schedule*' 161 ./perf probe --del='schedule*'
162 162
163 163
164 SEE ALSO 164 SEE ALSO
165 -------- 165 --------
166 linkperf:perf-trace[1], linkperf:perf-record[1] 166 linkperf:perf-trace[1], linkperf:perf-record[1]
167 167
tools/perf/Documentation/perf-record.txt
1 perf-record(1) 1 perf-record(1)
2 ============== 2 ==============
3 3
4 NAME 4 NAME
5 ---- 5 ----
6 perf-record - Run a command and record its profile into perf.data 6 perf-record - Run a command and record its profile into perf.data
7 7
8 SYNOPSIS 8 SYNOPSIS
9 -------- 9 --------
10 [verse] 10 [verse]
11 'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> 11 'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
12 'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>] 12 'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
13 13
14 DESCRIPTION 14 DESCRIPTION
15 ----------- 15 -----------
16 This command runs a command and gathers a performance counter profile 16 This command runs a command and gathers a performance counter profile
17 from it, into perf.data - without displaying anything. 17 from it, into perf.data - without displaying anything.
18 18
19 This file can then be inspected later on, using 'perf report'. 19 This file can then be inspected later on, using 'perf report'.
20 20
21 21
22 OPTIONS 22 OPTIONS
23 ------- 23 -------
24 <command>...:: 24 <command>...::
25 Any command you can specify in a shell. 25 Any command you can specify in a shell.
26 26
27 -e:: 27 -e::
28 --event=:: 28 --event=::
29 Select the PMU event. Selection can be: 29 Select the PMU event. Selection can be:
30 30
31 - a symbolic event name (use 'perf list' to list all events) 31 - a symbolic event name (use 'perf list' to list all events)
32 32
33 - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a 33 - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
34 hexadecimal event descriptor. 34 hexadecimal event descriptor.
35 35
36 - a hardware breakpoint event in the form of '\mem:addr[:access]' 36 - a hardware breakpoint event in the form of '\mem:addr[:access]'
37 where addr is the address in memory you want to break in. 37 where addr is the address in memory you want to break in.
38 Access is the memory access type (read, write, execute) it can 38 Access is the memory access type (read, write, execute) it can
39 be passed as follows: '\mem:addr[:[r][w][x]]'. 39 be passed as follows: '\mem:addr[:[r][w][x]]'.
40 If you want to profile read-write accesses in 0x1000, just set 40 If you want to profile read-write accesses in 0x1000, just set
41 'mem:0x1000:rw'. 41 'mem:0x1000:rw'.
42
43 --filter=<filter>::
44 Event filter.
45
42 -a:: 46 -a::
43 System-wide collection. 47 --all-cpus::
48 System-wide collection from all CPUs.
44 49
45 -l:: 50 -l::
46 Scale counter values. 51 Scale counter values.
47 52
48 -p:: 53 -p::
49 --pid=:: 54 --pid=::
50 Record events on existing pid. 55 Record events on existing process ID.
51 56
57 -t::
58 --tid=::
59 Record events on existing thread ID.
60
52 -r:: 61 -r::
53 --realtime=:: 62 --realtime=::
54 Collect data with this RT SCHED_FIFO priority. 63 Collect data with this RT SCHED_FIFO priority.
55 -A:: 64 -A::
56 --append:: 65 --append::
57 Append to the output file to do incremental profiling. 66 Append to the output file to do incremental profiling.
58 67
59 -f:: 68 -f::
60 --force:: 69 --force::
61 Overwrite existing data file. (deprecated) 70 Overwrite existing data file. (deprecated)
62 71
63 -c:: 72 -c::
64 --count=:: 73 --count=::
65 Event period to sample. 74 Event period to sample.
66 75
67 -o:: 76 -o::
68 --output=:: 77 --output=::
69 Output file name. 78 Output file name.
70 79
71 -i:: 80 -i::
72 --no-inherit:: 81 --no-inherit::
73 Child tasks do not inherit counters. 82 Child tasks do not inherit counters.
74 -F:: 83 -F::
75 --freq=:: 84 --freq=::
76 Profile at this frequency. 85 Profile at this frequency.
77 86
78 -m:: 87 -m::
79 --mmap-pages=:: 88 --mmap-pages=::
80 Number of mmap data pages. 89 Number of mmap data pages.
81 90
82 -g:: 91 -g::
83 --call-graph:: 92 --call-graph::
84 Do call-graph (stack chain/backtrace) recording. 93 Do call-graph (stack chain/backtrace) recording.
85 94
86 -q:: 95 -q::
87 --quiet:: 96 --quiet::
88 Don't print any message, useful for scripting. 97 Don't print any message, useful for scripting.
89 98
90 -v:: 99 -v::
91 --verbose:: 100 --verbose::
92 Be more verbose (show counter open errors, etc). 101 Be more verbose (show counter open errors, etc).
93 102
94 -s:: 103 -s::
95 --stat:: 104 --stat::
96 Per thread counts. 105 Per thread counts.
97 106
98 -d:: 107 -d::
99 --data:: 108 --data::
100 Sample addresses. 109 Sample addresses.
101 110
102 -n:: 111 -n::
103 --no-samples:: 112 --no-samples::
104 Don't sample. 113 Don't sample.
105 114
106 -R:: 115 -R::
107 --raw-samples:: 116 --raw-samples::
108 Collect raw sample records from all opened counters (default for tracepoint counters). 117 Collect raw sample records from all opened counters (default for tracepoint counters).
109 118
110 -C:: 119 -C::
111 --cpu:: 120 --cpu::
112 Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a 121 Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
113 comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. 122 comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
114 In per-thread mode with inheritance mode on (default), samples are captured only when 123 In per-thread mode with inheritance mode on (default), samples are captured only when
115 the thread executes on the designated CPUs. Default is to monitor all CPUs. 124 the thread executes on the designated CPUs. Default is to monitor all CPUs.
116 125
117 -N:: 126 -N::
118 --no-buildid-cache:: 127 --no-buildid-cache::
119 Do not update the buildid cache. This saves some overhead in situations 128 Do not update the buildid cache. This saves some overhead in situations
120 where the information in the perf.data file (which includes buildids) 129 where the information in the perf.data file (which includes buildids)
121 is sufficient. 130 is sufficient.
122 131
123 SEE ALSO 132 SEE ALSO
124 -------- 133 --------
125 linkperf:perf-stat[1], linkperf:perf-list[1] 134 linkperf:perf-stat[1], linkperf:perf-list[1]
126 135
tools/perf/Documentation/perf-report.txt
1 perf-report(1) 1 perf-report(1)
2 ============== 2 ==============
3 3
4 NAME 4 NAME
5 ---- 5 ----
6 perf-report - Read perf.data (created by perf record) and display the profile 6 perf-report - Read perf.data (created by perf record) and display the profile
7 7
8 SYNOPSIS 8 SYNOPSIS
9 -------- 9 --------
10 [verse] 10 [verse]
11 'perf report' [-i <file> | --input=file] 11 'perf report' [-i <file> | --input=file]
12 12
13 DESCRIPTION 13 DESCRIPTION
14 ----------- 14 -----------
15 This command displays the performance counter profile information recorded 15 This command displays the performance counter profile information recorded
16 via perf record. 16 via perf record.
17 17
18 OPTIONS 18 OPTIONS
19 ------- 19 -------
20 -i:: 20 -i::
21 --input=:: 21 --input=::
22 Input file name. (default: perf.data) 22 Input file name. (default: perf.data)
23
24 -v::
25 --verbose::
26 Be more verbose. (show symbol address, etc)
27
23 -d:: 28 -d::
24 --dsos=:: 29 --dsos=::
25 Only consider symbols in these dsos. CSV that understands 30 Only consider symbols in these dsos. CSV that understands
26 file://filename entries. 31 file://filename entries.
27 -n:: 32 -n::
28 --show-nr-samples:: 33 --show-nr-samples::
29 Show the number of samples for each symbol 34 Show the number of samples for each symbol
35
36 --showcpuutilization::
37 Show sample percentage for different cpu modes.
38
30 -T:: 39 -T::
31 --threads:: 40 --threads::
32 Show per-thread event counters 41 Show per-thread event counters
33 -C:: 42 -C::
34 --comms=:: 43 --comms=::
35 Only consider symbols in these comms. CSV that understands 44 Only consider symbols in these comms. CSV that understands
36 file://filename entries. 45 file://filename entries.
37 -S:: 46 -S::
38 --symbols=:: 47 --symbols=::
39 Only consider these symbols. CSV that understands 48 Only consider these symbols. CSV that understands
40 file://filename entries. 49 file://filename entries.
41 50
51 -U::
52 --hide-unresolved::
53 Only display entries resolved to a symbol.
54
42 -s:: 55 -s::
43 --sort=:: 56 --sort=::
44 Sort by key(s): pid, comm, dso, symbol, parent. 57 Sort by key(s): pid, comm, dso, symbol, parent.
45 58
59 -p::
60 --parent=<regex>::
61 regex filter to identify parent, see: '--sort parent'
62
63 -x::
64 --exclude-other::
65 Only display entries with parent-match.
66
46 -w:: 67 -w::
47 --field-width=:: 68 --column-widths=<width[,width...]>::
48 Force each column width to the provided list, for large terminal 69 Force each column width to the provided list, for large terminal
49 readability. 70 readability.
50 71
51 -t:: 72 -t::
52 --field-separator=:: 73 --field-separator=::
53 74
54 Use a special separator character and don't pad with spaces, replacing 75 Use a special separator character and don't pad with spaces, replacing
55 all occurances of this separator in symbol names (and other output) 76 all occurrences of this separator in symbol names (and other output)
56 with a '.' character, which is thus the only invalid separator. 77 with a '.' character, which is thus the only invalid separator.
57 78
79 -D::
80 --dump-raw-trace::
81 Dump raw trace in ASCII.
82
58 -g [type,min]:: 83 -g [type,min]::
59 --call-graph:: 84 --call-graph::
60 Display callchains using type and min percent threshold. 85 Display call chains using type and min percent threshold.
61 type can be either: 86 type can be either:
62 - flat: single column, linear exposure of callchains. 87 - flat: single column, linear exposure of call chains.
63 - graph: use a graph tree, displaying absolute overhead rates. 88 - graph: use a graph tree, displaying absolute overhead rates.
64 - fractal: like graph, but displays relative rates. Each branch of 89 - fractal: like graph, but displays relative rates. Each branch of
65 the tree is considered as a new profiled object. + 90 the tree is considered as a new profiled object. +
66 Default: fractal,0.5. 91 Default: fractal,0.5.
67 92
93 --pretty=<key>::
94 Pretty printing style. key: normal, raw
95
68 --stdio:: Use the stdio interface. 96 --stdio:: Use the stdio interface.
69 97
70 --tui:: Use the TUI interface, that is integrated with annotate and allows 98 --tui:: Use the TUI interface, that is integrated with annotate and allows
71 zooming into DSOs or threads, among other features. Use of --tui 99 zooming into DSOs or threads, among other features. Use of --tui
72 requires a tty, if one is not present, as when piping to other 100 requires a tty, if one is not present, as when piping to other
73 commands, the stdio interface is used. 101 commands, the stdio interface is used.
102
103 -k::
104 --vmlinux=<file>::
105 vmlinux pathname
106
107 -m::
108 --modules::
109 Load module symbols. WARNING: This should only be used with -k and
110 a LIVE kernel.
111
112 -f::
113 --force::
114 Don't complain, do it.
74 115
75 SEE ALSO 116 SEE ALSO
76 -------- 117 --------
77 linkperf:perf-stat[1] 118 linkperf:perf-stat[1]
78 119
tools/perf/Documentation/perf-sched.txt
1 perf-sched(1) 1 perf-sched(1)
2 ============== 2 ==============
3 3
4 NAME 4 NAME
5 ---- 5 ----
6 perf-sched - Tool to trace/measure scheduler properties (latencies) 6 perf-sched - Tool to trace/measure scheduler properties (latencies)
7 7
8 SYNOPSIS 8 SYNOPSIS
9 -------- 9 --------
10 [verse] 10 [verse]
11 'perf sched' {record|latency|replay|trace} 11 'perf sched' {record|latency|map|replay|trace}
12 12
13 DESCRIPTION 13 DESCRIPTION
14 ----------- 14 -----------
15 There are four variants of perf sched: 15 There are five variants of perf sched:
16 16
17 'perf sched record <command>' to record the scheduling events 17 'perf sched record <command>' to record the scheduling events
18 of an arbitrary workload. 18 of an arbitrary workload.
19 19
20 'perf sched latency' to report the per task scheduling latencies 20 'perf sched latency' to report the per task scheduling latencies
21 and other scheduling properties of the workload. 21 and other scheduling properties of the workload.
22 22
23 'perf sched trace' to see a detailed trace of the workload that 23 'perf sched trace' to see a detailed trace of the workload that
24 was recorded. 24 was recorded.
25 25
26 'perf sched replay' to simulate the workload that was recorded 26 'perf sched replay' to simulate the workload that was recorded
27 via perf sched record. (this is done by starting up mockup threads 27 via perf sched record. (this is done by starting up mockup threads
28 that mimic the workload based on the events in the trace. These 28 that mimic the workload based on the events in the trace. These
29 threads can then replay the timings (CPU runtime and sleep patterns) 29 threads can then replay the timings (CPU runtime and sleep patterns)
30 of the workload as it occurred when it was recorded - and can repeat 30 of the workload as it occurred when it was recorded - and can repeat
31 it a number of times, measuring its performance.) 31 it a number of times, measuring its performance.)
32 32
33 'perf sched map' to print a textual context-switching outline of
34 workload captured via perf sched record. Columns stand for
35 individual CPUs, and the two-letter shortcuts stand for tasks that
36 are running on a CPU. A '*' denotes the CPU that had the event, and
37 a dot signals an idle CPU.
38
33 OPTIONS 39 OPTIONS
34 ------- 40 -------
41 -i::
42 --input=<file>::
43 Input file name. (default: perf.data)
44
45 -v::
46 --verbose::
47 Be more verbose. (show symbol address, etc)
48
35 -D:: 49 -D::
36 --dump-raw-trace=:: 50 --dump-raw-trace=::
37 Display verbose dump of the sched data. 51 Display verbose dump of the sched data.
38 52
39 SEE ALSO 53 SEE ALSO
40 -------- 54 --------
41 linkperf:perf-record[1] 55 linkperf:perf-record[1]
42 56
tools/perf/Documentation/perf-script.txt
1 perf-script(1) 1 perf-script(1)
2 ============= 2 =============
3 3
4 NAME 4 NAME
5 ---- 5 ----
6 perf-script - Read perf.data (created by perf record) and display trace output 6 perf-script - Read perf.data (created by perf record) and display trace output
7 7
8 SYNOPSIS 8 SYNOPSIS
9 -------- 9 --------
10 [verse] 10 [verse]
11 'perf script' [<options>] 11 'perf script' [<options>]
12 'perf script' [<options>] record <script> [<record-options>] <command> 12 'perf script' [<options>] record <script> [<record-options>] <command>
13 'perf script' [<options>] report <script> [script-args] 13 'perf script' [<options>] report <script> [script-args]
14 'perf script' [<options>] <script> <required-script-args> [<record-options>] <command> 14 'perf script' [<options>] <script> <required-script-args> [<record-options>] <command>
15 'perf script' [<options>] <top-script> [script-args] 15 'perf script' [<options>] <top-script> [script-args]
16 16
17 DESCRIPTION 17 DESCRIPTION
18 ----------- 18 -----------
19 This command reads the input file and displays the trace recorded. 19 This command reads the input file and displays the trace recorded.
20 20
21 There are several variants of perf script: 21 There are several variants of perf script:
22 22
23 'perf script' to see a detailed trace of the workload that was 23 'perf script' to see a detailed trace of the workload that was
24 recorded. 24 recorded.
25 25
26 You can also run a set of pre-canned scripts that aggregate and 26 You can also run a set of pre-canned scripts that aggregate and
27 summarize the raw trace data in various ways (the list of scripts is 27 summarize the raw trace data in various ways (the list of scripts is
28 available via 'perf script -l'). The following variants allow you to 28 available via 'perf script -l'). The following variants allow you to
29 record and run those scripts: 29 record and run those scripts:
30 30
31 'perf script record <script> <command>' to record the events required 31 'perf script record <script> <command>' to record the events required
32 for 'perf script report'. <script> is the name displayed in the 32 for 'perf script report'. <script> is the name displayed in the
33 output of 'perf script --list' i.e. the actual script name minus any 33 output of 'perf script --list' i.e. the actual script name minus any
34 language extension. If <command> is not specified, the events are 34 language extension. If <command> is not specified, the events are
35 recorded using the -a (system-wide) 'perf record' option. 35 recorded using the -a (system-wide) 'perf record' option.
36 36
37 'perf script report <script> [args]' to run and display the results 37 'perf script report <script> [args]' to run and display the results
38 of <script>. <script> is the name displayed in the output of 'perf 38 of <script>. <script> is the name displayed in the output of 'perf
39 script --list' i.e. the actual script name minus any language 39 script --list' i.e. the actual script name minus any language
40 extension. The perf.data output from a previous run of 'perf script 40 extension. The perf.data output from a previous run of 'perf script
41 record <script>' is used and should be present for this command to 41 record <script>' is used and should be present for this command to
42 succeed. [args] refers to the (mainly optional) args expected by 42 succeed. [args] refers to the (mainly optional) args expected by
43 the script. 43 the script.
44 44
45 'perf script <script> <required-script-args> <command>' to both 45 'perf script <script> <required-script-args> <command>' to both
46 record the events required for <script> and to run the <script> 46 record the events required for <script> and to run the <script>
47 using 'live-mode' i.e. without writing anything to disk. <script> 47 using 'live-mode' i.e. without writing anything to disk. <script>
48 is the name displayed in the output of 'perf script --list' i.e. the 48 is the name displayed in the output of 'perf script --list' i.e. the
49 actual script name minus any language extension. If <command> is 49 actual script name minus any language extension. If <command> is
50 not specified, the events are recorded using the -a (system-wide) 50 not specified, the events are recorded using the -a (system-wide)
51 'perf record' option. If <script> has any required args, they 51 'perf record' option. If <script> has any required args, they
52 should be specified before <command>. This mode doesn't allow for 52 should be specified before <command>. This mode doesn't allow for
53 optional script args to be specified; if optional script args are 53 optional script args to be specified; if optional script args are
54 desired, they can be specified using separate 'perf script record' 54 desired, they can be specified using separate 'perf script record'
55 and 'perf script report' commands, with the stdout of the record step 55 and 'perf script report' commands, with the stdout of the record step
56 piped to the stdin of the report script, using the '-o -' and '-i -' 56 piped to the stdin of the report script, using the '-o -' and '-i -'
57 options of the corresponding commands. 57 options of the corresponding commands.
58 58
59 'perf script <top-script>' to both record the events required for 59 'perf script <top-script>' to both record the events required for
60 <top-script> and to run the <top-script> using 'live-mode' 60 <top-script> and to run the <top-script> using 'live-mode'
61 i.e. without writing anything to disk. <top-script> is the name 61 i.e. without writing anything to disk. <top-script> is the name
62 displayed in the output of 'perf script --list' i.e. the actual 62 displayed in the output of 'perf script --list' i.e. the actual
63 script name minus any language extension; a <top-script> is defined 63 script name minus any language extension; a <top-script> is defined
64 as any script name ending with the string 'top'. 64 as any script name ending with the string 'top'.
65 65
66 [<record-options>] can be passed to the record steps of 'perf script 66 [<record-options>] can be passed to the record steps of 'perf script
67 record' and 'live-mode' variants; this isn't possible however for 67 record' and 'live-mode' variants; this isn't possible however for
68 <top-script> 'live-mode' or 'perf script report' variants. 68 <top-script> 'live-mode' or 'perf script report' variants.
69 69
70 See the 'SEE ALSO' section for links to language-specific 70 See the 'SEE ALSO' section for links to language-specific
71 information on how to write and run your own trace scripts. 71 information on how to write and run your own trace scripts.
72 72
73 OPTIONS 73 OPTIONS
74 ------- 74 -------
75 <command>...:: 75 <command>...::
76 Any command you can specify in a shell. 76 Any command you can specify in a shell.
77 77
78 -D:: 78 -D::
79 --dump-raw-trace=:: 79 --dump-raw-trace=::
80 Display verbose dump of the trace data. 80 Display verbose dump of the trace data.
81 81
82 -L:: 82 -L::
83 --Latency=:: 83 --Latency=::
84 Show latency attributes (irqs/preemption disabled, etc). 84 Show latency attributes (irqs/preemption disabled, etc).
85 85
86 -l:: 86 -l::
87 --list=:: 87 --list=::
88 Display a list of available trace scripts. 88 Display a list of available trace scripts.
89 89
90 -s ['lang']:: 90 -s ['lang']::
91 --script=:: 91 --script=::
92 Process trace data with the given script ([lang]:script[.ext]). 92 Process trace data with the given script ([lang]:script[.ext]).
93 If the string 'lang' is specified in place of a script name, a 93 If the string 'lang' is specified in place of a script name, a
94 list of supported languages will be displayed instead. 94 list of supported languages will be displayed instead.
95 95
96 -g:: 96 -g::
97 --gen-script=:: 97 --gen-script=::
98 Generate perf-script.[ext] starter script for given language, 98 Generate perf-script.[ext] starter script for given language,
99 using current perf.data. 99 using current perf.data.
100 100
101 -a:: 101 -a::
102 Force system-wide collection. Scripts run without a <command> 102 Force system-wide collection. Scripts run without a <command>
103 normally use -a by default, while scripts run with a <command> 103 normally use -a by default, while scripts run with a <command>
104 normally don't - this option allows the latter to be run in 104 normally don't - this option allows the latter to be run in
105 system-wide mode. 105 system-wide mode.
106 106
107 -i::
108 --input=::
109 Input file name.
110
111 -d::
112 --debug-mode::
113 Do various checks like samples ordering and lost events.
107 114
108 SEE ALSO 115 SEE ALSO
109 -------- 116 --------
110 linkperf:perf-record[1], linkperf:perf-script-perl[1], 117 linkperf:perf-record[1], linkperf:perf-script-perl[1],
111 linkperf:perf-script-python[1] 118 linkperf:perf-script-python[1]
112 119
tools/perf/Documentation/perf-stat.txt
1 perf-stat(1) 1 perf-stat(1)
2 ============ 2 ============
3 3
4 NAME 4 NAME
5 ---- 5 ----
6 perf-stat - Run a command and gather performance counter statistics 6 perf-stat - Run a command and gather performance counter statistics
7 7
8 SYNOPSIS 8 SYNOPSIS
9 -------- 9 --------
10 [verse] 10 [verse]
11 'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command> 11 'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
12 'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>] 12 'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
13 13
14 DESCRIPTION 14 DESCRIPTION
15 ----------- 15 -----------
16 This command runs a command and gathers performance counter statistics 16 This command runs a command and gathers performance counter statistics
17 from it. 17 from it.
18 18
19 19
20 OPTIONS 20 OPTIONS
21 ------- 21 -------
22 <command>...:: 22 <command>...::
23 Any command you can specify in a shell. 23 Any command you can specify in a shell.
24 24
25 25
26 -e:: 26 -e::
27 --event=:: 27 --event=::
28 Select the PMU event. Selection can be a symbolic event name 28 Select the PMU event. Selection can be a symbolic event name
29 (use 'perf list' to list all events) or a raw PMU 29 (use 'perf list' to list all events) or a raw PMU
30 event (eventsel+umask) in the form of rNNN where NNN is a 30 event (eventsel+umask) in the form of rNNN where NNN is a
31 hexadecimal event descriptor. 31 hexadecimal event descriptor.
32 32
33 -i:: 33 -i::
34 --no-inherit:: 34 --no-inherit::
35 child tasks do not inherit counters 35 child tasks do not inherit counters
36 -p:: 36 -p::
37 --pid=<pid>:: 37 --pid=<pid>::
38 stat events on existing pid 38 stat events on existing process id
39 39
40 -t::
41 --tid=<tid>::
42 stat events on existing thread id
43
44
40 -a:: 45 -a::
41 system-wide collection 46 --all-cpus::
47 system-wide collection from all CPUs
42 48
43 -c:: 49 -c::
44 scale counter values 50 --scale::
51 scale/normalize counter values
45 52
53 -r::
54 --repeat=<n>::
55 repeat command and print average + stddev (max: 100)
56
46 -B:: 57 -B::
58 --big-num::
47 print large numbers with thousands' separators according to locale 59 print large numbers with thousands' separators according to locale
48 60
49 -C:: 61 -C::
50 --cpu=:: 62 --cpu=::
51 Count only on the list of cpus provided. Multiple CPUs can be provided as a 63 Count only on the list of CPUs provided. Multiple CPUs can be provided as a
52 comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. 64 comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
53 In per-thread mode, this option is ignored. The -a option is still necessary 65 In per-thread mode, this option is ignored. The -a option is still necessary
54 to activate system-wide monitoring. Default is to count on all CPUs. 66 to activate system-wide monitoring. Default is to count on all CPUs.
55 67
56 -A:: 68 -A::
57 --no-aggr:: 69 --no-aggr::
58 Do not aggregate counts across all monitored CPUs in system-wide mode (-a). 70 Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
59 This option is only valid in system-wide mode. 71 This option is only valid in system-wide mode.
72
73 -n::
74 --null::
75 null run - don't start any counters
76
77 -v::
78 --verbose::
79 be more verbose (show counter open errors, etc)
80
81 -x SEP::
82 --field-separator SEP::
83 print counts using a CSV-style output to make it easy to import directly into
84 spreadsheets. Columns are separated by the string specified in SEP.
60 85
61 EXAMPLES 86 EXAMPLES
62 -------- 87 --------
63 88
64 $ perf stat -- make -j 89 $ perf stat -- make -j
65 90
66 Performance counter stats for 'make -j': 91 Performance counter stats for 'make -j':
67 92
68 8117.370256 task clock ticks # 11.281 CPU utilization factor 93 8117.370256 task clock ticks # 11.281 CPU utilization factor
69 678 context switches # 0.000 M/sec 94 678 context switches # 0.000 M/sec
70 133 CPU migrations # 0.000 M/sec 95 133 CPU migrations # 0.000 M/sec
71 235724 pagefaults # 0.029 M/sec 96 235724 pagefaults # 0.029 M/sec
72 24821162526 CPU cycles # 3057.784 M/sec 97 24821162526 CPU cycles # 3057.784 M/sec
73 18687303457 instructions # 2302.138 M/sec 98 18687303457 instructions # 2302.138 M/sec
74 172158895 cache references # 21.209 M/sec 99 172158895 cache references # 21.209 M/sec
75 27075259 cache misses # 3.335 M/sec 100 27075259 cache misses # 3.335 M/sec
76 101
77 Wall-clock time elapsed: 719.554352 msecs 102 Wall-clock time elapsed: 719.554352 msecs
78 103
79 SEE ALSO 104 SEE ALSO
80 -------- 105 --------
81 linkperf:perf-top[1], linkperf:perf-list[1] 106 linkperf:perf-top[1], linkperf:perf-list[1]
82 107
tools/perf/Documentation/perf-test.txt
1 perf-test(1) 1 perf-test(1)
2 ============ 2 ============
3 3
4 NAME 4 NAME
5 ---- 5 ----
6 perf-test - Runs sanity tests. 6 perf-test - Runs sanity tests.
7 7
8 SYNOPSIS 8 SYNOPSIS
9 -------- 9 --------
10 [verse] 10 [verse]
11 'perf test <options>' 11 'perf test <options>'
12 12
13 DESCRIPTION 13 DESCRIPTION
14 ----------- 14 -----------
15 This command does assorted sanity tests, initially thru linked routines but 15 This command does assorted sanity tests, initially through linked routines but
16 also will look for a directory with more tests in the form of scripts. 16 also will look for a directory with more tests in the form of scripts.
17 17
18 OPTIONS 18 OPTIONS
19 ------- 19 -------
20 -v:: 20 -v::
21 --verbose:: 21 --verbose::
22 Be more verbose. 22 Be more verbose.
23 23
tools/perf/Documentation/perf-top.txt
1 perf-top(1) 1 perf-top(1)
2 =========== 2 ===========
3 3
4 NAME 4 NAME
5 ---- 5 ----
6 perf-top - System profiling tool. 6 perf-top - System profiling tool.
7 7
8 SYNOPSIS 8 SYNOPSIS
9 -------- 9 --------
10 [verse] 10 [verse]
11 'perf top' [-e <EVENT> | --event=EVENT] [<options>] 11 'perf top' [-e <EVENT> | --event=EVENT] [<options>]
12 12
13 DESCRIPTION 13 DESCRIPTION
14 ----------- 14 -----------
15 This command generates and displays a performance counter profile in realtime. 15 This command generates and displays a performance counter profile in real time.
16 16
17 17
18 OPTIONS 18 OPTIONS
19 ------- 19 -------
20 -a:: 20 -a::
21 --all-cpus:: 21 --all-cpus::
22 System-wide collection. (default) 22 System-wide collection. (default)
23 23
24 -c <count>:: 24 -c <count>::
25 --count=<count>:: 25 --count=<count>::
26 Event period to sample. 26 Event period to sample.
27 27
28 -C <cpu-list>:: 28 -C <cpu-list>::
29 --cpu=<cpu>:: 29 --cpu=<cpu>::
30 Monitor only on the list of cpus provided. Multiple CPUs can be provided as a 30 Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
31 comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. 31 comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
32 Default is to monitor all CPUs. 32 Default is to monitor all CPUs.
33 33
34 -d <seconds>:: 34 -d <seconds>::
35 --delay=<seconds>:: 35 --delay=<seconds>::
36 Number of seconds to delay between refreshes. 36 Number of seconds to delay between refreshes.
37 37
38 -e <event>:: 38 -e <event>::
39 --event=<event>:: 39 --event=<event>::
40 Select the PMU event. Selection can be a symbolic event name 40 Select the PMU event. Selection can be a symbolic event name
41 (use 'perf list' to list all events) or a raw PMU 41 (use 'perf list' to list all events) or a raw PMU
42 event (eventsel+umask) in the form of rNNN where NNN is a 42 event (eventsel+umask) in the form of rNNN where NNN is a
43 hexadecimal event descriptor. 43 hexadecimal event descriptor.
44 44
45 -E <entries>:: 45 -E <entries>::
46 --entries=<entries>:: 46 --entries=<entries>::
47 Display this many functions. 47 Display this many functions.
48 48
49 -f <count>:: 49 -f <count>::
50 --count-filter=<count>:: 50 --count-filter=<count>::
51 Only display functions with more events than this. 51 Only display functions with more events than this.
52 52
53 -g::
54 --group::
55 Put the counters into a counter group.
56
53 -F <freq>:: 57 -F <freq>::
54 --freq=<freq>:: 58 --freq=<freq>::
55 Profile at this frequency. 59 Profile at this frequency.
56 60
57 -i:: 61 -i::
58 --inherit:: 62 --inherit::
59 Child tasks inherit counters, only makes sense with -p option. 63 Child tasks inherit counters, only makes sense with -p option.
60 64
61 -k <path>:: 65 -k <path>::
62 --vmlinux=<path>:: 66 --vmlinux=<path>::
63 Path to vmlinux. Required for annotation functionality. 67 Path to vmlinux. Required for annotation functionality.
64 68
65 -m <pages>:: 69 -m <pages>::
66 --mmap-pages=<pages>:: 70 --mmap-pages=<pages>::
67 Number of mmapped data pages. 71 Number of mmapped data pages.
68 72
69 -p <pid>:: 73 -p <pid>::
70 --pid=<pid>:: 74 --pid=<pid>::
71 Profile events on existing pid. 75 Profile events on existing Process ID.
72 76
77 -t <tid>::
78 --tid=<tid>::
79 Profile events on existing thread ID.
80
73 -r <priority>:: 81 -r <priority>::
74 --realtime=<priority>:: 82 --realtime=<priority>::
75 Collect data with this RT SCHED_FIFO priority. 83 Collect data with this RT SCHED_FIFO priority.
76 84
77 -s <symbol>:: 85 -s <symbol>::
78 --sym-annotate=<symbol>:: 86 --sym-annotate=<symbol>::
79 Annotate this symbol. 87 Annotate this symbol.
88
89 -K::
90 --hide_kernel_symbols::
91 Hide kernel symbols.
92
93 -U::
94 --hide_user_symbols::
95 Hide user symbols.
96
97 -D::
98 --dump-symtab::
99 Dump the symbol table used for profiling.
80 100
81 -v:: 101 -v::
82 --verbose:: 102 --verbose::
83 Be more verbose (show counter open errors, etc). 103 Be more verbose (show counter open errors, etc).
84 104
85 -z:: 105 -z::
86 --zero:: 106 --zero::
87 Zero history across display updates. 107 Zero history across display updates.
88 108
89 INTERACTIVE PROMPTING KEYS 109 INTERACTIVE PROMPTING KEYS
90 -------------------------- 110 --------------------------
91 111
92 [d]:: 112 [d]::
93 Display refresh delay. 113 Display refresh delay.
94 114
95 [e]:: 115 [e]::
96 Number of entries to display. 116 Number of entries to display.
97 117
98 [E]:: 118 [E]::
99 Event to display when multiple counters are active. 119 Event to display when multiple counters are active.
100 120
101 [f]:: 121 [f]::
102 Profile display filter (>= hit count). 122 Profile display filter (>= hit count).
103 123
104 [F]:: 124 [F]::
105 Annotation display filter (>= % of total). 125 Annotation display filter (>= % of total).
106 126
107 [s]:: 127 [s]::
108 Annotate symbol. 128 Annotate symbol.
109 129
110 [S]:: 130 [S]::
111 Stop annotation, return to full profile display. 131 Stop annotation, return to full profile display.
112 132
113 [w]:: 133 [w]::
114 Toggle between weighted sum and individual count[E]r profile. 134 Toggle between weighted sum and individual count[E]r profile.
115 135
116 [z]:: 136 [z]::
117 Toggle event count zeroing across display updates. 137 Toggle event count zeroing across display updates.
118 138
119 [qQ]:: 139 [qQ]::
120 Quit. 140 Quit.
121 141
122 Pressing any unmapped key displays a menu, and prompts for input. 142 Pressing any unmapped key displays a menu, and prompts for input.
123 143
124 144
125 SEE ALSO 145 SEE ALSO
126 -------- 146 --------
127 linkperf:perf-stat[1], linkperf:perf-list[1] 147 linkperf:perf-stat[1], linkperf:perf-list[1]
128 148
tools/perf/builtin-diff.c
1 /* 1 /*
2 * builtin-diff.c 2 * builtin-diff.c
3 * 3 *
4 * Builtin diff command: Analyze two perf.data input files, look up and read 4 * Builtin diff command: Analyze two perf.data input files, look up and read
5 * DSOs and symbol information, sort them and produce a diff. 5 * DSOs and symbol information, sort them and produce a diff.
6 */ 6 */
7 #include "builtin.h" 7 #include "builtin.h"
8 8
9 #include "util/debug.h" 9 #include "util/debug.h"
10 #include "util/event.h" 10 #include "util/event.h"
11 #include "util/hist.h" 11 #include "util/hist.h"
12 #include "util/session.h" 12 #include "util/session.h"
13 #include "util/sort.h" 13 #include "util/sort.h"
14 #include "util/symbol.h" 14 #include "util/symbol.h"
15 #include "util/util.h" 15 #include "util/util.h"
16 16
17 #include <stdlib.h> 17 #include <stdlib.h>
18 18
19 static char const *input_old = "perf.data.old", 19 static char const *input_old = "perf.data.old",
20 *input_new = "perf.data"; 20 *input_new = "perf.data";
21 static char diff__default_sort_order[] = "dso,symbol"; 21 static char diff__default_sort_order[] = "dso,symbol";
22 static bool force; 22 static bool force;
23 static bool show_displacement; 23 static bool show_displacement;
24 24
25 static int hists__add_entry(struct hists *self, 25 static int hists__add_entry(struct hists *self,
26 struct addr_location *al, u64 period) 26 struct addr_location *al, u64 period)
27 { 27 {
28 if (__hists__add_entry(self, al, NULL, period) != NULL) 28 if (__hists__add_entry(self, al, NULL, period) != NULL)
29 return 0; 29 return 0;
30 return -ENOMEM; 30 return -ENOMEM;
31 } 31 }
32 32
33 static int diff__process_sample_event(event_t *event, struct perf_session *session) 33 static int diff__process_sample_event(event_t *event, struct perf_session *session)
34 { 34 {
35 struct addr_location al; 35 struct addr_location al;
36 struct sample_data data = { .period = 1, }; 36 struct sample_data data = { .period = 1, };
37 37
38 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { 38 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
39 pr_warning("problem processing %d event, skipping it.\n", 39 pr_warning("problem processing %d event, skipping it.\n",
40 event->header.type); 40 event->header.type);
41 return -1; 41 return -1;
42 } 42 }
43 43
44 if (al.filtered || al.sym == NULL) 44 if (al.filtered || al.sym == NULL)
45 return 0; 45 return 0;
46 46
47 if (hists__add_entry(&session->hists, &al, data.period)) { 47 if (hists__add_entry(&session->hists, &al, data.period)) {
48 pr_warning("problem incrementing symbol period, skipping event\n"); 48 pr_warning("problem incrementing symbol period, skipping event\n");
49 return -1; 49 return -1;
50 } 50 }
51 51
52 session->hists.stats.total_period += data.period; 52 session->hists.stats.total_period += data.period;
53 return 0; 53 return 0;
54 } 54 }
55 55
56 static struct perf_event_ops event_ops = { 56 static struct perf_event_ops event_ops = {
57 .sample = diff__process_sample_event, 57 .sample = diff__process_sample_event,
58 .mmap = event__process_mmap, 58 .mmap = event__process_mmap,
59 .comm = event__process_comm, 59 .comm = event__process_comm,
60 .exit = event__process_task, 60 .exit = event__process_task,
61 .fork = event__process_task, 61 .fork = event__process_task,
62 .lost = event__process_lost, 62 .lost = event__process_lost,
63 }; 63 };
64 64
65 static void perf_session__insert_hist_entry_by_name(struct rb_root *root, 65 static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
66 struct hist_entry *he) 66 struct hist_entry *he)
67 { 67 {
68 struct rb_node **p = &root->rb_node; 68 struct rb_node **p = &root->rb_node;
69 struct rb_node *parent = NULL; 69 struct rb_node *parent = NULL;
70 struct hist_entry *iter; 70 struct hist_entry *iter;
71 71
72 while (*p != NULL) { 72 while (*p != NULL) {
73 parent = *p; 73 parent = *p;
74 iter = rb_entry(parent, struct hist_entry, rb_node); 74 iter = rb_entry(parent, struct hist_entry, rb_node);
75 if (hist_entry__cmp(he, iter) < 0) 75 if (hist_entry__cmp(he, iter) < 0)
76 p = &(*p)->rb_left; 76 p = &(*p)->rb_left;
77 else 77 else
78 p = &(*p)->rb_right; 78 p = &(*p)->rb_right;
79 } 79 }
80 80
81 rb_link_node(&he->rb_node, parent, p); 81 rb_link_node(&he->rb_node, parent, p);
82 rb_insert_color(&he->rb_node, root); 82 rb_insert_color(&he->rb_node, root);
83 } 83 }
84 84
85 static void hists__resort_entries(struct hists *self) 85 static void hists__resort_entries(struct hists *self)
86 { 86 {
87 unsigned long position = 1; 87 unsigned long position = 1;
88 struct rb_root tmp = RB_ROOT; 88 struct rb_root tmp = RB_ROOT;
89 struct rb_node *next = rb_first(&self->entries); 89 struct rb_node *next = rb_first(&self->entries);
90 90
91 while (next != NULL) { 91 while (next != NULL) {
92 struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node); 92 struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node);
93 93
94 next = rb_next(&n->rb_node); 94 next = rb_next(&n->rb_node);
95 rb_erase(&n->rb_node, &self->entries); 95 rb_erase(&n->rb_node, &self->entries);
96 n->position = position++; 96 n->position = position++;
97 perf_session__insert_hist_entry_by_name(&tmp, n); 97 perf_session__insert_hist_entry_by_name(&tmp, n);
98 } 98 }
99 99
100 self->entries = tmp; 100 self->entries = tmp;
101 } 101 }
102 102
/*
 * Put the entries into output order first, then record each entry's
 * position in that order while re-keying the tree by hist_entry__cmp()
 * (see hists__resort_entries()).
 */
static void hists__set_positions(struct hists *self)
{
	hists__output_resort(self);
	hists__resort_entries(self);
}
108 108
109 static struct hist_entry *hists__find_entry(struct hists *self, 109 static struct hist_entry *hists__find_entry(struct hists *self,
110 struct hist_entry *he) 110 struct hist_entry *he)
111 { 111 {
112 struct rb_node *n = self->entries.rb_node; 112 struct rb_node *n = self->entries.rb_node;
113 113
114 while (n) { 114 while (n) {
115 struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node); 115 struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node);
116 int64_t cmp = hist_entry__cmp(he, iter); 116 int64_t cmp = hist_entry__cmp(he, iter);
117 117
118 if (cmp < 0) 118 if (cmp < 0)
119 n = n->rb_left; 119 n = n->rb_left;
120 else if (cmp > 0) 120 else if (cmp > 0)
121 n = n->rb_right; 121 n = n->rb_right;
122 else 122 else
123 return iter; 123 return iter;
124 } 124 }
125 125
126 return NULL; 126 return NULL;
127 } 127 }
128 128
129 static void hists__match(struct hists *older, struct hists *newer) 129 static void hists__match(struct hists *older, struct hists *newer)
130 { 130 {
131 struct rb_node *nd; 131 struct rb_node *nd;
132 132
133 for (nd = rb_first(&newer->entries); nd; nd = rb_next(nd)) { 133 for (nd = rb_first(&newer->entries); nd; nd = rb_next(nd)) {
134 struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node); 134 struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node);
135 pos->pair = hists__find_entry(older, pos); 135 pos->pair = hists__find_entry(older, pos);
136 } 136 }
137 } 137 }
138 138
139 static int __cmd_diff(void) 139 static int __cmd_diff(void)
140 { 140 {
141 int ret, i; 141 int ret, i;
142 struct perf_session *session[2]; 142 struct perf_session *session[2];
143 143
144 session[0] = perf_session__new(input_old, O_RDONLY, force, false); 144 session[0] = perf_session__new(input_old, O_RDONLY, force, false);
145 session[1] = perf_session__new(input_new, O_RDONLY, force, false); 145 session[1] = perf_session__new(input_new, O_RDONLY, force, false);
146 if (session[0] == NULL || session[1] == NULL) 146 if (session[0] == NULL || session[1] == NULL)
147 return -ENOMEM; 147 return -ENOMEM;
148 148
149 for (i = 0; i < 2; ++i) { 149 for (i = 0; i < 2; ++i) {
150 ret = perf_session__process_events(session[i], &event_ops); 150 ret = perf_session__process_events(session[i], &event_ops);
151 if (ret) 151 if (ret)
152 goto out_delete; 152 goto out_delete;
153 } 153 }
154 154
155 hists__output_resort(&session[1]->hists); 155 hists__output_resort(&session[1]->hists);
156 if (show_displacement) 156 if (show_displacement)
157 hists__set_positions(&session[0]->hists); 157 hists__set_positions(&session[0]->hists);
158 158
159 hists__match(&session[0]->hists, &session[1]->hists); 159 hists__match(&session[0]->hists, &session[1]->hists);
160 hists__fprintf(&session[1]->hists, &session[0]->hists, 160 hists__fprintf(&session[1]->hists, &session[0]->hists,
161 show_displacement, stdout); 161 show_displacement, stdout);
162 out_delete: 162 out_delete:
163 for (i = 0; i < 2; ++i) 163 for (i = 0; i < 2; ++i)
164 perf_session__delete(session[i]); 164 perf_session__delete(session[i]);
165 return ret; 165 return ret;
166 } 166 }
167 167
/* NULL-terminated usage lines passed to the option parser for 'perf diff'. */
static const char * const diff_usage[] = {
	"perf diff [<options>] [old_file] [new_file]",
	NULL,
};
172 172
173 static const struct option options[] = { 173 static const struct option options[] = {
174 OPT_INCR('v', "verbose", &verbose, 174 OPT_INCR('v', "verbose", &verbose,
175 "be more verbose (show symbol address, etc)"), 175 "be more verbose (show symbol address, etc)"),
176 OPT_BOOLEAN('m', "displacement", &show_displacement, 176 OPT_BOOLEAN('M', "displacement", &show_displacement,
177 "Show position displacement relative to baseline"), 177 "Show position displacement relative to baseline"),
178 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 178 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
179 "dump raw trace in ASCII"), 179 "dump raw trace in ASCII"),
180 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 180 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
181 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 181 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
182 "load module symbols - WARNING: use only with -k and LIVE kernel"), 182 "load module symbols - WARNING: use only with -k and LIVE kernel"),
183 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", 183 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
184 "only consider symbols in these dsos"), 184 "only consider symbols in these dsos"),
185 OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", 185 OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
186 "only consider symbols in these comms"), 186 "only consider symbols in these comms"),
187 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", 187 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
188 "only consider these symbols"), 188 "only consider these symbols"),
189 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 189 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
190 "sort by key(s): pid, comm, dso, symbol, parent"), 190 "sort by key(s): pid, comm, dso, symbol, parent"),
191 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", 191 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
192 "separator for columns, no spaces will be added between " 192 "separator for columns, no spaces will be added between "
193 "columns '.' is reserved."), 193 "columns '.' is reserved."),
194 OPT_END() 194 OPT_END()
195 }; 195 };
196 196
197 int cmd_diff(int argc, const char **argv, const char *prefix __used) 197 int cmd_diff(int argc, const char **argv, const char *prefix __used)
198 { 198 {
199 sort_order = diff__default_sort_order; 199 sort_order = diff__default_sort_order;
200 argc = parse_options(argc, argv, options, diff_usage, 0); 200 argc = parse_options(argc, argv, options, diff_usage, 0);
201 if (argc) { 201 if (argc) {
202 if (argc > 2) 202 if (argc > 2)
203 usage_with_options(diff_usage, options); 203 usage_with_options(diff_usage, options);
204 if (argc == 2) { 204 if (argc == 2) {
205 input_old = argv[0]; 205 input_old = argv[0];
206 input_new = argv[1]; 206 input_new = argv[1];
207 } else 207 } else
208 input_new = argv[0]; 208 input_new = argv[0];
209 } else if (symbol_conf.default_guest_vmlinux_name || 209 } else if (symbol_conf.default_guest_vmlinux_name ||
210 symbol_conf.default_guest_kallsyms) { 210 symbol_conf.default_guest_kallsyms) {
211 input_old = "perf.data.host"; 211 input_old = "perf.data.host";
212 input_new = "perf.data.guest"; 212 input_new = "perf.data.guest";
213 } 213 }
214 214
215 symbol_conf.exclude_other = false; 215 symbol_conf.exclude_other = false;
216 if (symbol__init() < 0) 216 if (symbol__init() < 0)
217 return -1; 217 return -1;
218 218
219 setup_sorting(diff_usage, options); 219 setup_sorting(diff_usage, options);
220 setup_pager(); 220 setup_pager();
221 221
222 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL); 222 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL);
223 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", NULL); 223 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", NULL);
224 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", NULL); 224 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", NULL);
225 225
226 return __cmd_diff(); 226 return __cmd_diff();
227 } 227 }
228 228
tools/perf/builtin-stat.c
1 /* 1 /*
2 * builtin-stat.c 2 * builtin-stat.c
3 * 3 *
4 * Builtin stat command: Give a precise performance counters summary 4 * Builtin stat command: Give a precise performance counters summary
5 * overview about any workload, CPU or specific PID. 5 * overview about any workload, CPU or specific PID.
6 * 6 *
7 * Sample output: 7 * Sample output:
8 8
9 $ perf stat ~/hackbench 10 9 $ perf stat ~/hackbench 10
10 Time: 0.104 10 Time: 0.104
11 11
12 Performance counter stats for '/home/mingo/hackbench': 12 Performance counter stats for '/home/mingo/hackbench':
13 13
14 1255.538611 task clock ticks # 10.143 CPU utilization factor 14 1255.538611 task clock ticks # 10.143 CPU utilization factor
15 54011 context switches # 0.043 M/sec 15 54011 context switches # 0.043 M/sec
16 385 CPU migrations # 0.000 M/sec 16 385 CPU migrations # 0.000 M/sec
17 17755 pagefaults # 0.014 M/sec 17 17755 pagefaults # 0.014 M/sec
18 3808323185 CPU cycles # 3033.219 M/sec 18 3808323185 CPU cycles # 3033.219 M/sec
19 1575111190 instructions # 1254.530 M/sec 19 1575111190 instructions # 1254.530 M/sec
20 17367895 cache references # 13.833 M/sec 20 17367895 cache references # 13.833 M/sec
21 7674421 cache misses # 6.112 M/sec 21 7674421 cache misses # 6.112 M/sec
22 22
23 Wall-clock time elapsed: 123.786620 msecs 23 Wall-clock time elapsed: 123.786620 msecs
24 24
25 * 25 *
26 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 26 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
27 * 27 *
28 * Improvements and fixes by: 28 * Improvements and fixes by:
29 * 29 *
30 * Arjan van de Ven <arjan@linux.intel.com> 30 * Arjan van de Ven <arjan@linux.intel.com>
31 * Yanmin Zhang <yanmin.zhang@intel.com> 31 * Yanmin Zhang <yanmin.zhang@intel.com>
32 * Wu Fengguang <fengguang.wu@intel.com> 32 * Wu Fengguang <fengguang.wu@intel.com>
33 * Mike Galbraith <efault@gmx.de> 33 * Mike Galbraith <efault@gmx.de>
34 * Paul Mackerras <paulus@samba.org> 34 * Paul Mackerras <paulus@samba.org>
35 * Jaswinder Singh Rajput <jaswinder@kernel.org> 35 * Jaswinder Singh Rajput <jaswinder@kernel.org>
36 * 36 *
37 * Released under the GPL v2. (and only v2, not any later version) 37 * Released under the GPL v2. (and only v2, not any later version)
38 */ 38 */
39 39
40 #include "perf.h" 40 #include "perf.h"
41 #include "builtin.h" 41 #include "builtin.h"
42 #include "util/util.h" 42 #include "util/util.h"
43 #include "util/parse-options.h" 43 #include "util/parse-options.h"
44 #include "util/parse-events.h" 44 #include "util/parse-events.h"
45 #include "util/event.h" 45 #include "util/event.h"
46 #include "util/debug.h" 46 #include "util/debug.h"
47 #include "util/header.h" 47 #include "util/header.h"
48 #include "util/cpumap.h" 48 #include "util/cpumap.h"
49 #include "util/thread.h" 49 #include "util/thread.h"
50 50
51 #include <sys/prctl.h> 51 #include <sys/prctl.h>
52 #include <math.h> 52 #include <math.h>
53 #include <locale.h> 53 #include <locale.h>
54 54
55 #define DEFAULT_SEPARATOR " "
56
55 static struct perf_event_attr default_attrs[] = { 57 static struct perf_event_attr default_attrs[] = {
56 58
57 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 59 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
58 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 60 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
59 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 61 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
60 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 62 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
61 63
62 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 64 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
63 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 65 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
64 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 66 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
65 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 67 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
66 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, 68 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES },
67 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, 69 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
68 70
69 }; 71 };
70 72
71 static bool system_wide = false; 73 static bool system_wide = false;
72 static int nr_cpus = 0; 74 static int nr_cpus = 0;
73 static int run_idx = 0; 75 static int run_idx = 0;
74 76
75 static int run_count = 1; 77 static int run_count = 1;
76 static bool no_inherit = false; 78 static bool no_inherit = false;
77 static bool scale = true; 79 static bool scale = true;
78 static bool no_aggr = false; 80 static bool no_aggr = false;
79 static pid_t target_pid = -1; 81 static pid_t target_pid = -1;
80 static pid_t target_tid = -1; 82 static pid_t target_tid = -1;
81 static pid_t *all_tids = NULL; 83 static pid_t *all_tids = NULL;
82 static int thread_num = 0; 84 static int thread_num = 0;
83 static pid_t child_pid = -1; 85 static pid_t child_pid = -1;
84 static bool null_run = false; 86 static bool null_run = false;
85 static bool big_num = false; 87 static bool big_num = true;
88 static int big_num_opt = -1;
86 static const char *cpu_list; 89 static const char *cpu_list;
90 static const char *csv_sep = NULL;
91 static bool csv_output = false;
87 92
88 93
89 static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; 94 static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
90 95
91 static int event_scaled[MAX_COUNTERS]; 96 static int event_scaled[MAX_COUNTERS];
92 97
93 static struct { 98 static struct {
94 u64 val; 99 u64 val;
95 u64 ena; 100 u64 ena;
96 u64 run; 101 u64 run;
97 } cpu_counts[MAX_NR_CPUS][MAX_COUNTERS]; 102 } cpu_counts[MAX_NR_CPUS][MAX_COUNTERS];
98 103
99 static volatile int done = 0; 104 static volatile int done = 0;
100 105
/*
 * Running statistics accumulator, maintained by update_stats():
 *   n    - number of values added so far
 *   mean - running mean of those values
 *   M2   - running sum of delta * (val - mean) terms; stddev_stats() divides
 *          it by (n - 1) to obtain the variance
 */
struct stats
{
	double n, mean, M2;
};
105 110
106 static void update_stats(struct stats *stats, u64 val) 111 static void update_stats(struct stats *stats, u64 val)
107 { 112 {
108 double delta; 113 double delta;
109 114
110 stats->n++; 115 stats->n++;
111 delta = val - stats->mean; 116 delta = val - stats->mean;
112 stats->mean += delta / stats->n; 117 stats->mean += delta / stats->n;
113 stats->M2 += delta*(val - stats->mean); 118 stats->M2 += delta*(val - stats->mean);
114 } 119 }
115 120
116 static double avg_stats(struct stats *stats) 121 static double avg_stats(struct stats *stats)
117 { 122 {
118 return stats->mean; 123 return stats->mean;
119 } 124 }
120 125
121 /* 126 /*
122 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance 127 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
123 * 128 *
124 * (\Sum n_i^2) - ((\Sum n_i)^2)/n 129 * (\Sum n_i^2) - ((\Sum n_i)^2)/n
125 * s^2 = ------------------------------- 130 * s^2 = -------------------------------
126 * n - 1 131 * n - 1
127 * 132 *
128 * http://en.wikipedia.org/wiki/Stddev 133 * http://en.wikipedia.org/wiki/Stddev
129 * 134 *
130 * The std dev of the mean is related to the std dev by: 135 * The std dev of the mean is related to the std dev by:
131 * 136 *
132 * s 137 * s
133 * s_mean = ------- 138 * s_mean = -------
134 * sqrt(n) 139 * sqrt(n)
135 * 140 *
136 */ 141 */
137 static double stddev_stats(struct stats *stats) 142 static double stddev_stats(struct stats *stats)
138 { 143 {
139 double variance = stats->M2 / (stats->n - 1); 144 double variance = stats->M2 / (stats->n - 1);
140 double variance_mean = variance / stats->n; 145 double variance_mean = variance / stats->n;
141 146
142 return sqrt(variance_mean); 147 return sqrt(variance_mean);
143 } 148 }
144 149
145 struct stats event_res_stats[MAX_COUNTERS][3]; 150 struct stats event_res_stats[MAX_COUNTERS][3];
146 struct stats runtime_nsecs_stats[MAX_NR_CPUS]; 151 struct stats runtime_nsecs_stats[MAX_NR_CPUS];
147 struct stats runtime_cycles_stats[MAX_NR_CPUS]; 152 struct stats runtime_cycles_stats[MAX_NR_CPUS];
148 struct stats runtime_branches_stats[MAX_NR_CPUS]; 153 struct stats runtime_branches_stats[MAX_NR_CPUS];
149 struct stats walltime_nsecs_stats; 154 struct stats walltime_nsecs_stats;
150 155
151 #define MATCH_EVENT(t, c, counter) \ 156 #define MATCH_EVENT(t, c, counter) \
152 (attrs[counter].type == PERF_TYPE_##t && \ 157 (attrs[counter].type == PERF_TYPE_##t && \
153 attrs[counter].config == PERF_COUNT_##c) 158 attrs[counter].config == PERF_COUNT_##c)
154 159
155 #define ERR_PERF_OPEN \ 160 #define ERR_PERF_OPEN \
156 "counter %d, sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information." 161 "counter %d, sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information."
157 162
158 static int create_perf_stat_counter(int counter, bool *perm_err) 163 static int create_perf_stat_counter(int counter, bool *perm_err)
159 { 164 {
160 struct perf_event_attr *attr = attrs + counter; 165 struct perf_event_attr *attr = attrs + counter;
161 int thread; 166 int thread;
162 int ncreated = 0; 167 int ncreated = 0;
163 168
164 if (scale) 169 if (scale)
165 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 170 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
166 PERF_FORMAT_TOTAL_TIME_RUNNING; 171 PERF_FORMAT_TOTAL_TIME_RUNNING;
167 172
168 if (system_wide) { 173 if (system_wide) {
169 int cpu; 174 int cpu;
170 175
171 for (cpu = 0; cpu < nr_cpus; cpu++) { 176 for (cpu = 0; cpu < nr_cpus; cpu++) {
172 fd[cpu][counter][0] = sys_perf_event_open(attr, 177 fd[cpu][counter][0] = sys_perf_event_open(attr,
173 -1, cpumap[cpu], -1, 0); 178 -1, cpumap[cpu], -1, 0);
174 if (fd[cpu][counter][0] < 0) { 179 if (fd[cpu][counter][0] < 0) {
175 if (errno == EPERM || errno == EACCES) 180 if (errno == EPERM || errno == EACCES)
176 *perm_err = true; 181 *perm_err = true;
177 error(ERR_PERF_OPEN, counter, 182 error(ERR_PERF_OPEN, counter,
178 fd[cpu][counter][0], strerror(errno)); 183 fd[cpu][counter][0], strerror(errno));
179 } else { 184 } else {
180 ++ncreated; 185 ++ncreated;
181 } 186 }
182 } 187 }
183 } else { 188 } else {
184 attr->inherit = !no_inherit; 189 attr->inherit = !no_inherit;
185 if (target_pid == -1 && target_tid == -1) { 190 if (target_pid == -1 && target_tid == -1) {
186 attr->disabled = 1; 191 attr->disabled = 1;
187 attr->enable_on_exec = 1; 192 attr->enable_on_exec = 1;
188 } 193 }
189 for (thread = 0; thread < thread_num; thread++) { 194 for (thread = 0; thread < thread_num; thread++) {
190 fd[0][counter][thread] = sys_perf_event_open(attr, 195 fd[0][counter][thread] = sys_perf_event_open(attr,
191 all_tids[thread], -1, -1, 0); 196 all_tids[thread], -1, -1, 0);
192 if (fd[0][counter][thread] < 0) { 197 if (fd[0][counter][thread] < 0) {
193 if (errno == EPERM || errno == EACCES) 198 if (errno == EPERM || errno == EACCES)
194 *perm_err = true; 199 *perm_err = true;
195 error(ERR_PERF_OPEN, counter, 200 error(ERR_PERF_OPEN, counter,
196 fd[0][counter][thread], 201 fd[0][counter][thread],
197 strerror(errno)); 202 strerror(errno));
198 } else { 203 } else {
199 ++ncreated; 204 ++ncreated;
200 } 205 }
201 } 206 }
202 } 207 }
203 208
204 return ncreated; 209 return ncreated;
205 } 210 }
206 211
207 /* 212 /*
208 * Does the counter have nsecs as a unit? 213 * Does the counter have nsecs as a unit?
209 */ 214 */
210 static inline int nsec_counter(int counter) 215 static inline int nsec_counter(int counter)
211 { 216 {
212 if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) || 217 if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) ||
213 MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) 218 MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
214 return 1; 219 return 1;
215 220
216 return 0; 221 return 0;
217 } 222 }
218 223
219 /* 224 /*
220 * Read out the results of a single counter: 225 * Read out the results of a single counter:
221 * aggregate counts across CPUs in system-wide mode 226 * aggregate counts across CPUs in system-wide mode
222 */ 227 */
223 static void read_counter_aggr(int counter) 228 static void read_counter_aggr(int counter)
224 { 229 {
225 u64 count[3], single_count[3]; 230 u64 count[3], single_count[3];
226 int cpu; 231 int cpu;
227 size_t res, nv; 232 size_t res, nv;
228 int scaled; 233 int scaled;
229 int i, thread; 234 int i, thread;
230 235
231 count[0] = count[1] = count[2] = 0; 236 count[0] = count[1] = count[2] = 0;
232 237
233 nv = scale ? 3 : 1; 238 nv = scale ? 3 : 1;
234 for (cpu = 0; cpu < nr_cpus; cpu++) { 239 for (cpu = 0; cpu < nr_cpus; cpu++) {
235 for (thread = 0; thread < thread_num; thread++) { 240 for (thread = 0; thread < thread_num; thread++) {
236 if (fd[cpu][counter][thread] < 0) 241 if (fd[cpu][counter][thread] < 0)
237 continue; 242 continue;
238 243
239 res = read(fd[cpu][counter][thread], 244 res = read(fd[cpu][counter][thread],
240 single_count, nv * sizeof(u64)); 245 single_count, nv * sizeof(u64));
241 assert(res == nv * sizeof(u64)); 246 assert(res == nv * sizeof(u64));
242 247
243 close(fd[cpu][counter][thread]); 248 close(fd[cpu][counter][thread]);
244 fd[cpu][counter][thread] = -1; 249 fd[cpu][counter][thread] = -1;
245 250
246 count[0] += single_count[0]; 251 count[0] += single_count[0];
247 if (scale) { 252 if (scale) {
248 count[1] += single_count[1]; 253 count[1] += single_count[1];
249 count[2] += single_count[2]; 254 count[2] += single_count[2];
250 } 255 }
251 } 256 }
252 } 257 }
253 258
254 scaled = 0; 259 scaled = 0;
255 if (scale) { 260 if (scale) {
256 if (count[2] == 0) { 261 if (count[2] == 0) {
257 event_scaled[counter] = -1; 262 event_scaled[counter] = -1;
258 count[0] = 0; 263 count[0] = 0;
259 return; 264 return;
260 } 265 }
261 266
262 if (count[2] < count[1]) { 267 if (count[2] < count[1]) {
263 event_scaled[counter] = 1; 268 event_scaled[counter] = 1;
264 count[0] = (unsigned long long) 269 count[0] = (unsigned long long)
265 ((double)count[0] * count[1] / count[2] + 0.5); 270 ((double)count[0] * count[1] / count[2] + 0.5);
266 } 271 }
267 } 272 }
268 273
269 for (i = 0; i < 3; i++) 274 for (i = 0; i < 3; i++)
270 update_stats(&event_res_stats[counter][i], count[i]); 275 update_stats(&event_res_stats[counter][i], count[i]);
271 276
272 if (verbose) { 277 if (verbose) {
273 fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter), 278 fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter),
274 count[0], count[1], count[2]); 279 count[0], count[1], count[2]);
275 } 280 }
276 281
277 /* 282 /*
278 * Save the full runtime - to allow normalization during printout: 283 * Save the full runtime - to allow normalization during printout:
279 */ 284 */
280 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) 285 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
281 update_stats(&runtime_nsecs_stats[0], count[0]); 286 update_stats(&runtime_nsecs_stats[0], count[0]);
282 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) 287 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
283 update_stats(&runtime_cycles_stats[0], count[0]); 288 update_stats(&runtime_cycles_stats[0], count[0]);
284 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) 289 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
285 update_stats(&runtime_branches_stats[0], count[0]); 290 update_stats(&runtime_branches_stats[0], count[0]);
286 } 291 }
287 292
288 /* 293 /*
289 * Read out the results of a single counter: 294 * Read out the results of a single counter:
290 * do not aggregate counts across CPUs in system-wide mode 295 * do not aggregate counts across CPUs in system-wide mode
291 */ 296 */
292 static void read_counter(int counter) 297 static void read_counter(int counter)
293 { 298 {
294 u64 count[3]; 299 u64 count[3];
295 int cpu; 300 int cpu;
296 size_t res, nv; 301 size_t res, nv;
297 302
298 count[0] = count[1] = count[2] = 0; 303 count[0] = count[1] = count[2] = 0;
299 304
300 nv = scale ? 3 : 1; 305 nv = scale ? 3 : 1;
301 306
302 for (cpu = 0; cpu < nr_cpus; cpu++) { 307 for (cpu = 0; cpu < nr_cpus; cpu++) {
303 308
304 if (fd[cpu][counter][0] < 0) 309 if (fd[cpu][counter][0] < 0)
305 continue; 310 continue;
306 311
307 res = read(fd[cpu][counter][0], count, nv * sizeof(u64)); 312 res = read(fd[cpu][counter][0], count, nv * sizeof(u64));
308 313
309 assert(res == nv * sizeof(u64)); 314 assert(res == nv * sizeof(u64));
310 315
311 close(fd[cpu][counter][0]); 316 close(fd[cpu][counter][0]);
312 fd[cpu][counter][0] = -1; 317 fd[cpu][counter][0] = -1;
313 318
314 if (scale) { 319 if (scale) {
315 if (count[2] == 0) { 320 if (count[2] == 0) {
316 count[0] = 0; 321 count[0] = 0;
317 } else if (count[2] < count[1]) { 322 } else if (count[2] < count[1]) {
318 count[0] = (unsigned long long) 323 count[0] = (unsigned long long)
319 ((double)count[0] * count[1] / count[2] + 0.5); 324 ((double)count[0] * count[1] / count[2] + 0.5);
320 } 325 }
321 } 326 }
322 cpu_counts[cpu][counter].val = count[0]; /* scaled count */ 327 cpu_counts[cpu][counter].val = count[0]; /* scaled count */
323 cpu_counts[cpu][counter].ena = count[1]; 328 cpu_counts[cpu][counter].ena = count[1];
324 cpu_counts[cpu][counter].run = count[2]; 329 cpu_counts[cpu][counter].run = count[2];
325 330
326 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) 331 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
327 update_stats(&runtime_nsecs_stats[cpu], count[0]); 332 update_stats(&runtime_nsecs_stats[cpu], count[0]);
328 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) 333 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
329 update_stats(&runtime_cycles_stats[cpu], count[0]); 334 update_stats(&runtime_cycles_stats[cpu], count[0]);
330 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) 335 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
331 update_stats(&runtime_branches_stats[cpu], count[0]); 336 update_stats(&runtime_branches_stats[cpu], count[0]);
332 } 337 }
333 } 338 }
334 339
335 static int run_perf_stat(int argc __used, const char **argv) 340 static int run_perf_stat(int argc __used, const char **argv)
336 { 341 {
337 unsigned long long t0, t1; 342 unsigned long long t0, t1;
338 int status = 0; 343 int status = 0;
339 int counter, ncreated = 0; 344 int counter, ncreated = 0;
340 int child_ready_pipe[2], go_pipe[2]; 345 int child_ready_pipe[2], go_pipe[2];
341 bool perm_err = false; 346 bool perm_err = false;
342 const bool forks = (argc > 0); 347 const bool forks = (argc > 0);
343 char buf; 348 char buf;
344 349
345 if (!system_wide) 350 if (!system_wide)
346 nr_cpus = 1; 351 nr_cpus = 1;
347 352
348 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { 353 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
349 perror("failed to create pipes"); 354 perror("failed to create pipes");
350 exit(1); 355 exit(1);
351 } 356 }
352 357
353 if (forks) { 358 if (forks) {
354 if ((child_pid = fork()) < 0) 359 if ((child_pid = fork()) < 0)
355 perror("failed to fork"); 360 perror("failed to fork");
356 361
357 if (!child_pid) { 362 if (!child_pid) {
358 close(child_ready_pipe[0]); 363 close(child_ready_pipe[0]);
359 close(go_pipe[1]); 364 close(go_pipe[1]);
360 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 365 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
361 366
362 /* 367 /*
363 * Do a dummy execvp to get the PLT entry resolved, 368 * Do a dummy execvp to get the PLT entry resolved,
364 * so we avoid the resolver overhead on the real 369 * so we avoid the resolver overhead on the real
365 * execvp call. 370 * execvp call.
366 */ 371 */
367 execvp("", (char **)argv); 372 execvp("", (char **)argv);
368 373
369 /* 374 /*
370 * Tell the parent we're ready to go 375 * Tell the parent we're ready to go
371 */ 376 */
372 close(child_ready_pipe[1]); 377 close(child_ready_pipe[1]);
373 378
374 /* 379 /*
375 * Wait until the parent tells us to go. 380 * Wait until the parent tells us to go.
376 */ 381 */
377 if (read(go_pipe[0], &buf, 1) == -1) 382 if (read(go_pipe[0], &buf, 1) == -1)
378 perror("unable to read pipe"); 383 perror("unable to read pipe");
379 384
380 execvp(argv[0], (char **)argv); 385 execvp(argv[0], (char **)argv);
381 386
382 perror(argv[0]); 387 perror(argv[0]);
383 exit(-1); 388 exit(-1);
384 } 389 }
385 390
386 if (target_tid == -1 && target_pid == -1 && !system_wide) 391 if (target_tid == -1 && target_pid == -1 && !system_wide)
387 all_tids[0] = child_pid; 392 all_tids[0] = child_pid;
388 393
389 /* 394 /*
390 * Wait for the child to be ready to exec. 395 * Wait for the child to be ready to exec.
391 */ 396 */
392 close(child_ready_pipe[1]); 397 close(child_ready_pipe[1]);
393 close(go_pipe[0]); 398 close(go_pipe[0]);
394 if (read(child_ready_pipe[0], &buf, 1) == -1) 399 if (read(child_ready_pipe[0], &buf, 1) == -1)
395 perror("unable to read pipe"); 400 perror("unable to read pipe");
396 close(child_ready_pipe[0]); 401 close(child_ready_pipe[0]);
397 } 402 }
398 403
399 for (counter = 0; counter < nr_counters; counter++) 404 for (counter = 0; counter < nr_counters; counter++)
400 ncreated += create_perf_stat_counter(counter, &perm_err); 405 ncreated += create_perf_stat_counter(counter, &perm_err);
401 406
402 if (ncreated < nr_counters) { 407 if (ncreated < nr_counters) {
403 if (perm_err) 408 if (perm_err)
404 error("You may not have permission to collect %sstats.\n" 409 error("You may not have permission to collect %sstats.\n"
405 "\t Consider tweaking" 410 "\t Consider tweaking"
406 " /proc/sys/kernel/perf_event_paranoid or running as root.", 411 " /proc/sys/kernel/perf_event_paranoid or running as root.",
407 system_wide ? "system-wide " : ""); 412 system_wide ? "system-wide " : "");
408 die("Not all events could be opened.\n"); 413 die("Not all events could be opened.\n");
409 if (child_pid != -1) 414 if (child_pid != -1)
410 kill(child_pid, SIGTERM); 415 kill(child_pid, SIGTERM);
411 return -1; 416 return -1;
412 } 417 }
413 418
414 /* 419 /*
415 * Enable counters and exec the command: 420 * Enable counters and exec the command:
416 */ 421 */
417 t0 = rdclock(); 422 t0 = rdclock();
418 423
419 if (forks) { 424 if (forks) {
420 close(go_pipe[1]); 425 close(go_pipe[1]);
421 wait(&status); 426 wait(&status);
422 } else { 427 } else {
423 while(!done) sleep(1); 428 while(!done) sleep(1);
424 } 429 }
425 430
426 t1 = rdclock(); 431 t1 = rdclock();
427 432
428 update_stats(&walltime_nsecs_stats, t1 - t0); 433 update_stats(&walltime_nsecs_stats, t1 - t0);
429 434
430 if (no_aggr) { 435 if (no_aggr) {
431 for (counter = 0; counter < nr_counters; counter++) 436 for (counter = 0; counter < nr_counters; counter++)
432 read_counter(counter); 437 read_counter(counter);
433 } else { 438 } else {
434 for (counter = 0; counter < nr_counters; counter++) 439 for (counter = 0; counter < nr_counters; counter++)
435 read_counter_aggr(counter); 440 read_counter_aggr(counter);
436 } 441 }
437 return WEXITSTATUS(status); 442 return WEXITSTATUS(status);
438 } 443 }
439 444
440 static void print_noise(int counter, double avg) 445 static void print_noise(int counter, double avg)
441 { 446 {
442 if (run_count == 1) 447 if (run_count == 1)
443 return; 448 return;
444 449
445 fprintf(stderr, " ( +- %7.3f%% )", 450 fprintf(stderr, " ( +- %7.3f%% )",
446 100 * stddev_stats(&event_res_stats[counter][0]) / avg); 451 100 * stddev_stats(&event_res_stats[counter][0]) / avg);
447 } 452 }
448 453
449 static void nsec_printout(int cpu, int counter, double avg) 454 static void nsec_printout(int cpu, int counter, double avg)
450 { 455 {
451 double msecs = avg / 1e6; 456 double msecs = avg / 1e6;
457 char cpustr[16] = { '\0', };
458 const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
452 459
453 if (no_aggr) 460 if (no_aggr)
454 fprintf(stderr, "CPU%-4d %18.6f %-24s", 461 sprintf(cpustr, "CPU%*d%s",
455 cpumap[cpu], msecs, event_name(counter)); 462 csv_output ? 0 : -4,
456 else 463 cpumap[cpu], csv_sep);
457 fprintf(stderr, " %18.6f %-24s", msecs, event_name(counter));
458 464
465 fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(counter));
466
467 if (csv_output)
468 return;
469
459 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { 470 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
460 fprintf(stderr, " # %10.3f CPUs ", 471 fprintf(stderr, " # %10.3f CPUs ",
461 avg / avg_stats(&walltime_nsecs_stats)); 472 avg / avg_stats(&walltime_nsecs_stats));
462 } 473 }
463 } 474 }
464 475
465 static void abs_printout(int cpu, int counter, double avg) 476 static void abs_printout(int cpu, int counter, double avg)
466 { 477 {
467 double total, ratio = 0.0; 478 double total, ratio = 0.0;
468 char cpustr[16] = { '\0', }; 479 char cpustr[16] = { '\0', };
480 const char *fmt;
469 481
482 if (csv_output)
483 fmt = "%s%.0f%s%s";
484 else if (big_num)
485 fmt = "%s%'18.0f%s%-24s";
486 else
487 fmt = "%s%18.0f%s%-24s";
488
470 if (no_aggr) 489 if (no_aggr)
471 sprintf(cpustr, "CPU%-4d", cpumap[cpu]); 490 sprintf(cpustr, "CPU%*d%s",
491 csv_output ? 0 : -4,
492 cpumap[cpu], csv_sep);
472 else 493 else
473 cpu = 0; 494 cpu = 0;
474 495
475 if (big_num) 496 fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(counter));
476 fprintf(stderr, "%s %'18.0f %-24s",
477 cpustr, avg, event_name(counter));
478 else
479 fprintf(stderr, "%s %18.0f %-24s",
480 cpustr, avg, event_name(counter));
481 497
498 if (csv_output)
499 return;
500
482 if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { 501 if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
483 total = avg_stats(&runtime_cycles_stats[cpu]); 502 total = avg_stats(&runtime_cycles_stats[cpu]);
484 503
485 if (total) 504 if (total)
486 ratio = avg / total; 505 ratio = avg / total;
487 506
488 fprintf(stderr, " # %10.3f IPC ", ratio); 507 fprintf(stderr, " # %10.3f IPC ", ratio);
489 } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && 508 } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) &&
490 runtime_branches_stats[cpu].n != 0) { 509 runtime_branches_stats[cpu].n != 0) {
491 total = avg_stats(&runtime_branches_stats[cpu]); 510 total = avg_stats(&runtime_branches_stats[cpu]);
492 511
493 if (total) 512 if (total)
494 ratio = avg * 100 / total; 513 ratio = avg * 100 / total;
495 514
496 fprintf(stderr, " # %10.3f %% ", ratio); 515 fprintf(stderr, " # %10.3f %% ", ratio);
497 516
498 } else if (runtime_nsecs_stats[cpu].n != 0) { 517 } else if (runtime_nsecs_stats[cpu].n != 0) {
499 total = avg_stats(&runtime_nsecs_stats[cpu]); 518 total = avg_stats(&runtime_nsecs_stats[cpu]);
500 519
501 if (total) 520 if (total)
502 ratio = 1000.0 * avg / total; 521 ratio = 1000.0 * avg / total;
503 522
504 fprintf(stderr, " # %10.3f M/sec", ratio); 523 fprintf(stderr, " # %10.3f M/sec", ratio);
505 } 524 }
506 } 525 }
507 526
508 /* 527 /*
509 * Print out the results of a single counter: 528 * Print out the results of a single counter:
510 * aggregated counts in system-wide mode 529 * aggregated counts in system-wide mode
511 */ 530 */
512 static void print_counter_aggr(int counter) 531 static void print_counter_aggr(int counter)
513 { 532 {
514 double avg = avg_stats(&event_res_stats[counter][0]); 533 double avg = avg_stats(&event_res_stats[counter][0]);
515 int scaled = event_scaled[counter]; 534 int scaled = event_scaled[counter];
516 535
517 if (scaled == -1) { 536 if (scaled == -1) {
518 fprintf(stderr, " %18s %-24s\n", 537 fprintf(stderr, "%*s%s%-24s\n",
519 "<not counted>", event_name(counter)); 538 csv_output ? 0 : 18,
539 "<not counted>", csv_sep, event_name(counter));
520 return; 540 return;
521 } 541 }
522 542
523 if (nsec_counter(counter)) 543 if (nsec_counter(counter))
524 nsec_printout(-1, counter, avg); 544 nsec_printout(-1, counter, avg);
525 else 545 else
526 abs_printout(-1, counter, avg); 546 abs_printout(-1, counter, avg);
527 547
548 if (csv_output) {
549 fputc('\n', stderr);
550 return;
551 }
552
528 print_noise(counter, avg); 553 print_noise(counter, avg);
529 554
530 if (scaled) { 555 if (scaled) {
531 double avg_enabled, avg_running; 556 double avg_enabled, avg_running;
532 557
533 avg_enabled = avg_stats(&event_res_stats[counter][1]); 558 avg_enabled = avg_stats(&event_res_stats[counter][1]);
534 avg_running = avg_stats(&event_res_stats[counter][2]); 559 avg_running = avg_stats(&event_res_stats[counter][2]);
535 560
536 fprintf(stderr, " (scaled from %.2f%%)", 561 fprintf(stderr, " (scaled from %.2f%%)",
537 100 * avg_running / avg_enabled); 562 100 * avg_running / avg_enabled);
538 } 563 }
539 564
540 fprintf(stderr, "\n"); 565 fprintf(stderr, "\n");
541 } 566 }
542 567
543 /* 568 /*
544 * Print out the results of a single counter: 569 * Print out the results of a single counter:
545 * does not use aggregated count in system-wide 570 * does not use aggregated count in system-wide
546 */ 571 */
547 static void print_counter(int counter) 572 static void print_counter(int counter)
548 { 573 {
549 u64 ena, run, val; 574 u64 ena, run, val;
550 int cpu; 575 int cpu;
551 576
552 for (cpu = 0; cpu < nr_cpus; cpu++) { 577 for (cpu = 0; cpu < nr_cpus; cpu++) {
553 val = cpu_counts[cpu][counter].val; 578 val = cpu_counts[cpu][counter].val;
554 ena = cpu_counts[cpu][counter].ena; 579 ena = cpu_counts[cpu][counter].ena;
555 run = cpu_counts[cpu][counter].run; 580 run = cpu_counts[cpu][counter].run;
556 if (run == 0 || ena == 0) { 581 if (run == 0 || ena == 0) {
557 fprintf(stderr, "CPU%-4d %18s %-24s", cpumap[cpu], 582 fprintf(stderr, "CPU%*d%s%*s%s%-24s",
558 "<not counted>", event_name(counter)); 583 csv_output ? 0 : -4,
584 cpumap[cpu], csv_sep,
585 csv_output ? 0 : 18,
586 "<not counted>", csv_sep,
587 event_name(counter));
559 588
560 fprintf(stderr, "\n"); 589 fprintf(stderr, "\n");
561 continue; 590 continue;
562 } 591 }
563 592
564 if (nsec_counter(counter)) 593 if (nsec_counter(counter))
565 nsec_printout(cpu, counter, val); 594 nsec_printout(cpu, counter, val);
566 else 595 else
567 abs_printout(cpu, counter, val); 596 abs_printout(cpu, counter, val);
568 597
569 print_noise(counter, 1.0); 598 if (!csv_output) {
599 print_noise(counter, 1.0);
570 600
571 if (run != ena) { 601 if (run != ena) {
572 fprintf(stderr, " (scaled from %.2f%%)", 602 fprintf(stderr, " (scaled from %.2f%%)",
573 100.0 * run / ena); 603 100.0 * run / ena);
604 }
574 } 605 }
575 fprintf(stderr, "\n"); 606 fprintf(stderr, "\n");
576 } 607 }
577 } 608 }
578 609
579 static void print_stat(int argc, const char **argv) 610 static void print_stat(int argc, const char **argv)
580 { 611 {
581 int i, counter; 612 int i, counter;
582 613
583 fflush(stdout); 614 fflush(stdout);
584 615
585 fprintf(stderr, "\n"); 616 if (!csv_output) {
586 fprintf(stderr, " Performance counter stats for "); 617 fprintf(stderr, "\n");
587 if(target_pid == -1 && target_tid == -1) { 618 fprintf(stderr, " Performance counter stats for ");
588 fprintf(stderr, "\'%s", argv[0]); 619 if(target_pid == -1 && target_tid == -1) {
589 for (i = 1; i < argc; i++) 620 fprintf(stderr, "\'%s", argv[0]);
590 fprintf(stderr, " %s", argv[i]); 621 for (i = 1; i < argc; i++)
591 } else if (target_pid != -1) 622 fprintf(stderr, " %s", argv[i]);
592 fprintf(stderr, "process id \'%d", target_pid); 623 } else if (target_pid != -1)
593 else 624 fprintf(stderr, "process id \'%d", target_pid);
594 fprintf(stderr, "thread id \'%d", target_tid); 625 else
626 fprintf(stderr, "thread id \'%d", target_tid);
595 627
596 fprintf(stderr, "\'"); 628 fprintf(stderr, "\'");
597 if (run_count > 1) 629 if (run_count > 1)
598 fprintf(stderr, " (%d runs)", run_count); 630 fprintf(stderr, " (%d runs)", run_count);
599 fprintf(stderr, ":\n\n"); 631 fprintf(stderr, ":\n\n");
632 }
600 633
601 if (no_aggr) { 634 if (no_aggr) {
602 for (counter = 0; counter < nr_counters; counter++) 635 for (counter = 0; counter < nr_counters; counter++)
603 print_counter(counter); 636 print_counter(counter);
604 } else { 637 } else {
605 for (counter = 0; counter < nr_counters; counter++) 638 for (counter = 0; counter < nr_counters; counter++)
606 print_counter_aggr(counter); 639 print_counter_aggr(counter);
607 } 640 }
608 641
609 fprintf(stderr, "\n"); 642 if (!csv_output) {
610 fprintf(stderr, " %18.9f seconds time elapsed", 643 fprintf(stderr, "\n");
611 avg_stats(&walltime_nsecs_stats)/1e9); 644 fprintf(stderr, " %18.9f seconds time elapsed",
612 if (run_count > 1) { 645 avg_stats(&walltime_nsecs_stats)/1e9);
613 fprintf(stderr, " ( +- %7.3f%% )", 646 if (run_count > 1) {
647 fprintf(stderr, " ( +- %7.3f%% )",
614 100*stddev_stats(&walltime_nsecs_stats) / 648 100*stddev_stats(&walltime_nsecs_stats) /
615 avg_stats(&walltime_nsecs_stats)); 649 avg_stats(&walltime_nsecs_stats));
650 }
651 fprintf(stderr, "\n\n");
616 } 652 }
617 fprintf(stderr, "\n\n");
618 } 653 }
619 654
620 static volatile int signr = -1; 655 static volatile int signr = -1;
621 656
622 static void skip_signal(int signo) 657 static void skip_signal(int signo)
623 { 658 {
624 if(child_pid == -1) 659 if(child_pid == -1)
625 done = 1; 660 done = 1;
626 661
627 signr = signo; 662 signr = signo;
628 } 663 }
629 664
630 static void sig_atexit(void) 665 static void sig_atexit(void)
631 { 666 {
632 if (child_pid != -1) 667 if (child_pid != -1)
633 kill(child_pid, SIGTERM); 668 kill(child_pid, SIGTERM);
634 669
635 if (signr == -1) 670 if (signr == -1)
636 return; 671 return;
637 672
638 signal(signr, SIG_DFL); 673 signal(signr, SIG_DFL);
639 kill(getpid(), signr); 674 kill(getpid(), signr);
640 } 675 }
641 676
642 static const char * const stat_usage[] = { 677 static const char * const stat_usage[] = {
643 "perf stat [<options>] [<command>]", 678 "perf stat [<options>] [<command>]",
644 NULL 679 NULL
645 }; 680 };
646 681
682 static int stat__set_big_num(const struct option *opt __used,
683 const char *s __used, int unset)
684 {
685 big_num_opt = unset ? 0 : 1;
686 return 0;
687 }
688
647 static const struct option options[] = { 689 static const struct option options[] = {
648 OPT_CALLBACK('e', "event", NULL, "event", 690 OPT_CALLBACK('e', "event", NULL, "event",
649 "event selector. use 'perf list' to list available events", 691 "event selector. use 'perf list' to list available events",
650 parse_events), 692 parse_events),
651 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 693 OPT_BOOLEAN('i', "no-inherit", &no_inherit,
652 "child tasks do not inherit counters"), 694 "child tasks do not inherit counters"),
653 OPT_INTEGER('p', "pid", &target_pid, 695 OPT_INTEGER('p', "pid", &target_pid,
654 "stat events on existing process id"), 696 "stat events on existing process id"),
655 OPT_INTEGER('t', "tid", &target_tid, 697 OPT_INTEGER('t', "tid", &target_tid,
656 "stat events on existing thread id"), 698 "stat events on existing thread id"),
657 OPT_BOOLEAN('a', "all-cpus", &system_wide, 699 OPT_BOOLEAN('a', "all-cpus", &system_wide,
658 "system-wide collection from all CPUs"), 700 "system-wide collection from all CPUs"),
659 OPT_BOOLEAN('c', "scale", &scale, 701 OPT_BOOLEAN('c', "scale", &scale,
660 "scale/normalize counters"), 702 "scale/normalize counters"),
661 OPT_INCR('v', "verbose", &verbose, 703 OPT_INCR('v', "verbose", &verbose,
662 "be more verbose (show counter open errors, etc)"), 704 "be more verbose (show counter open errors, etc)"),
663 OPT_INTEGER('r', "repeat", &run_count, 705 OPT_INTEGER('r', "repeat", &run_count,
664 "repeat command and print average + stddev (max: 100)"), 706 "repeat command and print average + stddev (max: 100)"),
665 OPT_BOOLEAN('n', "null", &null_run, 707 OPT_BOOLEAN('n', "null", &null_run,
666 "null run - dont start any counters"), 708 "null run - dont start any counters"),
667 OPT_BOOLEAN('B', "big-num", &big_num, 709 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
668 "print large numbers with thousands\' separators"), 710 "print large numbers with thousands\' separators",
711 stat__set_big_num),
669 OPT_STRING('C', "cpu", &cpu_list, "cpu", 712 OPT_STRING('C', "cpu", &cpu_list, "cpu",
670 "list of cpus to monitor in system-wide"), 713 "list of cpus to monitor in system-wide"),
671 OPT_BOOLEAN('A', "no-aggr", &no_aggr, 714 OPT_BOOLEAN('A', "no-aggr", &no_aggr,
672 "disable CPU count aggregation"), 715 "disable CPU count aggregation"),
716 OPT_STRING('x', "field-separator", &csv_sep, "separator",
717 "print counts with custom separator"),
673 OPT_END() 718 OPT_END()
674 }; 719 };
675 720
676 int cmd_stat(int argc, const char **argv, const char *prefix __used) 721 int cmd_stat(int argc, const char **argv, const char *prefix __used)
677 { 722 {
678 int status; 723 int status;
679 int i,j; 724 int i,j;
680 725
681 setlocale(LC_ALL, ""); 726 setlocale(LC_ALL, "");
682 727
683 argc = parse_options(argc, argv, options, stat_usage, 728 argc = parse_options(argc, argv, options, stat_usage,
684 PARSE_OPT_STOP_AT_NON_OPTION); 729 PARSE_OPT_STOP_AT_NON_OPTION);
730
731 if (csv_sep)
732 csv_output = true;
733 else
734 csv_sep = DEFAULT_SEPARATOR;
735
736 /*
737 * let the spreadsheet do the pretty-printing
738 */
739 if (csv_output) {
740 /* User explicitly passed -B? */
741 if (big_num_opt == 1) {
742 fprintf(stderr, "-B option not supported with -x\n");
743 usage_with_options(stat_usage, options);
744 } else /* Nope, so disable big number formatting */
745 big_num = false;
746 } else if (big_num_opt == 0) /* User passed --no-big-num */
747 big_num = false;
748
685 if (!argc && target_pid == -1 && target_tid == -1) 749 if (!argc && target_pid == -1 && target_tid == -1)
686 usage_with_options(stat_usage, options); 750 usage_with_options(stat_usage, options);
687 if (run_count <= 0) 751 if (run_count <= 0)
688 usage_with_options(stat_usage, options); 752 usage_with_options(stat_usage, options);
689 753
690 /* no_aggr is for system-wide only */ 754 /* no_aggr is for system-wide only */
691 if (no_aggr && !system_wide) 755 if (no_aggr && !system_wide)
692 usage_with_options(stat_usage, options); 756 usage_with_options(stat_usage, options);
693 757
694 /* Set attrs and nr_counters if no event is selected and !null_run */ 758 /* Set attrs and nr_counters if no event is selected and !null_run */
695 if (!null_run && !nr_counters) { 759 if (!null_run && !nr_counters) {
696 memcpy(attrs, default_attrs, sizeof(default_attrs)); 760 memcpy(attrs, default_attrs, sizeof(default_attrs));
697 nr_counters = ARRAY_SIZE(default_attrs); 761 nr_counters = ARRAY_SIZE(default_attrs);
698 } 762 }
699 763
700 if (system_wide) 764 if (system_wide)
701 nr_cpus = read_cpu_map(cpu_list); 765 nr_cpus = read_cpu_map(cpu_list);
702 else 766 else
703 nr_cpus = 1; 767 nr_cpus = 1;
704 768
705 if (nr_cpus < 1) 769 if (nr_cpus < 1)
706 usage_with_options(stat_usage, options); 770 usage_with_options(stat_usage, options);
707 771
708 if (target_pid != -1) { 772 if (target_pid != -1) {
709 target_tid = target_pid; 773 target_tid = target_pid;
710 thread_num = find_all_tid(target_pid, &all_tids); 774 thread_num = find_all_tid(target_pid, &all_tids);
711 if (thread_num <= 0) { 775 if (thread_num <= 0) {
712 fprintf(stderr, "Can't find all threads of pid %d\n", 776 fprintf(stderr, "Can't find all threads of pid %d\n",
713 target_pid); 777 target_pid);
714 usage_with_options(stat_usage, options); 778 usage_with_options(stat_usage, options);
715 } 779 }
716 } else { 780 } else {
717 all_tids=malloc(sizeof(pid_t)); 781 all_tids=malloc(sizeof(pid_t));
718 if (!all_tids) 782 if (!all_tids)
719 return -ENOMEM; 783 return -ENOMEM;
720 784
721 all_tids[0] = target_tid; 785 all_tids[0] = target_tid;
722 thread_num = 1; 786 thread_num = 1;
723 } 787 }
724 788
725 for (i = 0; i < MAX_NR_CPUS; i++) { 789 for (i = 0; i < MAX_NR_CPUS; i++) {
726 for (j = 0; j < MAX_COUNTERS; j++) { 790 for (j = 0; j < MAX_COUNTERS; j++) {
727 fd[i][j] = malloc(sizeof(int)*thread_num); 791 fd[i][j] = malloc(sizeof(int)*thread_num);
728 if (!fd[i][j]) 792 if (!fd[i][j])
729 return -ENOMEM; 793 return -ENOMEM;
730 } 794 }
731 } 795 }
732 796
733 /* 797 /*
734 * We don't want to block the signals - that would cause 798 * We don't want to block the signals - that would cause
735 * child tasks to inherit that and Ctrl-C would not work. 799 * child tasks to inherit that and Ctrl-C would not work.
736 * What we want is for Ctrl-C to work in the exec()-ed 800 * What we want is for Ctrl-C to work in the exec()-ed
737 * task, but being ignored by perf stat itself: 801 * task, but being ignored by perf stat itself:
738 */ 802 */
739 atexit(sig_atexit); 803 atexit(sig_atexit);
740 signal(SIGINT, skip_signal); 804 signal(SIGINT, skip_signal);
741 signal(SIGALRM, skip_signal); 805 signal(SIGALRM, skip_signal);
742 signal(SIGABRT, skip_signal); 806 signal(SIGABRT, skip_signal);
743 807
744 status = 0; 808 status = 0;
745 for (run_idx = 0; run_idx < run_count; run_idx++) { 809 for (run_idx = 0; run_idx < run_count; run_idx++) {
746 if (run_count != 1 && verbose) 810 if (run_count != 1 && verbose)
747 fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); 811 fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);
748 status = run_perf_stat(argc, argv); 812 status = run_perf_stat(argc, argv);
749 } 813 }