Commit 9a92b479b2f088ee2d3194243f4c8e59b1b8c9c2
Committed by: Ingo Molnar
1 parent: 016e92fbc9
Exists in: master and in 7 other branches
perf tools: Improve thread comm resolution in perf sched
When we get sched traces that involve a task that was already created
before opening the event, we won't have the comm event for it. So if we
can't find the comm event for a given thread, we look at the traces,
which may contain this information.

Before:

  ata/1:371             |    0.000 ms |    1 | avg: 3988.693 ms | max: 3988.693 ms |
  kondemand/1:421       |    0.096 ms |    3 | avg:  345.346 ms | max: 1035.989 ms |
  kondemand/0:420       |    0.025 ms |    3 | avg:  421.332 ms | max:  964.014 ms |
  :5124:5124            |    0.103 ms |    5 | avg:   74.082 ms | max:  277.194 ms |
  :6244:6244            |    0.691 ms |    9 | avg:  125.655 ms | max:  271.306 ms |
  firefox:5080          |    0.924 ms |    5 | avg:   53.833 ms | max:  257.828 ms |
  npviewer.bin:6225     |   21.871 ms |   53 | avg:   22.462 ms | max:  220.835 ms |
  :6245:6245            |    9.631 ms |   21 | avg:   41.864 ms | max:  213.349 ms |

After:

  ata/1:371             |    0.000 ms |    1 | avg: 3988.693 ms | max: 3988.693 ms |
  kondemand/1:421       |    0.096 ms |    3 | avg:  345.346 ms | max: 1035.989 ms |
  kondemand/0:420       |    0.025 ms |    3 | avg:  421.332 ms | max:  964.014 ms |
  firefox:5124          |    0.103 ms |    5 | avg:   74.082 ms | max:  277.194 ms |
  npviewer.bin:6244     |    0.691 ms |    9 | avg:  125.655 ms | max:  271.306 ms |
  firefox:5080          |    0.924 ms |    5 | avg:   53.833 ms | max:  257.828 ms |
  npviewer.bin:6225     |   21.871 ms |   53 | avg:   22.462 ms | max:  220.835 ms |
  npviewer.bin:6245     |    9.631 ms |   21 | avg:   41.864 ms | max:  213.349 ms |

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1255012632-7882-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
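The helpers this patch adds (visible in the diff below) implement that fallback. As a minimal sketch of the idea, using the threads__findnew_nocomm() lookup the patch introduces (resolve_comm_from_switch() is a hypothetical name for illustration; the real code is threads__findnew_from_ctx() in the diff):

        static struct thread *
        resolve_comm_from_switch(u32 pid, struct trace_switch_event *sw)
        {
                struct thread *th;

                /* look the thread up without demanding a recorded comm event */
                th = threads__findnew_nocomm(pid, &threads, &last_match);
                if (th->comm)
                        return th;      /* a comm event was captured; nothing to do */

                /* fall back to the task names carried by the sched_switch payload */
                if (pid == sw->prev_pid)
                        thread__set_comm(th, sw->prev_comm);
                else
                        thread__set_comm(th, sw->next_comm);

                return th;
        }

The same trick works for sched_wakeup events, whose payload names the woken task.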
Showing 3 changed files with 67 additions and 12 deletions. Inline diff:
tools/perf/builtin-sched.c
#include "builtin.h"
#include "perf.h"

#include "util/util.h"
#include "util/cache.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"

#include "util/parse-options.h"
#include "util/trace-event.h"

#include "util/debug.h"
#include "util/data_map.h"

#include <sys/types.h>
#include <sys/prctl.h>

#include <semaphore.h>
#include <pthread.h>
#include <math.h>

static char const *input_name = "perf.data";

static unsigned long total_comm = 0;

static struct rb_root threads;
static struct thread *last_match;

static struct perf_header *header;
static u64 sample_type;

static char default_sort_order[] = "avg, max, switch, runtime";
static char *sort_order = default_sort_order;

static char *cwd;
static int cwdlen;

#define PR_SET_NAME 15 /* Set process name */
#define MAX_CPUS 4096

#define BUG_ON(x) assert(!(x))

static u64 run_measurement_overhead;
static u64 sleep_measurement_overhead;

#define COMM_LEN 20
#define SYM_LEN 129

#define MAX_PID 65536

static unsigned long nr_tasks;

struct sched_atom;

struct task_desc {
        unsigned long nr;
        unsigned long pid;
        char comm[COMM_LEN];

        unsigned long nr_events;
        unsigned long curr_event;
        struct sched_atom **atoms;

        pthread_t thread;
        sem_t sleep_sem;

        sem_t ready_for_work;
        sem_t work_done_sem;

        u64 cpu_usage;
};

enum sched_event_type {
        SCHED_EVENT_RUN,
        SCHED_EVENT_SLEEP,
        SCHED_EVENT_WAKEUP,
};

struct sched_atom {
        enum sched_event_type type;
        u64 timestamp;
        u64 duration;
        unsigned long nr;
        int specific_wait;
        sem_t *wait_sem;
        struct task_desc *wakee;
};

static struct task_desc *pid_to_task[MAX_PID];

static struct task_desc **tasks;

static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
static u64 start_time;

static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;

static unsigned long nr_run_events;
static unsigned long nr_sleep_events;
static unsigned long nr_wakeup_events;

static unsigned long nr_sleep_corrections;
static unsigned long nr_run_events_optimized;

static unsigned long targetless_wakeups;
static unsigned long multitarget_wakeups;

static u64 cpu_usage;
static u64 runavg_cpu_usage;
static u64 parent_cpu_usage;
static u64 runavg_parent_cpu_usage;

static unsigned long nr_runs;
static u64 sum_runtime;
static u64 sum_fluct;
static u64 run_avg;

static unsigned long replay_repeat = 10;
static unsigned long nr_timestamps;
static unsigned long nr_unordered_timestamps;
static unsigned long nr_state_machine_bugs;
static unsigned long nr_context_switch_bugs;
static unsigned long nr_events;
static unsigned long nr_lost_chunks;
static unsigned long nr_lost_events;

#define TASK_STATE_TO_CHAR_STR "RSDTtZX"

enum thread_state {
        THREAD_SLEEPING = 0,
        THREAD_WAIT_CPU,
        THREAD_SCHED_IN,
        THREAD_IGNORE
};

struct work_atom {
        struct list_head list;
        enum thread_state state;
        u64 sched_out_time;
        u64 wake_up_time;
        u64 sched_in_time;
        u64 runtime;
};

struct work_atoms {
        struct list_head work_list;
        struct thread *thread;
        struct rb_node node;
        u64 max_lat;
        u64 total_lat;
        u64 nb_atoms;
        u64 total_runtime;
};

typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);

static struct rb_root atom_root, sorted_atom_root;

static u64 all_runtime;
static u64 all_count;


static u64 get_nsecs(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);

        return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

static void burn_nsecs(u64 nsecs)
{
        u64 T0 = get_nsecs(), T1;

        do {
                T1 = get_nsecs();
        } while (T1 + run_measurement_overhead < T0 + nsecs);
}

static void sleep_nsecs(u64 nsecs)
{
        struct timespec ts;

        ts.tv_nsec = nsecs % 999999999;
        ts.tv_sec = nsecs / 999999999;

        nanosleep(&ts, NULL);
}

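/*
 * Calibration: time ten burn_nsecs(0) (resp. short sleep) cycles and
 * keep the minimum, so scheduling noise doesn't inflate the overhead
 * that burn_nsecs() later subtracts from its busy-wait target.
 */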
static void calibrate_run_measurement_overhead(void)
{
        u64 T0, T1, delta, min_delta = 1000000000ULL;
        int i;

        for (i = 0; i < 10; i++) {
                T0 = get_nsecs();
                burn_nsecs(0);
                T1 = get_nsecs();
                delta = T1-T0;
                min_delta = min(min_delta, delta);
        }
        run_measurement_overhead = min_delta;

        printf("run measurement overhead: %Ld nsecs\n", min_delta);
}

static void calibrate_sleep_measurement_overhead(void)
{
        u64 T0, T1, delta, min_delta = 1000000000ULL;
        int i;

        for (i = 0; i < 10; i++) {
                T0 = get_nsecs();
                sleep_nsecs(10000);
                T1 = get_nsecs();
                delta = T1-T0;
                min_delta = min(min_delta, delta);
        }
        min_delta -= 10000;
        sleep_measurement_overhead = min_delta;

        printf("sleep measurement overhead: %Ld nsecs\n", min_delta);
}

static struct sched_atom *
get_new_event(struct task_desc *task, u64 timestamp)
{
        struct sched_atom *event = calloc(1, sizeof(*event));
        unsigned long idx = task->nr_events;
        size_t size;

        event->timestamp = timestamp;
        event->nr = idx;

        task->nr_events++;
        size = sizeof(struct sched_atom *) * task->nr_events;
        task->atoms = realloc(task->atoms, size);
        BUG_ON(!task->atoms);

        task->atoms[idx] = event;

        return event;
}

static struct sched_atom *last_event(struct task_desc *task)
{
        if (!task->nr_events)
                return NULL;

        return task->atoms[task->nr_events - 1];
}

static void
add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration)
{
        struct sched_atom *event, *curr_event = last_event(task);

        /*
         * optimize an existing RUN event by merging this one
         * to it:
         */
        if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
                nr_run_events_optimized++;
                curr_event->duration += duration;
                return;
        }

        event = get_new_event(task, timestamp);

        event->type = SCHED_EVENT_RUN;
        event->duration = duration;

        nr_run_events++;
}

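/*
 * Wire a wakeup to the wakee's pending sleep atom: both sides share a
 * semaphore that the waker sem_post()s and the wakee sem_wait()s at
 * replay time. Wakeups with no sleeping target, or whose target is
 * already paired with a semaphore, are only counted.
 */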
static void
add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
                       struct task_desc *wakee)
{
        struct sched_atom *event, *wakee_event;

        event = get_new_event(task, timestamp);
        event->type = SCHED_EVENT_WAKEUP;
        event->wakee = wakee;

        wakee_event = last_event(wakee);
        if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
                targetless_wakeups++;
                return;
        }
        if (wakee_event->wait_sem) {
                multitarget_wakeups++;
                return;
        }

        wakee_event->wait_sem = calloc(1, sizeof(*wakee_event->wait_sem));
        sem_init(wakee_event->wait_sem, 0, 0);
        wakee_event->specific_wait = 1;
        event->wait_sem = wakee_event->wait_sem;

        nr_wakeup_events++;
}

static void
add_sched_event_sleep(struct task_desc *task, u64 timestamp,
                      u64 task_state __used)
{
        struct sched_atom *event = get_new_event(task, timestamp);

        event->type = SCHED_EVENT_SLEEP;

        nr_sleep_events++;
}

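/*
 * Look up (or lazily create) the replay task for a PID. Every new
 * task starts with a sleep atom, which stays inert unless a wakeup
 * later points at it.
 */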
static struct task_desc *register_pid(unsigned long pid, const char *comm)
{
        struct task_desc *task;

        BUG_ON(pid >= MAX_PID);

        task = pid_to_task[pid];

        if (task)
                return task;

        task = calloc(1, sizeof(*task));
        task->pid = pid;
        task->nr = nr_tasks;
        strcpy(task->comm, comm);
        /*
         * every task starts in sleeping state - this gets ignored
         * if there's no wakeup pointing to this sleep state:
         */
        add_sched_event_sleep(task, 0, 0);

        pid_to_task[pid] = task;
        nr_tasks++;
        tasks = realloc(tasks, nr_tasks*sizeof(struct task_task *));
        BUG_ON(!tasks);
        tasks[task->nr] = task;

        if (verbose)
                printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm);

        return task;
}


static void print_task_traces(void)
{
        struct task_desc *task;
        unsigned long i;

        for (i = 0; i < nr_tasks; i++) {
                task = tasks[i];
                printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
                        task->nr, task->comm, task->pid, task->nr_events);
        }
}

static void add_cross_task_wakeups(void)
{
        struct task_desc *task1, *task2;
        unsigned long i, j;

        for (i = 0; i < nr_tasks; i++) {
                task1 = tasks[i];
                j = i + 1;
                if (j == nr_tasks)
                        j = 0;
                task2 = tasks[j];
                add_sched_event_wakeup(task1, 0, task2);
        }
}

static void
process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
{
        int ret = 0;
        u64 now;
        long long delta;

        now = get_nsecs();
        delta = start_time + atom->timestamp - now;

        switch (atom->type) {
                case SCHED_EVENT_RUN:
                        burn_nsecs(atom->duration);
                        break;
                case SCHED_EVENT_SLEEP:
                        if (atom->wait_sem)
                                ret = sem_wait(atom->wait_sem);
                        BUG_ON(ret);
                        break;
                case SCHED_EVENT_WAKEUP:
                        if (atom->wait_sem)
                                ret = sem_post(atom->wait_sem);
                        BUG_ON(ret);
                        break;
                default:
                        BUG_ON(1);
        }
}

static u64 get_cpu_usage_nsec_parent(void)
{
        struct rusage ru;
        u64 sum;
        int err;

        err = getrusage(RUSAGE_SELF, &ru);
        BUG_ON(err);

        sum = ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3;
        sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3;

        return sum;
}

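/*
 * Per-thread CPU time, read as se.sum_exec_runtime from
 * /proc/<pid>/sched; finer grained than the getrusage() numbers
 * gathered by the parent.
 */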
static u64 get_cpu_usage_nsec_self(void)
{
        char filename [] = "/proc/1234567890/sched";
        unsigned long msecs, nsecs;
        char *line = NULL;
        u64 total = 0;
        size_t len = 0;
        ssize_t chars;
        FILE *file;
        int ret;

        sprintf(filename, "/proc/%d/sched", getpid());
        file = fopen(filename, "r");
        BUG_ON(!file);

        while ((chars = getline(&line, &len, file)) != -1) {
                ret = sscanf(line, "se.sum_exec_runtime : %ld.%06ld\n",
                        &msecs, &nsecs);
                if (ret == 2) {
                        total = msecs*1e6 + nsecs;
                        break;
                }
        }
        if (line)
                free(line);
        fclose(file);

        return total;
}

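/*
 * Worker body: announce readiness, then block on start_work_mutex
 * (held by the parent) as a start barrier, replay all recorded atoms,
 * report CPU usage, and park on work_done_wait_mutex until the next
 * iteration.
 */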
static void *thread_func(void *ctx)
{
        struct task_desc *this_task = ctx;
        u64 cpu_usage_0, cpu_usage_1;
        unsigned long i, ret;
        char comm2[22];

        sprintf(comm2, ":%s", this_task->comm);
        prctl(PR_SET_NAME, comm2);

again:
        ret = sem_post(&this_task->ready_for_work);
        BUG_ON(ret);
        ret = pthread_mutex_lock(&start_work_mutex);
        BUG_ON(ret);
        ret = pthread_mutex_unlock(&start_work_mutex);
        BUG_ON(ret);

        cpu_usage_0 = get_cpu_usage_nsec_self();

        for (i = 0; i < this_task->nr_events; i++) {
                this_task->curr_event = i;
                process_sched_event(this_task, this_task->atoms[i]);
        }

        cpu_usage_1 = get_cpu_usage_nsec_self();
        this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;

        ret = sem_post(&this_task->work_done_sem);
        BUG_ON(ret);

        ret = pthread_mutex_lock(&work_done_wait_mutex);
        BUG_ON(ret);
        ret = pthread_mutex_unlock(&work_done_wait_mutex);
        BUG_ON(ret);

        goto again;
}

static void create_tasks(void)
{
        struct task_desc *task;
        pthread_attr_t attr;
        unsigned long i;
        int err;

        err = pthread_attr_init(&attr);
        BUG_ON(err);
        err = pthread_attr_setstacksize(&attr, (size_t)(16*1024));
        BUG_ON(err);
        err = pthread_mutex_lock(&start_work_mutex);
        BUG_ON(err);
        err = pthread_mutex_lock(&work_done_wait_mutex);
        BUG_ON(err);
        for (i = 0; i < nr_tasks; i++) {
                task = tasks[i];
                sem_init(&task->sleep_sem, 0, 0);
                sem_init(&task->ready_for_work, 0, 0);
                sem_init(&task->work_done_sem, 0, 0);
                task->curr_event = 0;
                err = pthread_create(&task->thread, &attr, thread_func, task);
                BUG_ON(err);
        }
}

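/*
 * One replay iteration, seen from the parent: wait until every worker
 * is ready, re-arm the semaphores, drop start_work_mutex so all
 * workers start together, then collect their CPU usage as they finish.
 */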
static void wait_for_tasks(void)
{
        u64 cpu_usage_0, cpu_usage_1;
        struct task_desc *task;
        unsigned long i, ret;

        start_time = get_nsecs();
        cpu_usage = 0;
        pthread_mutex_unlock(&work_done_wait_mutex);

        for (i = 0; i < nr_tasks; i++) {
                task = tasks[i];
                ret = sem_wait(&task->ready_for_work);
                BUG_ON(ret);
                sem_init(&task->ready_for_work, 0, 0);
        }
        ret = pthread_mutex_lock(&work_done_wait_mutex);
        BUG_ON(ret);

        cpu_usage_0 = get_cpu_usage_nsec_parent();

        pthread_mutex_unlock(&start_work_mutex);

        for (i = 0; i < nr_tasks; i++) {
                task = tasks[i];
                ret = sem_wait(&task->work_done_sem);
                BUG_ON(ret);
                sem_init(&task->work_done_sem, 0, 0);
                cpu_usage += task->cpu_usage;
                task->cpu_usage = 0;
        }

        cpu_usage_1 = get_cpu_usage_nsec_parent();
        if (!runavg_cpu_usage)
                runavg_cpu_usage = cpu_usage;
        runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10;

        parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
        if (!runavg_parent_cpu_usage)
                runavg_parent_cpu_usage = parent_cpu_usage;
        runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 +
                                   parent_cpu_usage)/10;

        ret = pthread_mutex_lock(&start_work_mutex);
        BUG_ON(ret);

        for (i = 0; i < nr_tasks; i++) {
                task = tasks[i];
                sem_init(&task->sleep_sem, 0, 0);
                task->curr_event = 0;
        }
}

static void run_one_test(void)
{
        u64 T0, T1, delta, avg_delta, fluct, std_dev;

        T0 = get_nsecs();
        wait_for_tasks();
        T1 = get_nsecs();

        delta = T1 - T0;
        sum_runtime += delta;
        nr_runs++;

        avg_delta = sum_runtime / nr_runs;
        if (delta < avg_delta)
                fluct = avg_delta - delta;
        else
                fluct = delta - avg_delta;
        sum_fluct += fluct;
        std_dev = sum_fluct / nr_runs / sqrt(nr_runs);
        if (!run_avg)
                run_avg = delta;
        run_avg = (run_avg*9 + delta)/10;

        printf("#%-3ld: %0.3f, ",
                nr_runs, (double)delta/1000000.0);

        printf("ravg: %0.2f, ",
                (double)run_avg/1e6);

        printf("cpu: %0.2f / %0.2f",
                (double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6);

#if 0
        /*
         * rusage statistics done by the parent, these are less
         * accurate than the sum_exec_runtime based statistics:
         */
        printf(" [%0.2f / %0.2f]",
                (double)parent_cpu_usage/1e6,
                (double)runavg_parent_cpu_usage/1e6);
#endif

        printf("\n");

        if (nr_sleep_corrections)
                printf(" (%ld sleep corrections)\n", nr_sleep_corrections);
        nr_sleep_corrections = 0;
}

static void test_calibrations(void)
{
        u64 T0, T1;

        T0 = get_nsecs();
        burn_nsecs(1e6);
        T1 = get_nsecs();

        printf("the run test took %Ld nsecs\n", T1-T0);

        T0 = get_nsecs();
        sleep_nsecs(1e6);
        T1 = get_nsecs();

        printf("the sleep test took %Ld nsecs\n", T1-T0);
}

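/*
 * Threads that appear in a recorded PERF_RECORD_COMM get their name
 * here; tasks that predate the session never will, which is what the
 * trace-payload fallbacks added later in this file are for.
 */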
static int
process_comm_event(event_t *event, unsigned long offset, unsigned long head)
{
        struct thread *thread;

        thread = threads__findnew(event->comm.pid, &threads, &last_match);

        dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
                (void *)(offset + head),
                (void *)(long)(event->header.size),
                event->comm.comm, event->comm.pid);

        if (thread == NULL ||
            thread__set_comm(thread, event->comm.comm)) {
                dump_printf("problem processing perf_event_comm, skipping event.\n");
                return -1;
        }
        total_comm++;

        return 0;
}


struct raw_event_sample {
        u32 size;
        char data[0];
};

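/*
 * Pull typed fields out of a raw tracepoint sample by name, using the
 * event's parsed format description.
 */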
#define FILL_FIELD(ptr, field, event, data) \
        ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)

#define FILL_ARRAY(ptr, array, event, data) \
do { \
        void *__array = raw_field_ptr(event, #array, data); \
        memcpy(ptr.array, __array, sizeof(ptr.array)); \
} while(0)

#define FILL_COMMON_FIELDS(ptr, event, data) \
do { \
        FILL_FIELD(ptr, common_type, event, data); \
        FILL_FIELD(ptr, common_flags, event, data); \
        FILL_FIELD(ptr, common_preempt_count, event, data); \
        FILL_FIELD(ptr, common_pid, event, data); \
        FILL_FIELD(ptr, common_tgid, event, data); \
} while (0)



struct trace_switch_event {
        u32 size;

        u16 common_type;
        u8 common_flags;
        u8 common_preempt_count;
        u32 common_pid;
        u32 common_tgid;

        char prev_comm[16];
        u32 prev_pid;
        u32 prev_prio;
        u64 prev_state;
        char next_comm[16];
        u32 next_pid;
        u32 next_prio;
};

struct trace_runtime_event {
        u32 size;

        u16 common_type;
        u8 common_flags;
        u8 common_preempt_count;
        u32 common_pid;
        u32 common_tgid;

        char comm[16];
        u32 pid;
        u64 runtime;
        u64 vruntime;
};

struct trace_wakeup_event {
        u32 size;

        u16 common_type;
        u8 common_flags;
        u8 common_preempt_count;
        u32 common_pid;
        u32 common_tgid;

        char comm[16];
        u32 pid;

        u32 prio;
        u32 success;
        u32 cpu;
};

struct trace_fork_event {
        u32 size;

        u16 common_type;
        u8 common_flags;
        u8 common_preempt_count;
        u32 common_pid;
        u32 common_tgid;

        char parent_comm[16];
        u32 parent_pid;
        char child_comm[16];
        u32 child_pid;
};

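/*
 * Dispatch table: each perf sched mode (replay below, latency further
 * down) installs its own handlers for the sched tracepoints.
 */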
struct trace_sched_handler {
        void (*switch_event)(struct trace_switch_event *,
                             struct event *,
                             int cpu,
                             u64 timestamp,
                             struct thread *thread);

        void (*runtime_event)(struct trace_runtime_event *,
                              struct event *,
                              int cpu,
                              u64 timestamp,
                              struct thread *thread);

        void (*wakeup_event)(struct trace_wakeup_event *,
                             struct event *,
                             int cpu,
                             u64 timestamp,
                             struct thread *thread);

        void (*fork_event)(struct trace_fork_event *,
                           struct event *,
                           int cpu,
                           u64 timestamp,
                           struct thread *thread);
};


static void
replay_wakeup_event(struct trace_wakeup_event *wakeup_event,
                    struct event *event,
                    int cpu __used,
                    u64 timestamp __used,
                    struct thread *thread __used)
{
        struct task_desc *waker, *wakee;

        if (verbose) {
                printf("sched_wakeup event %p\n", event);

                printf(" ... pid %d woke up %s/%d\n",
                        wakeup_event->common_pid,
                        wakeup_event->comm,
                        wakeup_event->pid);
        }

        waker = register_pid(wakeup_event->common_pid, "<unknown>");
        wakee = register_pid(wakeup_event->pid, wakeup_event->comm);

        add_sched_event_wakeup(waker, timestamp, wakee);
}

static u64 cpu_last_switched[MAX_CPUS];

static void
replay_switch_event(struct trace_switch_event *switch_event,
                    struct event *event,
                    int cpu,
                    u64 timestamp,
                    struct thread *thread __used)
{
        struct task_desc *prev, *next;
        u64 timestamp0;
        s64 delta;

        if (verbose)
                printf("sched_switch event %p\n", event);

        if (cpu >= MAX_CPUS || cpu < 0)
                return;

        timestamp0 = cpu_last_switched[cpu];
        if (timestamp0)
                delta = timestamp - timestamp0;
        else
                delta = 0;

        if (delta < 0)
                die("hm, delta: %Ld < 0 ?\n", delta);

        if (verbose) {
                printf(" ... switch from %s/%d to %s/%d [ran %Ld nsecs]\n",
                        switch_event->prev_comm, switch_event->prev_pid,
                        switch_event->next_comm, switch_event->next_pid,
                        delta);
        }

        prev = register_pid(switch_event->prev_pid, switch_event->prev_comm);
        next = register_pid(switch_event->next_pid, switch_event->next_comm);

        cpu_last_switched[cpu] = timestamp;

        add_sched_event_run(prev, timestamp, delta);
        add_sched_event_sleep(prev, timestamp, switch_event->prev_state);
}


static void
replay_fork_event(struct trace_fork_event *fork_event,
                  struct event *event,
                  int cpu __used,
                  u64 timestamp __used,
                  struct thread *thread __used)
{
        if (verbose) {
                printf("sched_fork event %p\n", event);
                printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid);
                printf("... child: %s/%d\n", fork_event->child_comm, fork_event->child_pid);
        }
        register_pid(fork_event->parent_pid, fork_event->parent_comm);
        register_pid(fork_event->child_pid, fork_event->child_comm);
}

static struct trace_sched_handler replay_ops = {
        .wakeup_event = replay_wakeup_event,
        .switch_event = replay_switch_event,
        .fork_event = replay_fork_event,
};

struct sort_dimension {
        const char *name;
        sort_fn_t cmp;
        struct list_head list;
};

static LIST_HEAD(cmp_pid);

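/*
 * Compare two threads' work_atoms along the configured sort keys; the
 * first key that differs decides.
 */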
static int
thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
{
        struct sort_dimension *sort;
        int ret = 0;

        BUG_ON(list_empty(list));

        list_for_each_entry(sort, list, list) {
                ret = sort->cmp(l, r);
                if (ret)
                        return ret;
        }

        return ret;
}

static struct work_atoms *
thread_atoms_search(struct rb_root *root, struct thread *thread,
                    struct list_head *sort_list)
{
        struct rb_node *node = root->rb_node;
        struct work_atoms key = { .thread = thread };

        while (node) {
                struct work_atoms *atoms;
                int cmp;

                atoms = container_of(node, struct work_atoms, node);

                cmp = thread_lat_cmp(sort_list, &key, atoms);
                if (cmp > 0)
                        node = node->rb_left;
                else if (cmp < 0)
                        node = node->rb_right;
                else {
                        BUG_ON(thread != atoms->thread);
                        return atoms;
                }
        }
        return NULL;
}

static void
__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
                        struct list_head *sort_list)
{
        struct rb_node **new = &(root->rb_node), *parent = NULL;

        while (*new) {
                struct work_atoms *this;
                int cmp;

                this = container_of(*new, struct work_atoms, node);
                parent = *new;

                cmp = thread_lat_cmp(sort_list, data, this);

                if (cmp > 0)
                        new = &((*new)->rb_left);
                else
                        new = &((*new)->rb_right);
        }

        rb_link_node(&data->node, parent, new);
        rb_insert_color(&data->node, root);
}

static void thread_atoms_insert(struct thread *thread)
{
        struct work_atoms *atoms;

        atoms = calloc(sizeof(*atoms), 1);
        if (!atoms)
                die("No memory");

        atoms->thread = thread;
        INIT_LIST_HEAD(&atoms->work_list);
        __thread_latency_insert(&atom_root, atoms, &cmp_pid);
}

static void
latency_fork_event(struct trace_fork_event *fork_event __used,
                   struct event *event __used,
                   int cpu __used,
                   u64 timestamp __used,
                   struct thread *thread __used)
{
        /* should insert the newcomer */
}

__used
static char sched_out_state(struct trace_switch_event *switch_event)
{
        const char *str = TASK_STATE_TO_CHAR_STR;

        return str[switch_event->prev_state];
}

static void
add_sched_out_event(struct work_atoms *atoms,
                    char run_state,
                    u64 timestamp)
{
        struct work_atom *atom;

        atom = calloc(sizeof(*atom), 1);
        if (!atom)
                die("Non memory");

        atom->sched_out_time = timestamp;

        if (run_state == 'R') {
                atom->state = THREAD_WAIT_CPU;
                atom->wake_up_time = atom->sched_out_time;
        }

        list_add_tail(&atom->list, &atoms->work_list);
}

static void
add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used)
{
        struct work_atom *atom;

        BUG_ON(list_empty(&atoms->work_list));

        atom = list_entry(atoms->work_list.prev, struct work_atom, list);

        atom->runtime += delta;
        atoms->total_runtime += delta;
}

static void
add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
{
        struct work_atom *atom;
        u64 delta;

        if (list_empty(&atoms->work_list))
                return;

        atom = list_entry(atoms->work_list.prev, struct work_atom, list);

        if (atom->state != THREAD_WAIT_CPU)
                return;

        if (timestamp < atom->wake_up_time) {
                atom->state = THREAD_IGNORE;
                return;
        }

        atom->state = THREAD_SCHED_IN;
        atom->sched_in_time = timestamp;

        delta = atom->sched_in_time - atom->wake_up_time;
        atoms->total_lat += delta;
        if (delta > atoms->max_lat)
                atoms->max_lat = delta;
        atoms->nb_atoms++;
}

1037 | static struct thread * | ||
1038 | threads__findnew_from_ctx(u32 pid, struct trace_switch_event *switch_event) | ||
1039 | { | ||
1040 | struct thread *th; | ||
1041 | |||
1042 | th = threads__findnew_nocomm(pid, &threads, &last_match); | ||
1043 | if (th->comm) | ||
1044 | return th; | ||
1045 | |||
1046 | if (pid == switch_event->prev_pid) | ||
1047 | thread__set_comm(th, switch_event->prev_comm); | ||
1048 | else | ||
1049 | thread__set_comm(th, switch_event->next_comm); | ||
1050 | return th; | ||
1051 | } | ||
1052 | |||
1053 | static struct thread * | ||
1054 | threads__findnew_from_wakeup(struct trace_wakeup_event *wakeup_event) | ||
1055 | { | ||
1056 | struct thread *th; | ||
1057 | |||
1058 | th = threads__findnew_nocomm(wakeup_event->pid, &threads, &last_match); | ||
1059 | if (th->comm) | ||
1060 | return th; | ||
1061 | |||
1062 | thread__set_comm(th, wakeup_event->comm); | ||
1063 | |||
1064 | return th; | ||
1065 | } | ||
1066 | |||
1037 | static void | 1067 | static void |
1038 | latency_switch_event(struct trace_switch_event *switch_event, | 1068 | latency_switch_event(struct trace_switch_event *switch_event, |
1039 | struct event *event __used, | 1069 | struct event *event __used, |
1040 | int cpu, | 1070 | int cpu, |
1041 | u64 timestamp, | 1071 | u64 timestamp, |
1042 | struct thread *thread __used) | 1072 | struct thread *thread __used) |
1043 | { | 1073 | { |
1044 | struct work_atoms *out_events, *in_events; | 1074 | struct work_atoms *out_events, *in_events; |
1045 | struct thread *sched_out, *sched_in; | 1075 | struct thread *sched_out, *sched_in; |
1046 | u64 timestamp0; | 1076 | u64 timestamp0; |
1047 | s64 delta; | 1077 | s64 delta; |
1048 | 1078 | ||
1049 | BUG_ON(cpu >= MAX_CPUS || cpu < 0); | 1079 | BUG_ON(cpu >= MAX_CPUS || cpu < 0); |
1050 | 1080 | ||
1051 | timestamp0 = cpu_last_switched[cpu]; | 1081 | timestamp0 = cpu_last_switched[cpu]; |
1052 | cpu_last_switched[cpu] = timestamp; | 1082 | cpu_last_switched[cpu] = timestamp; |
1053 | if (timestamp0) | 1083 | if (timestamp0) |
1054 | delta = timestamp - timestamp0; | 1084 | delta = timestamp - timestamp0; |
1055 | else | 1085 | else |
1056 | delta = 0; | 1086 | delta = 0; |
1057 | 1087 | ||
1058 | if (delta < 0) | 1088 | if (delta < 0) |
1059 | die("hm, delta: %Ld < 0 ?\n", delta); | 1089 | die("hm, delta: %Ld < 0 ?\n", delta); |
1060 | 1090 | ||
1061 | 1091 | ||
1062 | sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); | 1092 | sched_out = threads__findnew_from_ctx(switch_event->prev_pid, |
1063 | sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); | 1093 | switch_event); |
1094 | sched_in = threads__findnew_from_ctx(switch_event->next_pid, | ||
1095 | switch_event); | ||
1064 | 1096 | ||
1065 | out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); | 1097 | out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); |
1066 | if (!out_events) { | 1098 | if (!out_events) { |
1067 | thread_atoms_insert(sched_out); | 1099 | thread_atoms_insert(sched_out); |
1068 | out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); | 1100 | out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); |
1069 | if (!out_events) | 1101 | if (!out_events) |
1070 | die("out-event: Internal tree error"); | 1102 | die("out-event: Internal tree error"); |
1071 | } | 1103 | } |
1072 | add_sched_out_event(out_events, sched_out_state(switch_event), timestamp); | 1104 | add_sched_out_event(out_events, sched_out_state(switch_event), timestamp); |
1073 | 1105 | ||
1074 | in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); | 1106 | in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); |
1075 | if (!in_events) { | 1107 | if (!in_events) { |
1076 | thread_atoms_insert(sched_in); | 1108 | thread_atoms_insert(sched_in); |
1077 | in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); | 1109 | in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); |
1078 | if (!in_events) | 1110 | if (!in_events) |
1079 | die("in-event: Internal tree error"); | 1111 | die("in-event: Internal tree error"); |
1080 | /* | 1112 | /* |
1081 | * Task came in that we have not heard about yet, | 1113 | * Task came in that we have not heard about yet, |
1082 | * add in an initial atom in runnable state: | 1114 | * add in an initial atom in runnable state: |
1083 | */ | 1115 | */ |
1084 | add_sched_out_event(in_events, 'R', timestamp); | 1116 | add_sched_out_event(in_events, 'R', timestamp); |
1085 | } | 1117 | } |
1086 | add_sched_in_event(in_events, timestamp); | 1118 | add_sched_in_event(in_events, timestamp); |
1087 | } | 1119 | } |
1088 | 1120 | ||
1089 | static void | 1121 | static void |
1090 | latency_runtime_event(struct trace_runtime_event *runtime_event, | 1122 | latency_runtime_event(struct trace_runtime_event *runtime_event, |
1091 | struct event *event __used, | 1123 | struct event *event __used, |
1092 | int cpu, | 1124 | int cpu, |
1093 | u64 timestamp, | 1125 | u64 timestamp, |
1094 | struct thread *this_thread __used) | 1126 | struct thread *this_thread __used) |
1095 | { | 1127 | { |
1096 | struct work_atoms *atoms; | 1128 | struct work_atoms *atoms; |
1097 | struct thread *thread; | 1129 | struct thread *thread; |
1098 | 1130 | ||
1099 | BUG_ON(cpu >= MAX_CPUS || cpu < 0); | 1131 | BUG_ON(cpu >= MAX_CPUS || cpu < 0); |
1100 | 1132 | ||
1101 | thread = threads__findnew(runtime_event->pid, &threads, &last_match); | 1133 | thread = threads__findnew(runtime_event->pid, &threads, &last_match); |
1102 | atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); | 1134 | atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); |
1103 | if (!atoms) { | 1135 | if (!atoms) { |
1104 | thread_atoms_insert(thread); | 1136 | thread_atoms_insert(thread); |
1105 | atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); | 1137 | atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); |
1106 | if (!atoms) | 1138 | if (!atoms) |
1107 | die("in-event: Internal tree error"); | 1139 | die("in-event: Internal tree error"); |
1108 | add_sched_out_event(atoms, 'R', timestamp); | 1140 | add_sched_out_event(atoms, 'R', timestamp); |
1109 | } | 1141 | } |
1110 | 1142 | ||
1111 | add_runtime_event(atoms, runtime_event->runtime, timestamp); | 1143 | add_runtime_event(atoms, runtime_event->runtime, timestamp); |
1112 | } | 1144 | } |
1113 | 1145 | ||
1114 | static void | 1146 | static void |
1115 | latency_wakeup_event(struct trace_wakeup_event *wakeup_event, | 1147 | latency_wakeup_event(struct trace_wakeup_event *wakeup_event, |
1116 | struct event *__event __used, | 1148 | struct event *__event __used, |
1117 | int cpu __used, | 1149 | int cpu __used, |
1118 | u64 timestamp, | 1150 | u64 timestamp, |
1119 | struct thread *thread __used) | 1151 | struct thread *thread __used) |
1120 | { | 1152 | { |
1121 | struct work_atoms *atoms; | 1153 | struct work_atoms *atoms; |
1122 | struct work_atom *atom; | 1154 | struct work_atom *atom; |
1123 | struct thread *wakee; | 1155 | struct thread *wakee; |
1124 | 1156 | ||
1125 | /* Note for later: it may be interesting to observe the failing cases */ | 1157 | /* Note for later: it may be interesting to observe the failing cases */ |
1126 | if (!wakeup_event->success) | 1158 | if (!wakeup_event->success) |
1127 | return; | 1159 | return; |
1128 | 1160 | ||
1129 | wakee = threads__findnew(wakeup_event->pid, &threads, &last_match); | 1161 | wakee = threads__findnew_from_wakeup(wakeup_event); |
1130 | atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); | 1162 | atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); |
1131 | if (!atoms) { | 1163 | if (!atoms) { |
1132 | thread_atoms_insert(wakee); | 1164 | thread_atoms_insert(wakee); |
1133 | atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); | 1165 | atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); |
1134 | if (!atoms) | 1166 | if (!atoms) |
1135 | die("wakeup-event: Internal tree error"); | 1167 | die("wakeup-event: Internal tree error"); |
1136 | add_sched_out_event(atoms, 'S', timestamp); | 1168 | add_sched_out_event(atoms, 'S', timestamp); |
1137 | } | 1169 | } |
1138 | 1170 | ||
1139 | BUG_ON(list_empty(&atoms->work_list)); | 1171 | BUG_ON(list_empty(&atoms->work_list)); |
1140 | 1172 | ||
1141 | atom = list_entry(atoms->work_list.prev, struct work_atom, list); | 1173 | atom = list_entry(atoms->work_list.prev, struct work_atom, list); |
1142 | 1174 | ||
1143 | if (atom->state != THREAD_SLEEPING) | 1175 | if (atom->state != THREAD_SLEEPING) |
1144 | nr_state_machine_bugs++; | 1176 | nr_state_machine_bugs++; |
1145 | 1177 | ||
1146 | nr_timestamps++; | 1178 | nr_timestamps++; |
1147 | if (atom->sched_out_time > timestamp) { | 1179 | if (atom->sched_out_time > timestamp) { |
1148 | nr_unordered_timestamps++; | 1180 | nr_unordered_timestamps++; |
1149 | return; | 1181 | return; |
1150 | } | 1182 | } |
1151 | 1183 | ||
1152 | atom->state = THREAD_WAIT_CPU; | 1184 | atom->state = THREAD_WAIT_CPU; |
1153 | atom->wake_up_time = timestamp; | 1185 | atom->wake_up_time = timestamp; |
1154 | } | 1186 | } |
1155 | 1187 | ||
1156 | static struct trace_sched_handler lat_ops = { | 1188 | static struct trace_sched_handler lat_ops = { |
1157 | .wakeup_event = latency_wakeup_event, | 1189 | .wakeup_event = latency_wakeup_event, |
1158 | .switch_event = latency_switch_event, | 1190 | .switch_event = latency_switch_event, |
1159 | .runtime_event = latency_runtime_event, | 1191 | .runtime_event = latency_runtime_event, |
1160 | .fork_event = latency_fork_event, | 1192 | .fork_event = latency_fork_event, |
1161 | }; | 1193 | }; |
1162 | 1194 | ||
1163 | static void output_lat_thread(struct work_atoms *work_list) | 1195 | static void output_lat_thread(struct work_atoms *work_list) |
1164 | { | 1196 | { |
1165 | int i; | 1197 | int i; |
1166 | int ret; | 1198 | int ret; |
1167 | u64 avg; | 1199 | u64 avg; |
1168 | 1200 | ||
1169 | if (!work_list->nb_atoms) | 1201 | if (!work_list->nb_atoms) |
1170 | return; | 1202 | return; |
1171 | /* | 1203 | /* |
1172 | * Ignore idle threads: | 1204 | * Ignore idle threads: |
1173 | */ | 1205 | */ |
1174 | if (!strcmp(work_list->thread->comm, "swapper")) | 1206 | if (!strcmp(work_list->thread->comm, "swapper")) |
1175 | return; | 1207 | return; |
1176 | 1208 | ||
1177 | all_runtime += work_list->total_runtime; | 1209 | all_runtime += work_list->total_runtime; |
1178 | all_count += work_list->nb_atoms; | 1210 | all_count += work_list->nb_atoms; |
1179 | 1211 | ||
1180 | ret = printf(" %s:%d ", work_list->thread->comm, work_list->thread->pid); | 1212 | ret = printf(" %s:%d ", work_list->thread->comm, work_list->thread->pid); |
1181 | 1213 | ||
1182 | for (i = 0; i < 24 - ret; i++) | 1214 | for (i = 0; i < 24 - ret; i++) |
1183 | printf(" "); | 1215 | printf(" "); |
1184 | 1216 | ||
1185 | avg = work_list->total_lat / work_list->nb_atoms; | 1217 | avg = work_list->total_lat / work_list->nb_atoms; |
1186 | 1218 | ||
1187 | printf("|%11.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms |\n", | 1219 | printf("|%11.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms |\n", |
1188 | (double)work_list->total_runtime / 1e6, | 1220 | (double)work_list->total_runtime / 1e6, |
1189 | work_list->nb_atoms, (double)avg / 1e6, | 1221 | work_list->nb_atoms, (double)avg / 1e6, |
1190 | (double)work_list->max_lat / 1e6); | 1222 | (double)work_list->max_lat / 1e6); |
1191 | } | 1223 | } |
1192 | 1224 | ||
1193 | static int pid_cmp(struct work_atoms *l, struct work_atoms *r) | 1225 | static int pid_cmp(struct work_atoms *l, struct work_atoms *r) |
1194 | { | 1226 | { |
1195 | if (l->thread->pid < r->thread->pid) | 1227 | if (l->thread->pid < r->thread->pid) |
1196 | return -1; | 1228 | return -1; |
1197 | if (l->thread->pid > r->thread->pid) | 1229 | if (l->thread->pid > r->thread->pid) |
1198 | return 1; | 1230 | return 1; |
1199 | 1231 | ||
1200 | return 0; | 1232 | return 0; |
1201 | } | 1233 | } |
1202 | 1234 | ||
1203 | static struct sort_dimension pid_sort_dimension = { | 1235 | static struct sort_dimension pid_sort_dimension = { |
1204 | .name = "pid", | 1236 | .name = "pid", |
1205 | .cmp = pid_cmp, | 1237 | .cmp = pid_cmp, |
1206 | }; | 1238 | }; |
1207 | 1239 | ||
1208 | static int avg_cmp(struct work_atoms *l, struct work_atoms *r) | 1240 | static int avg_cmp(struct work_atoms *l, struct work_atoms *r) |
1209 | { | 1241 | { |
1210 | u64 avgl, avgr; | 1242 | u64 avgl, avgr; |
1211 | 1243 | ||
1212 | if (!l->nb_atoms) | 1244 | if (!l->nb_atoms) |
1213 | return -1; | 1245 | return -1; |
1214 | 1246 | ||
1215 | if (!r->nb_atoms) | 1247 | if (!r->nb_atoms) |
1216 | return 1; | 1248 | return 1; |
1217 | 1249 | ||
1218 | avgl = l->total_lat / l->nb_atoms; | 1250 | avgl = l->total_lat / l->nb_atoms; |
1219 | avgr = r->total_lat / r->nb_atoms; | 1251 | avgr = r->total_lat / r->nb_atoms; |
1220 | 1252 | ||
1221 | if (avgl < avgr) | 1253 | if (avgl < avgr) |
1222 | return -1; | 1254 | return -1; |
1223 | if (avgl > avgr) | 1255 | if (avgl > avgr) |
1224 | return 1; | 1256 | return 1; |
1225 | 1257 | ||
1226 | return 0; | 1258 | return 0; |
1227 | } | 1259 | } |
1228 | 1260 | ||
1229 | static struct sort_dimension avg_sort_dimension = { | 1261 | static struct sort_dimension avg_sort_dimension = { |
1230 | .name = "avg", | 1262 | .name = "avg", |
1231 | .cmp = avg_cmp, | 1263 | .cmp = avg_cmp, |
1232 | }; | 1264 | }; |
1233 | 1265 | ||
1234 | static int max_cmp(struct work_atoms *l, struct work_atoms *r) | 1266 | static int max_cmp(struct work_atoms *l, struct work_atoms *r) |
1235 | { | 1267 | { |
1236 | if (l->max_lat < r->max_lat) | 1268 | if (l->max_lat < r->max_lat) |
1237 | return -1; | 1269 | return -1; |
1238 | if (l->max_lat > r->max_lat) | 1270 | if (l->max_lat > r->max_lat) |
1239 | return 1; | 1271 | return 1; |
1240 | 1272 | ||
1241 | return 0; | 1273 | return 0; |
1242 | } | 1274 | } |
1243 | 1275 | ||
1244 | static struct sort_dimension max_sort_dimension = { | 1276 | static struct sort_dimension max_sort_dimension = { |
1245 | .name = "max", | 1277 | .name = "max", |
1246 | .cmp = max_cmp, | 1278 | .cmp = max_cmp, |
1247 | }; | 1279 | }; |
1248 | 1280 | ||
1249 | static int switch_cmp(struct work_atoms *l, struct work_atoms *r) | 1281 | static int switch_cmp(struct work_atoms *l, struct work_atoms *r) |
1250 | { | 1282 | { |
1251 | if (l->nb_atoms < r->nb_atoms) | 1283 | if (l->nb_atoms < r->nb_atoms) |
1252 | return -1; | 1284 | return -1; |
1253 | if (l->nb_atoms > r->nb_atoms) | 1285 | if (l->nb_atoms > r->nb_atoms) |
1254 | return 1; | 1286 | return 1; |
1255 | 1287 | ||
1256 | return 0; | 1288 | return 0; |
1257 | } | 1289 | } |
1258 | 1290 | ||
1259 | static struct sort_dimension switch_sort_dimension = { | 1291 | static struct sort_dimension switch_sort_dimension = { |
1260 | .name = "switch", | 1292 | .name = "switch", |
1261 | .cmp = switch_cmp, | 1293 | .cmp = switch_cmp, |
1262 | }; | 1294 | }; |
1263 | 1295 | ||
1264 | static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) | 1296 | static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) |
1265 | { | 1297 | { |
1266 | if (l->total_runtime < r->total_runtime) | 1298 | if (l->total_runtime < r->total_runtime) |
1267 | return -1; | 1299 | return -1; |
1268 | if (l->total_runtime > r->total_runtime) | 1300 | if (l->total_runtime > r->total_runtime) |
1269 | return 1; | 1301 | return 1; |
1270 | 1302 | ||
1271 | return 0; | 1303 | return 0; |
1272 | } | 1304 | } |
1273 | 1305 | ||
1274 | static struct sort_dimension runtime_sort_dimension = { | 1306 | static struct sort_dimension runtime_sort_dimension = { |
1275 | .name = "runtime", | 1307 | .name = "runtime", |
1276 | .cmp = runtime_cmp, | 1308 | .cmp = runtime_cmp, |
1277 | }; | 1309 | }; |
1278 | 1310 | ||
1279 | static struct sort_dimension *available_sorts[] = { | 1311 | static struct sort_dimension *available_sorts[] = { |
1280 | &pid_sort_dimension, | 1312 | &pid_sort_dimension, |
1281 | &avg_sort_dimension, | 1313 | &avg_sort_dimension, |
1282 | &max_sort_dimension, | 1314 | &max_sort_dimension, |
1283 | &switch_sort_dimension, | 1315 | &switch_sort_dimension, |
1284 | &runtime_sort_dimension, | 1316 | &runtime_sort_dimension, |
1285 | }; | 1317 | }; |
1286 | 1318 | ||
1287 | #define NB_AVAILABLE_SORTS (int)(sizeof(available_sorts) / sizeof(struct sort_dimension *)) | 1319 | #define NB_AVAILABLE_SORTS (int)(sizeof(available_sorts) / sizeof(struct sort_dimension *)) |
1288 | 1320 | ||
1289 | static LIST_HEAD(sort_list); | 1321 | static LIST_HEAD(sort_list); |
1290 | 1322 | ||
1291 | static int sort_dimension__add(char *tok, struct list_head *list) | 1323 | static int sort_dimension__add(char *tok, struct list_head *list) |
1292 | { | 1324 | { |
1293 | int i; | 1325 | int i; |
1294 | 1326 | ||
1295 | for (i = 0; i < NB_AVAILABLE_SORTS; i++) { | 1327 | for (i = 0; i < NB_AVAILABLE_SORTS; i++) { |
1296 | if (!strcmp(available_sorts[i]->name, tok)) { | 1328 | if (!strcmp(available_sorts[i]->name, tok)) { |
1297 | list_add_tail(&available_sorts[i]->list, list); | 1329 | list_add_tail(&available_sorts[i]->list, list); |
1298 | 1330 | ||
1299 | return 0; | 1331 | return 0; |
1300 | } | 1332 | } |
1301 | } | 1333 | } |
1302 | 1334 | ||
1303 | return -1; | 1335 | return -1; |
1304 | } | 1336 | } |
1305 | 1337 | ||
1306 | static void setup_sorting(void); | 1338 | static void setup_sorting(void); |
1307 | 1339 | ||
1308 | static void sort_lat(void) | 1340 | static void sort_lat(void) |
1309 | { | 1341 | { |
1310 | struct rb_node *node; | 1342 | struct rb_node *node; |
1311 | 1343 | ||
1312 | for (;;) { | 1344 | for (;;) { |
1313 | struct work_atoms *data; | 1345 | struct work_atoms *data; |
1314 | node = rb_first(&atom_root); | 1346 | node = rb_first(&atom_root); |
1315 | if (!node) | 1347 | if (!node) |
1316 | break; | 1348 | break; |
1317 | 1349 | ||
1318 | rb_erase(node, &atom_root); | 1350 | rb_erase(node, &atom_root); |
1319 | data = rb_entry(node, struct work_atoms, node); | 1351 | data = rb_entry(node, struct work_atoms, node); |
1320 | __thread_latency_insert(&sorted_atom_root, data, &sort_list); | 1352 | __thread_latency_insert(&sorted_atom_root, data, &sort_list); |
1321 | } | 1353 | } |
1322 | } | 1354 | } |
1323 | 1355 | ||
1324 | static struct trace_sched_handler *trace_handler; | 1356 | static struct trace_sched_handler *trace_handler; |
1325 | 1357 | ||
1326 | static void | 1358 | static void |
1327 | process_sched_wakeup_event(struct raw_event_sample *raw, | 1359 | process_sched_wakeup_event(struct raw_event_sample *raw, |
1328 | struct event *event, | 1360 | struct event *event, |
1329 | int cpu __used, | 1361 | int cpu __used, |
1330 | u64 timestamp __used, | 1362 | u64 timestamp __used, |
1331 | struct thread *thread __used) | 1363 | struct thread *thread __used) |
1332 | { | 1364 | { |
1333 | struct trace_wakeup_event wakeup_event; | 1365 | struct trace_wakeup_event wakeup_event; |
1334 | 1366 | ||
1335 | FILL_COMMON_FIELDS(wakeup_event, event, raw->data); | 1367 | FILL_COMMON_FIELDS(wakeup_event, event, raw->data); |
1336 | 1368 | ||
1337 | FILL_ARRAY(wakeup_event, comm, event, raw->data); | 1369 | FILL_ARRAY(wakeup_event, comm, event, raw->data); |
1338 | FILL_FIELD(wakeup_event, pid, event, raw->data); | 1370 | FILL_FIELD(wakeup_event, pid, event, raw->data); |
1339 | FILL_FIELD(wakeup_event, prio, event, raw->data); | 1371 | FILL_FIELD(wakeup_event, prio, event, raw->data); |
1340 | FILL_FIELD(wakeup_event, success, event, raw->data); | 1372 | FILL_FIELD(wakeup_event, success, event, raw->data); |
1341 | FILL_FIELD(wakeup_event, cpu, event, raw->data); | 1373 | FILL_FIELD(wakeup_event, cpu, event, raw->data); |
1342 | 1374 | ||
1343 | if (trace_handler->wakeup_event) | 1375 | if (trace_handler->wakeup_event) |
1344 | trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread); | 1376 | trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread); |
1345 | } | 1377 | } |
1346 | 1378 | ||
1347 | /* | 1379 | /* |
1348 | * Track the current task - that way we can know whether there are any | 1380 | * Track the current task - that way we can know whether there are any |
1349 | * weird events, such as a task being switched away that is not current. | 1381 | * weird events, such as a task being switched away that is not current. |
1350 | */ | 1382 | */ |
1351 | static int max_cpu; | 1383 | static int max_cpu; |
1352 | 1384 | ||
1353 | static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 }; | 1385 | static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 }; |
1354 | 1386 | ||
1355 | static struct thread *curr_thread[MAX_CPUS]; | 1387 | static struct thread *curr_thread[MAX_CPUS]; |
1356 | 1388 | ||
1357 | static char next_shortname1 = 'A'; | 1389 | static char next_shortname1 = 'A'; |
1358 | static char next_shortname2 = '0'; | 1390 | static char next_shortname2 = '0'; |
1359 | 1391 | ||
1360 | static void | 1392 | static void |
1361 | map_switch_event(struct trace_switch_event *switch_event, | 1393 | map_switch_event(struct trace_switch_event *switch_event, |
1362 | struct event *event __used, | 1394 | struct event *event __used, |
1363 | int this_cpu, | 1395 | int this_cpu, |
1364 | u64 timestamp, | 1396 | u64 timestamp, |
1365 | struct thread *thread __used) | 1397 | struct thread *thread __used) |
1366 | { | 1398 | { |
1367 | struct thread *sched_out, *sched_in; | 1399 | struct thread *sched_out, *sched_in; |
1368 | int new_shortname; | 1400 | int new_shortname; |
1369 | u64 timestamp0; | 1401 | u64 timestamp0; |
1370 | s64 delta; | 1402 | s64 delta; |
1371 | int cpu; | 1403 | int cpu; |
1372 | 1404 | ||
1373 | BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); | 1405 | BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); |
1374 | 1406 | ||
1375 | if (this_cpu > max_cpu) | 1407 | if (this_cpu > max_cpu) |
1376 | max_cpu = this_cpu; | 1408 | max_cpu = this_cpu; |
1377 | 1409 | ||
1378 | timestamp0 = cpu_last_switched[this_cpu]; | 1410 | timestamp0 = cpu_last_switched[this_cpu]; |
1379 | cpu_last_switched[this_cpu] = timestamp; | 1411 | cpu_last_switched[this_cpu] = timestamp; |
1380 | if (timestamp0) | 1412 | if (timestamp0) |
1381 | delta = timestamp - timestamp0; | 1413 | delta = timestamp - timestamp0; |
1382 | else | 1414 | else |
1383 | delta = 0; | 1415 | delta = 0; |
1384 | 1416 | ||
1385 | if (delta < 0) | 1417 | if (delta < 0) |
1386 | die("hm, delta: %Ld < 0 ?\n", delta); | 1418 | die("hm, delta: %Ld < 0 ?\n", delta); |
1387 | 1419 | ||
1388 | 1420 | ||
1389 | sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); | 1421 | sched_out = threads__findnew_from_ctx(switch_event->prev_pid, |
1390 | sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); | 1422 | switch_event); |
1423 | sched_in = threads__findnew_from_ctx(switch_event->next_pid, | ||
1424 | switch_event); | ||
1391 | 1425 | ||
1392 | curr_thread[this_cpu] = sched_in; | 1426 | curr_thread[this_cpu] = sched_in; |
1393 | 1427 | ||
1394 | printf(" "); | 1428 | printf(" "); |
1395 | 1429 | ||
1396 | new_shortname = 0; | 1430 | new_shortname = 0; |
1397 | if (!sched_in->shortname[0]) { | 1431 | if (!sched_in->shortname[0]) { |
1398 | sched_in->shortname[0] = next_shortname1; | 1432 | sched_in->shortname[0] = next_shortname1; |
1399 | sched_in->shortname[1] = next_shortname2; | 1433 | sched_in->shortname[1] = next_shortname2; |
1400 | 1434 | ||
1401 | if (next_shortname1 < 'Z') { | 1435 | if (next_shortname1 < 'Z') { |
1402 | next_shortname1++; | 1436 | next_shortname1++; |
1403 | } else { | 1437 | } else { |
1404 | next_shortname1='A'; | 1438 | next_shortname1='A'; |
1405 | if (next_shortname2 < '9') { | 1439 | if (next_shortname2 < '9') { |
1406 | next_shortname2++; | 1440 | next_shortname2++; |
1407 | } else { | 1441 | } else { |
1408 | next_shortname2='0'; | 1442 | next_shortname2='0'; |
1409 | } | 1443 | } |
1410 | } | 1444 | } |
1411 | new_shortname = 1; | 1445 | new_shortname = 1; |
1412 | } | 1446 | } |
1413 | 1447 | ||
1414 | for (cpu = 0; cpu <= max_cpu; cpu++) { | 1448 | for (cpu = 0; cpu <= max_cpu; cpu++) { |
1415 | if (cpu != this_cpu) | 1449 | if (cpu != this_cpu) |
1416 | printf(" "); | 1450 | printf(" "); |
1417 | else | 1451 | else |
1418 | printf("*"); | 1452 | printf("*"); |
1419 | 1453 | ||
1420 | if (curr_thread[cpu]) { | 1454 | if (curr_thread[cpu]) { |
1421 | if (curr_thread[cpu]->pid) | 1455 | if (curr_thread[cpu]->pid) |
1422 | printf("%2s ", curr_thread[cpu]->shortname); | 1456 | printf("%2s ", curr_thread[cpu]->shortname); |
1423 | else | 1457 | else |
1424 | printf(". "); | 1458 | printf(". "); |
1425 | } else | 1459 | } else |
1426 | printf(" "); | 1460 | printf(" "); |
1427 | } | 1461 | } |
1428 | 1462 | ||
1429 | printf(" %12.6f secs ", (double)timestamp/1e9); | 1463 | printf(" %12.6f secs ", (double)timestamp/1e9); |
1430 | if (new_shortname) { | 1464 | if (new_shortname) { |
1431 | printf("%s => %s:%d\n", | 1465 | printf("%s => %s:%d\n", |
1432 | sched_in->shortname, sched_in->comm, sched_in->pid); | 1466 | sched_in->shortname, sched_in->comm, sched_in->pid); |
1433 | } else { | 1467 | } else { |
1434 | printf("\n"); | 1468 | printf("\n"); |
1435 | } | 1469 | } |
1436 | } | 1470 | } |
1437 | 1471 | ||
1438 | 1472 | ||
1439 | static void | 1473 | static void |
1440 | process_sched_switch_event(struct raw_event_sample *raw, | 1474 | process_sched_switch_event(struct raw_event_sample *raw, |
1441 | struct event *event, | 1475 | struct event *event, |
1442 | int this_cpu, | 1476 | int this_cpu, |
1443 | u64 timestamp __used, | 1477 | u64 timestamp __used, |
1444 | struct thread *thread __used) | 1478 | struct thread *thread __used) |
1445 | { | 1479 | { |
1446 | struct trace_switch_event switch_event; | 1480 | struct trace_switch_event switch_event; |
1447 | 1481 | ||
1448 | FILL_COMMON_FIELDS(switch_event, event, raw->data); | 1482 | FILL_COMMON_FIELDS(switch_event, event, raw->data); |
1449 | 1483 | ||
1450 | FILL_ARRAY(switch_event, prev_comm, event, raw->data); | 1484 | FILL_ARRAY(switch_event, prev_comm, event, raw->data); |
1451 | FILL_FIELD(switch_event, prev_pid, event, raw->data); | 1485 | FILL_FIELD(switch_event, prev_pid, event, raw->data); |
1452 | FILL_FIELD(switch_event, prev_prio, event, raw->data); | 1486 | FILL_FIELD(switch_event, prev_prio, event, raw->data); |
1453 | FILL_FIELD(switch_event, prev_state, event, raw->data); | 1487 | FILL_FIELD(switch_event, prev_state, event, raw->data); |
1454 | FILL_ARRAY(switch_event, next_comm, event, raw->data); | 1488 | FILL_ARRAY(switch_event, next_comm, event, raw->data); |
1455 | FILL_FIELD(switch_event, next_pid, event, raw->data); | 1489 | FILL_FIELD(switch_event, next_pid, event, raw->data); |
1456 | FILL_FIELD(switch_event, next_prio, event, raw->data); | 1490 | FILL_FIELD(switch_event, next_prio, event, raw->data); |
1457 | 1491 | ||
1458 | if (curr_pid[this_cpu] != (u32)-1) { | 1492 | if (curr_pid[this_cpu] != (u32)-1) { |
1459 | /* | 1493 | /* |
1460 | * Are we trying to switch away a PID that is | 1494 | * Are we trying to switch away a PID that is |
1461 | * not current? | 1495 | * not current? |
1462 | */ | 1496 | */ |
1463 | if (curr_pid[this_cpu] != switch_event.prev_pid) | 1497 | if (curr_pid[this_cpu] != switch_event.prev_pid) |
1464 | nr_context_switch_bugs++; | 1498 | nr_context_switch_bugs++; |
1465 | } | 1499 | } |
1466 | if (trace_handler->switch_event) | 1500 | if (trace_handler->switch_event) |
1467 | trace_handler->switch_event(&switch_event, event, this_cpu, timestamp, thread); | 1501 | trace_handler->switch_event(&switch_event, event, this_cpu, timestamp, thread); |
1468 | 1502 | ||
1469 | curr_pid[this_cpu] = switch_event.next_pid; | 1503 | curr_pid[this_cpu] = switch_event.next_pid; |
1470 | } | 1504 | } |
1471 | 1505 | ||
1472 | static void | 1506 | static void |
1473 | process_sched_runtime_event(struct raw_event_sample *raw, | 1507 | process_sched_runtime_event(struct raw_event_sample *raw, |
1474 | struct event *event, | 1508 | struct event *event, |
1475 | int cpu __used, | 1509 | int cpu __used, |
1476 | u64 timestamp __used, | 1510 | u64 timestamp __used, |
1477 | struct thread *thread __used) | 1511 | struct thread *thread __used) |
1478 | { | 1512 | { |
1479 | struct trace_runtime_event runtime_event; | 1513 | struct trace_runtime_event runtime_event; |
1480 | 1514 | ||
1481 | FILL_ARRAY(runtime_event, comm, event, raw->data); | 1515 | FILL_ARRAY(runtime_event, comm, event, raw->data); |
1482 | FILL_FIELD(runtime_event, pid, event, raw->data); | 1516 | FILL_FIELD(runtime_event, pid, event, raw->data); |
1483 | FILL_FIELD(runtime_event, runtime, event, raw->data); | 1517 | FILL_FIELD(runtime_event, runtime, event, raw->data); |
1484 | FILL_FIELD(runtime_event, vruntime, event, raw->data); | 1518 | FILL_FIELD(runtime_event, vruntime, event, raw->data); |
1485 | 1519 | ||
1486 | if (trace_handler->runtime_event) | 1520 | if (trace_handler->runtime_event) |
1487 | trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread); | 1521 | trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread); |
1488 | } | 1522 | } |
1489 | 1523 | ||
1490 | static void | 1524 | static void |
1491 | process_sched_fork_event(struct raw_event_sample *raw, | 1525 | process_sched_fork_event(struct raw_event_sample *raw, |
1492 | struct event *event, | 1526 | struct event *event, |
1493 | int cpu __used, | 1527 | int cpu __used, |
1494 | u64 timestamp __used, | 1528 | u64 timestamp __used, |
1495 | struct thread *thread __used) | 1529 | struct thread *thread __used) |
1496 | { | 1530 | { |
1497 | struct trace_fork_event fork_event; | 1531 | struct trace_fork_event fork_event; |
1498 | 1532 | ||
1499 | FILL_COMMON_FIELDS(fork_event, event, raw->data); | 1533 | FILL_COMMON_FIELDS(fork_event, event, raw->data); |
1500 | 1534 | ||
1501 | FILL_ARRAY(fork_event, parent_comm, event, raw->data); | 1535 | FILL_ARRAY(fork_event, parent_comm, event, raw->data); |
1502 | FILL_FIELD(fork_event, parent_pid, event, raw->data); | 1536 | FILL_FIELD(fork_event, parent_pid, event, raw->data); |
1503 | FILL_ARRAY(fork_event, child_comm, event, raw->data); | 1537 | FILL_ARRAY(fork_event, child_comm, event, raw->data); |
1504 | FILL_FIELD(fork_event, child_pid, event, raw->data); | 1538 | FILL_FIELD(fork_event, child_pid, event, raw->data); |
1505 | 1539 | ||
1506 | if (trace_handler->fork_event) | 1540 | if (trace_handler->fork_event) |
1507 | trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread); | 1541 | trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread); |
1508 | } | 1542 | } |
1509 | 1543 | ||
1510 | static void | 1544 | static void |
1511 | process_sched_exit_event(struct event *event, | 1545 | process_sched_exit_event(struct event *event, |
1512 | int cpu __used, | 1546 | int cpu __used, |
1513 | u64 timestamp __used, | 1547 | u64 timestamp __used, |
1514 | struct thread *thread __used) | 1548 | struct thread *thread __used) |
1515 | { | 1549 | { |
1516 | if (verbose) | 1550 | if (verbose) |
1517 | printf("sched_exit event %p\n", event); | 1551 | printf("sched_exit event %p\n", event); |
1518 | } | 1552 | } |
1519 | 1553 | ||
1520 | static void | 1554 | static void |
1521 | process_raw_event(event_t *raw_event __used, void *more_data, | 1555 | process_raw_event(event_t *raw_event __used, void *more_data, |
1522 | int cpu, u64 timestamp, struct thread *thread) | 1556 | int cpu, u64 timestamp, struct thread *thread) |
1523 | { | 1557 | { |
1524 | struct raw_event_sample *raw = more_data; | 1558 | struct raw_event_sample *raw = more_data; |
1525 | struct event *event; | 1559 | struct event *event; |
1526 | int type; | 1560 | int type; |
1527 | 1561 | ||
1528 | type = trace_parse_common_type(raw->data); | 1562 | type = trace_parse_common_type(raw->data); |
1529 | event = trace_find_event(type); | 1563 | event = trace_find_event(type); |
1530 | 1564 | ||
1531 | if (!strcmp(event->name, "sched_switch")) | 1565 | if (!strcmp(event->name, "sched_switch")) |
1532 | process_sched_switch_event(raw, event, cpu, timestamp, thread); | 1566 | process_sched_switch_event(raw, event, cpu, timestamp, thread); |
1533 | if (!strcmp(event->name, "sched_stat_runtime")) | 1567 | if (!strcmp(event->name, "sched_stat_runtime")) |
1534 | process_sched_runtime_event(raw, event, cpu, timestamp, thread); | 1568 | process_sched_runtime_event(raw, event, cpu, timestamp, thread); |
1535 | if (!strcmp(event->name, "sched_wakeup")) | 1569 | if (!strcmp(event->name, "sched_wakeup")) |
1536 | process_sched_wakeup_event(raw, event, cpu, timestamp, thread); | 1570 | process_sched_wakeup_event(raw, event, cpu, timestamp, thread); |
1537 | if (!strcmp(event->name, "sched_wakeup_new")) | 1571 | if (!strcmp(event->name, "sched_wakeup_new")) |
1538 | process_sched_wakeup_event(raw, event, cpu, timestamp, thread); | 1572 | process_sched_wakeup_event(raw, event, cpu, timestamp, thread); |
1539 | if (!strcmp(event->name, "sched_process_fork")) | 1573 | if (!strcmp(event->name, "sched_process_fork")) |
1540 | process_sched_fork_event(raw, event, cpu, timestamp, thread); | 1574 | process_sched_fork_event(raw, event, cpu, timestamp, thread); |
1541 | if (!strcmp(event->name, "sched_process_exit")) | 1575 | if (!strcmp(event->name, "sched_process_exit")) |
1542 | process_sched_exit_event(event, cpu, timestamp, thread); | 1576 | process_sched_exit_event(event, cpu, timestamp, thread); |
1543 | } | 1577 | } |
1544 | 1578 | ||
1545 | static int | 1579 | static int |
1546 | process_sample_event(event_t *event, unsigned long offset, unsigned long head) | 1580 | process_sample_event(event_t *event, unsigned long offset, unsigned long head) |
1547 | { | 1581 | { |
1548 | struct thread *thread; | 1582 | struct thread *thread; |
1549 | u64 ip = event->ip.ip; | 1583 | u64 ip = event->ip.ip; |
1550 | u64 timestamp = -1; | 1584 | u64 timestamp = -1; |
1551 | u32 cpu = -1; | 1585 | u32 cpu = -1; |
1552 | u64 period = 1; | 1586 | u64 period = 1; |
1553 | void *more_data = event->ip.__more_data; | 1587 | void *more_data = event->ip.__more_data; |
1554 | 1588 | ||
1555 | if (!(sample_type & PERF_SAMPLE_RAW)) | 1589 | if (!(sample_type & PERF_SAMPLE_RAW)) |
1556 | return 0; | 1590 | return 0; |
1557 | 1591 | ||
1558 | thread = threads__findnew(event->ip.pid, &threads, &last_match); | 1592 | thread = threads__findnew(event->ip.pid, &threads, &last_match); |
1559 | 1593 | ||
1560 | if (sample_type & PERF_SAMPLE_TIME) { | 1594 | if (sample_type & PERF_SAMPLE_TIME) { |
1561 | timestamp = *(u64 *)more_data; | 1595 | timestamp = *(u64 *)more_data; |
1562 | more_data += sizeof(u64); | 1596 | more_data += sizeof(u64); |
1563 | } | 1597 | } |
1564 | 1598 | ||
1565 | if (sample_type & PERF_SAMPLE_CPU) { | 1599 | if (sample_type & PERF_SAMPLE_CPU) { |
1566 | cpu = *(u32 *)more_data; | 1600 | cpu = *(u32 *)more_data; |
1567 | more_data += sizeof(u32); | 1601 | more_data += sizeof(u32); |
1568 | more_data += sizeof(u32); /* reserved */ | 1602 | more_data += sizeof(u32); /* reserved */ |
1569 | } | 1603 | } |
1570 | 1604 | ||
1571 | if (sample_type & PERF_SAMPLE_PERIOD) { | 1605 | if (sample_type & PERF_SAMPLE_PERIOD) { |
1572 | period = *(u64 *)more_data; | 1606 | period = *(u64 *)more_data; |
1573 | more_data += sizeof(u64); | 1607 | more_data += sizeof(u64); |
1574 | } | 1608 | } |
1575 | 1609 | ||
1576 | dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", | 1610 | dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", |
1577 | (void *)(offset + head), | 1611 | (void *)(offset + head), |
1578 | (void *)(long)(event->header.size), | 1612 | (void *)(long)(event->header.size), |
1579 | event->header.misc, | 1613 | event->header.misc, |
1580 | event->ip.pid, event->ip.tid, | 1614 | event->ip.pid, event->ip.tid, |
1581 | (void *)(long)ip, | 1615 | (void *)(long)ip, |
1582 | (long long)period); | 1616 | (long long)period); |
1583 | 1617 | ||
1584 | dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); | 1618 | dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); |
1585 | 1619 | ||
1586 | if (thread == NULL) { | 1620 | if (thread == NULL) { |
1587 | eprintf("problem processing %d event, skipping it.\n", | 1621 | eprintf("problem processing %d event, skipping it.\n", |
1588 | event->header.type); | 1622 | event->header.type); |
1589 | return -1; | 1623 | return -1; |
1590 | } | 1624 | } |
1591 | 1625 | ||
1592 | process_raw_event(event, more_data, cpu, timestamp, thread); | 1626 | process_raw_event(event, more_data, cpu, timestamp, thread); |
1593 | 1627 | ||
1594 | return 0; | 1628 | return 0; |
1595 | } | 1629 | } |
1596 | 1630 | ||
1597 | static int | 1631 | static int |
1598 | process_lost_event(event_t *event __used, | 1632 | process_lost_event(event_t *event __used, |
1599 | unsigned long offset __used, | 1633 | unsigned long offset __used, |
1600 | unsigned long head __used) | 1634 | unsigned long head __used) |
1601 | { | 1635 | { |
1602 | nr_lost_chunks++; | 1636 | nr_lost_chunks++; |
1603 | nr_lost_events += event->lost.lost; | 1637 | nr_lost_events += event->lost.lost; |
1604 | 1638 | ||
1605 | return 0; | 1639 | return 0; |
1606 | } | 1640 | } |
1607 | 1641 | ||
1608 | static int sample_type_check(u64 type) | 1642 | static int sample_type_check(u64 type) |
1609 | { | 1643 | { |
1610 | sample_type = type; | 1644 | sample_type = type; |
1611 | 1645 | ||
1612 | if (!(sample_type & PERF_SAMPLE_RAW)) { | 1646 | if (!(sample_type & PERF_SAMPLE_RAW)) { |
1613 | fprintf(stderr, | 1647 | fprintf(stderr, |
1614 | "No trace sample to read. Did you call perf record " | 1648 | "No trace sample to read. Did you call perf record " |
1615 | "without -R?"); | 1649 | "without -R?"); |
1616 | return -1; | 1650 | return -1; |
1617 | } | 1651 | } |
1618 | 1652 | ||
1619 | return 0; | 1653 | return 0; |
1620 | } | 1654 | } |
1621 | 1655 | ||
1622 | static struct perf_file_handler file_handler = { | 1656 | static struct perf_file_handler file_handler = { |
1623 | .process_sample_event = process_sample_event, | 1657 | .process_sample_event = process_sample_event, |
1624 | .process_comm_event = process_comm_event, | 1658 | .process_comm_event = process_comm_event, |
1625 | .process_lost_event = process_lost_event, | 1659 | .process_lost_event = process_lost_event, |
1626 | .sample_type_check = sample_type_check, | 1660 | .sample_type_check = sample_type_check, |
1627 | }; | 1661 | }; |
1628 | 1662 | ||
1629 | static int read_events(void) | 1663 | static int read_events(void) |
1630 | { | 1664 | { |
1631 | register_idle_thread(&threads, &last_match); | 1665 | register_idle_thread(&threads, &last_match); |
1632 | register_perf_file_handler(&file_handler); | 1666 | register_perf_file_handler(&file_handler); |
1633 | 1667 | ||
1634 | return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); | 1668 | return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); |
1635 | } | 1669 | } |
1636 | 1670 | ||
1637 | static void print_bad_events(void) | 1671 | static void print_bad_events(void) |
1638 | { | 1672 | { |
1639 | if (nr_unordered_timestamps && nr_timestamps) { | 1673 | if (nr_unordered_timestamps && nr_timestamps) { |
1640 | printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n", | 1674 | printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n", |
1641 | (double)nr_unordered_timestamps/(double)nr_timestamps*100.0, | 1675 | (double)nr_unordered_timestamps/(double)nr_timestamps*100.0, |
1642 | nr_unordered_timestamps, nr_timestamps); | 1676 | nr_unordered_timestamps, nr_timestamps); |
1643 | } | 1677 | } |
1644 | if (nr_lost_events && nr_events) { | 1678 | if (nr_lost_events && nr_events) { |
1645 | printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", | 1679 | printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", |
1646 | (double)nr_lost_events/(double)nr_events*100.0, | 1680 | (double)nr_lost_events/(double)nr_events*100.0, |
1647 | nr_lost_events, nr_events, nr_lost_chunks); | 1681 | nr_lost_events, nr_events, nr_lost_chunks); |
1648 | } | 1682 | } |
1649 | if (nr_state_machine_bugs && nr_timestamps) { | 1683 | if (nr_state_machine_bugs && nr_timestamps) { |
1650 | printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)", | 1684 | printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)", |
1651 | (double)nr_state_machine_bugs/(double)nr_timestamps*100.0, | 1685 | (double)nr_state_machine_bugs/(double)nr_timestamps*100.0, |
1652 | nr_state_machine_bugs, nr_timestamps); | 1686 | nr_state_machine_bugs, nr_timestamps); |
1653 | if (nr_lost_events) | 1687 | if (nr_lost_events) |
1654 | printf(" (due to lost events?)"); | 1688 | printf(" (due to lost events?)"); |
1655 | printf("\n"); | 1689 | printf("\n"); |
1656 | } | 1690 | } |
1657 | if (nr_context_switch_bugs && nr_timestamps) { | 1691 | if (nr_context_switch_bugs && nr_timestamps) { |
1658 | printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", | 1692 | printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", |
1659 | (double)nr_context_switch_bugs/(double)nr_timestamps*100.0, | 1693 | (double)nr_context_switch_bugs/(double)nr_timestamps*100.0, |
1660 | nr_context_switch_bugs, nr_timestamps); | 1694 | nr_context_switch_bugs, nr_timestamps); |
1661 | if (nr_lost_events) | 1695 | if (nr_lost_events) |
1662 | printf(" (due to lost events?)"); | 1696 | printf(" (due to lost events?)"); |
1663 | printf("\n"); | 1697 | printf("\n"); |
1664 | } | 1698 | } |
1665 | } | 1699 | } |
1666 | 1700 | ||
1667 | static void __cmd_lat(void) | 1701 | static void __cmd_lat(void) |
1668 | { | 1702 | { |
1669 | struct rb_node *next; | 1703 | struct rb_node *next; |
1670 | 1704 | ||
1671 | setup_pager(); | 1705 | setup_pager(); |
1672 | read_events(); | 1706 | read_events(); |
1673 | sort_lat(); | 1707 | sort_lat(); |
1674 | 1708 | ||
1675 | printf("\n -----------------------------------------------------------------------------------------\n"); | 1709 | printf("\n -----------------------------------------------------------------------------------------\n"); |
1676 | printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |\n"); | 1710 | printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |\n"); |
1677 | printf(" -----------------------------------------------------------------------------------------\n"); | 1711 | printf(" -----------------------------------------------------------------------------------------\n"); |
1678 | 1712 | ||
1679 | next = rb_first(&sorted_atom_root); | 1713 | next = rb_first(&sorted_atom_root); |
1680 | 1714 | ||
1681 | while (next) { | 1715 | while (next) { |
1682 | struct work_atoms *work_list; | 1716 | struct work_atoms *work_list; |
1683 | 1717 | ||
1684 | work_list = rb_entry(next, struct work_atoms, node); | 1718 | work_list = rb_entry(next, struct work_atoms, node); |
1685 | output_lat_thread(work_list); | 1719 | output_lat_thread(work_list); |
1686 | next = rb_next(next); | 1720 | next = rb_next(next); |
1687 | } | 1721 | } |
1688 | 1722 | ||
1689 | printf(" -----------------------------------------------------------------------------------------\n"); | 1723 | printf(" -----------------------------------------------------------------------------------------\n"); |
1690 | printf(" TOTAL: |%11.3f ms |%9Ld |\n", | 1724 | printf(" TOTAL: |%11.3f ms |%9Ld |\n", |
1691 | (double)all_runtime/1e6, all_count); | 1725 | (double)all_runtime/1e6, all_count); |
1692 | 1726 | ||
1693 | printf(" ---------------------------------------------------\n"); | 1727 | printf(" ---------------------------------------------------\n"); |
1694 | 1728 | ||
1695 | print_bad_events(); | 1729 | print_bad_events(); |
1696 | printf("\n"); | 1730 | printf("\n"); |
1697 | 1731 | ||
1698 | } | 1732 | } |
1699 | 1733 | ||
1700 | static struct trace_sched_handler map_ops = { | 1734 | static struct trace_sched_handler map_ops = { |
1701 | .wakeup_event = NULL, | 1735 | .wakeup_event = NULL, |
1702 | .switch_event = map_switch_event, | 1736 | .switch_event = map_switch_event, |
1703 | .runtime_event = NULL, | 1737 | .runtime_event = NULL, |
1704 | .fork_event = NULL, | 1738 | .fork_event = NULL, |
1705 | }; | 1739 | }; |
1706 | 1740 | ||
1707 | static void __cmd_map(void) | 1741 | static void __cmd_map(void) |
1708 | { | 1742 | { |
1709 | max_cpu = sysconf(_SC_NPROCESSORS_CONF); | 1743 | max_cpu = sysconf(_SC_NPROCESSORS_CONF); |
1710 | 1744 | ||
1711 | setup_pager(); | 1745 | setup_pager(); |
1712 | read_events(); | 1746 | read_events(); |
1713 | print_bad_events(); | 1747 | print_bad_events(); |
1714 | } | 1748 | } |
1715 | 1749 | ||
1716 | static void __cmd_replay(void) | 1750 | static void __cmd_replay(void) |
1717 | { | 1751 | { |
1718 | unsigned long i; | 1752 | unsigned long i; |
1719 | 1753 | ||
1720 | calibrate_run_measurement_overhead(); | 1754 | calibrate_run_measurement_overhead(); |
1721 | calibrate_sleep_measurement_overhead(); | 1755 | calibrate_sleep_measurement_overhead(); |
1722 | 1756 | ||
1723 | test_calibrations(); | 1757 | test_calibrations(); |
1724 | 1758 | ||
1725 | read_events(); | 1759 | read_events(); |
1726 | 1760 | ||
1727 | printf("nr_run_events: %ld\n", nr_run_events); | 1761 | printf("nr_run_events: %ld\n", nr_run_events); |
1728 | printf("nr_sleep_events: %ld\n", nr_sleep_events); | 1762 | printf("nr_sleep_events: %ld\n", nr_sleep_events); |
1729 | printf("nr_wakeup_events: %ld\n", nr_wakeup_events); | 1763 | printf("nr_wakeup_events: %ld\n", nr_wakeup_events); |
1730 | 1764 | ||
1731 | if (targetless_wakeups) | 1765 | if (targetless_wakeups) |
1732 | printf("target-less wakeups: %ld\n", targetless_wakeups); | 1766 | printf("target-less wakeups: %ld\n", targetless_wakeups); |
1733 | if (multitarget_wakeups) | 1767 | if (multitarget_wakeups) |
1734 | printf("multi-target wakeups: %ld\n", multitarget_wakeups); | 1768 | printf("multi-target wakeups: %ld\n", multitarget_wakeups); |
1735 | if (nr_run_events_optimized) | 1769 | if (nr_run_events_optimized) |
1736 | printf("run atoms optimized: %ld\n", | 1770 | printf("run atoms optimized: %ld\n", |
1737 | nr_run_events_optimized); | 1771 | nr_run_events_optimized); |
1738 | 1772 | ||
1739 | print_task_traces(); | 1773 | print_task_traces(); |
1740 | add_cross_task_wakeups(); | 1774 | add_cross_task_wakeups(); |
1741 | 1775 | ||
1742 | create_tasks(); | 1776 | create_tasks(); |
1743 | printf("------------------------------------------------------------\n"); | 1777 | printf("------------------------------------------------------------\n"); |
1744 | for (i = 0; i < replay_repeat; i++) | 1778 | for (i = 0; i < replay_repeat; i++) |
1745 | run_one_test(); | 1779 | run_one_test(); |
1746 | } | 1780 | } |
1747 | 1781 | ||
1748 | 1782 | ||
1749 | static const char * const sched_usage[] = { | 1783 | static const char * const sched_usage[] = { |
1750 | "perf sched [<options>] {record|latency|map|replay|trace}", | 1784 | "perf sched [<options>] {record|latency|map|replay|trace}", |
1751 | NULL | 1785 | NULL |
1752 | }; | 1786 | }; |
1753 | 1787 | ||
1754 | static const struct option sched_options[] = { | 1788 | static const struct option sched_options[] = { |
1755 | OPT_STRING('i', "input", &input_name, "file", | 1789 | OPT_STRING('i', "input", &input_name, "file", |
1756 | "input file name"), | 1790 | "input file name"), |
1757 | OPT_BOOLEAN('v', "verbose", &verbose, | 1791 | OPT_BOOLEAN('v', "verbose", &verbose, |
1758 | "be more verbose (show symbol address, etc)"), | 1792 | "be more verbose (show symbol address, etc)"), |
1759 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | 1793 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, |
1760 | "dump raw trace in ASCII"), | 1794 | "dump raw trace in ASCII"), |
1761 | OPT_END() | 1795 | OPT_END() |
1762 | }; | 1796 | }; |
1763 | 1797 | ||
1764 | static const char * const latency_usage[] = { | 1798 | static const char * const latency_usage[] = { |
1765 | "perf sched latency [<options>]", | 1799 | "perf sched latency [<options>]", |
1766 | NULL | 1800 | NULL |
1767 | }; | 1801 | }; |
1768 | 1802 | ||
1769 | static const struct option latency_options[] = { | 1803 | static const struct option latency_options[] = { |
1770 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", | 1804 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", |
1771 | "sort by key(s): runtime, switch, avg, max"), | 1805 | "sort by key(s): runtime, switch, avg, max"), |
1772 | OPT_BOOLEAN('v', "verbose", &verbose, | 1806 | OPT_BOOLEAN('v', "verbose", &verbose, |
1773 | "be more verbose (show symbol address, etc)"), | 1807 | "be more verbose (show symbol address, etc)"), |
1774 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | 1808 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, |
1775 | "dump raw trace in ASCII"), | 1809 | "dump raw trace in ASCII"), |
1776 | OPT_END() | 1810 | OPT_END() |
1777 | }; | 1811 | }; |
1778 | 1812 | ||
1779 | static const char * const replay_usage[] = { | 1813 | static const char * const replay_usage[] = { |
1780 | "perf sched replay [<options>]", | 1814 | "perf sched replay [<options>]", |
1781 | NULL | 1815 | NULL |
1782 | }; | 1816 | }; |
1783 | 1817 | ||
1784 | static const struct option replay_options[] = { | 1818 | static const struct option replay_options[] = { |
1785 | OPT_INTEGER('r', "repeat", &replay_repeat, | 1819 | OPT_INTEGER('r', "repeat", &replay_repeat, |
1786 | "repeat the workload replay N times (-1: infinite)"), | 1820 | "repeat the workload replay N times (-1: infinite)"), |
1787 | OPT_BOOLEAN('v', "verbose", &verbose, | 1821 | OPT_BOOLEAN('v', "verbose", &verbose, |
1788 | "be more verbose (show symbol address, etc)"), | 1822 | "be more verbose (show symbol address, etc)"), |
1789 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | 1823 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, |
1790 | "dump raw trace in ASCII"), | 1824 | "dump raw trace in ASCII"), |
1791 | OPT_END() | 1825 | OPT_END() |
1792 | }; | 1826 | }; |
1793 | 1827 | ||
1794 | static void setup_sorting(void) | 1828 | static void setup_sorting(void) |
1795 | { | 1829 | { |
1796 | char *tmp, *tok, *str = strdup(sort_order); | 1830 | char *tmp, *tok, *str = strdup(sort_order); |
1797 | 1831 | ||
1798 | for (tok = strtok_r(str, ", ", &tmp); | 1832 | for (tok = strtok_r(str, ", ", &tmp); |
1799 | tok; tok = strtok_r(NULL, ", ", &tmp)) { | 1833 | tok; tok = strtok_r(NULL, ", ", &tmp)) { |
1800 | if (sort_dimension__add(tok, &sort_list) < 0) { | 1834 | if (sort_dimension__add(tok, &sort_list) < 0) { |
1801 | error("Unknown --sort key: `%s'", tok); | 1835 | error("Unknown --sort key: `%s'", tok); |
1802 | usage_with_options(latency_usage, latency_options); | 1836 | usage_with_options(latency_usage, latency_options); |
1803 | } | 1837 | } |
1804 | } | 1838 | } |
1805 | 1839 | ||
1806 | free(str); | 1840 | free(str); |
1807 | 1841 | ||
1808 | sort_dimension__add((char *)"pid", &cmp_pid); | 1842 | sort_dimension__add((char *)"pid", &cmp_pid); |
1809 | } | 1843 | } |
1810 | 1844 | ||
1811 | static const char *record_args[] = { | 1845 | static const char *record_args[] = { |
1812 | "record", | 1846 | "record", |
1813 | "-a", | 1847 | "-a", |
1814 | "-R", | 1848 | "-R", |
1815 | "-M", | 1849 | "-M", |
1816 | "-f", | 1850 | "-f", |
1817 | "-m", "1024", | 1851 | "-m", "1024", |
1818 | "-c", "1", | 1852 | "-c", "1", |
1819 | "-e", "sched:sched_switch:r", | 1853 | "-e", "sched:sched_switch:r", |
1820 | "-e", "sched:sched_stat_wait:r", | 1854 | "-e", "sched:sched_stat_wait:r", |
1821 | "-e", "sched:sched_stat_sleep:r", | 1855 | "-e", "sched:sched_stat_sleep:r", |
1822 | "-e", "sched:sched_stat_iowait:r", | 1856 | "-e", "sched:sched_stat_iowait:r", |
1823 | "-e", "sched:sched_stat_runtime:r", | 1857 | "-e", "sched:sched_stat_runtime:r", |
1824 | "-e", "sched:sched_process_exit:r", | 1858 | "-e", "sched:sched_process_exit:r", |
1825 | "-e", "sched:sched_process_fork:r", | 1859 | "-e", "sched:sched_process_fork:r", |
1826 | "-e", "sched:sched_wakeup:r", | 1860 | "-e", "sched:sched_wakeup:r", |
1827 | "-e", "sched:sched_migrate_task:r", | 1861 | "-e", "sched:sched_migrate_task:r", |
1828 | }; | 1862 | }; |
1829 | 1863 | ||
1830 | static int __cmd_record(int argc, const char **argv) | 1864 | static int __cmd_record(int argc, const char **argv) |
1831 | { | 1865 | { |
1832 | unsigned int rec_argc, i, j; | 1866 | unsigned int rec_argc, i, j; |
1833 | const char **rec_argv; | 1867 | const char **rec_argv; |
1834 | 1868 | ||
1835 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; | 1869 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; |
1836 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); | 1870 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); |
1837 | 1871 | ||
1838 | for (i = 0; i < ARRAY_SIZE(record_args); i++) | 1872 | for (i = 0; i < ARRAY_SIZE(record_args); i++) |
1839 | rec_argv[i] = strdup(record_args[i]); | 1873 | rec_argv[i] = strdup(record_args[i]); |
1840 | 1874 | ||
1841 | for (j = 1; j < (unsigned int)argc; j++, i++) | 1875 | for (j = 1; j < (unsigned int)argc; j++, i++) |
1842 | rec_argv[i] = argv[j]; | 1876 | rec_argv[i] = argv[j]; |
1843 | 1877 | ||
1844 | BUG_ON(i != rec_argc); | 1878 | BUG_ON(i != rec_argc); |
1845 | 1879 | ||
1846 | return cmd_record(i, rec_argv, NULL); | 1880 | return cmd_record(i, rec_argv, NULL); |
1847 | } | 1881 | } |
1848 | 1882 | ||
1849 | int cmd_sched(int argc, const char **argv, const char *prefix __used) | 1883 | int cmd_sched(int argc, const char **argv, const char *prefix __used) |
1850 | { | 1884 | { |
1851 | symbol__init(); | 1885 | symbol__init(); |
1852 | 1886 | ||
1853 | argc = parse_options(argc, argv, sched_options, sched_usage, | 1887 | argc = parse_options(argc, argv, sched_options, sched_usage, |
1854 | PARSE_OPT_STOP_AT_NON_OPTION); | 1888 | PARSE_OPT_STOP_AT_NON_OPTION); |
1855 | if (!argc) | 1889 | if (!argc) |
1856 | usage_with_options(sched_usage, sched_options); | 1890 | usage_with_options(sched_usage, sched_options); |
1857 | 1891 | ||
1858 | if (!strncmp(argv[0], "rec", 3)) { | 1892 | if (!strncmp(argv[0], "rec", 3)) { |
1859 | return __cmd_record(argc, argv); | 1893 | return __cmd_record(argc, argv); |
1860 | } else if (!strncmp(argv[0], "lat", 3)) { | 1894 | } else if (!strncmp(argv[0], "lat", 3)) { |
1861 | trace_handler = &lat_ops; | 1895 | trace_handler = &lat_ops; |
1862 | if (argc > 1) { | 1896 | if (argc > 1) { |
1863 | argc = parse_options(argc, argv, latency_options, latency_usage, 0); | 1897 | argc = parse_options(argc, argv, latency_options, latency_usage, 0); |
1864 | if (argc) | 1898 | if (argc) |
1865 | usage_with_options(latency_usage, latency_options); | 1899 | usage_with_options(latency_usage, latency_options); |
1866 | } | 1900 | } |
1867 | setup_sorting(); | 1901 | setup_sorting(); |
1868 | __cmd_lat(); | 1902 | __cmd_lat(); |
1869 | } else if (!strcmp(argv[0], "map")) { | 1903 | } else if (!strcmp(argv[0], "map")) { |
1870 | trace_handler = &map_ops; | 1904 | trace_handler = &map_ops; |
1871 | setup_sorting(); | 1905 | setup_sorting(); |
1872 | __cmd_map(); | 1906 | __cmd_map(); |
1873 | } else if (!strncmp(argv[0], "rep", 3)) { | 1907 | } else if (!strncmp(argv[0], "rep", 3)) { |
1874 | trace_handler = &replay_ops; | 1908 | trace_handler = &replay_ops; |
1875 | if (argc) { | 1909 | if (argc) { |
1876 | argc = parse_options(argc, argv, replay_options, replay_usage, 0); | 1910 | argc = parse_options(argc, argv, replay_options, replay_usage, 0); |
1877 | if (argc) | 1911 | if (argc) |
1878 | usage_with_options(replay_usage, replay_options); | 1912 | usage_with_options(replay_usage, replay_options); |
1879 | } | 1913 | } |
1880 | __cmd_replay(); | 1914 | __cmd_replay(); |
1881 | } else if (!strcmp(argv[0], "trace")) { | 1915 | } else if (!strcmp(argv[0], "trace")) { |
1882 | /* | 1916 | /* |
1883 | * Aliased to 'perf trace' for now: | 1917 | * Aliased to 'perf trace' for now: |
1884 | */ | 1918 | */ |
1885 | return cmd_trace(argc, argv, prefix); | 1919 | return cmd_trace(argc, argv, prefix); |
1886 | } else { | 1920 | } else { |
1887 | usage_with_options(sched_usage, sched_options); | 1921 | usage_with_options(sched_usage, sched_options); |
1888 | } | 1922 | } |
1889 | 1923 | ||
1890 | return 0; | 1924 | return 0; |
1891 | } | 1925 | } |
1892 | 1926 |
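The __cmd_record() helper above rebuilds a 'perf record' command line by prepending a fixed set of tracepoint arguments to whatever the user passed after the subcommand, then hands the assembled argv to cmd_record(). Below is a minimal, self-contained sketch of that prepend-and-forward pattern; fixed_args and run_record are illustrative names for this sketch only, not perf symbols:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static const char *fixed_args[] = { "record", "-a", "-R" };
#define N_FIXED (sizeof(fixed_args) / sizeof(fixed_args[0]))

static int run_record(int argc, const char **argv)
{
	int i;

	for (i = 0; i < argc; i++)	/* stand-in for cmd_record() */
		printf("argv[%d] = %s\n", i, argv[i]);
	return 0;
}

int main(int argc, char **argv)
{
	/* drop argv[0] (the subcommand name), keep the user's options */
	unsigned int rec_argc = N_FIXED + argc - 1;
	const char **rec_argv = calloc(rec_argc + 1, sizeof(char *));
	unsigned int i, j;

	if (!rec_argv)
		return 1;

	for (i = 0; i < N_FIXED; i++)
		rec_argv[i] = strdup(fixed_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	assert(i == rec_argc);
	return run_record(i, rec_argv);
}

As in the real helper, the array is sized for the fixed arguments plus everything after argv[0], and the BUG_ON/assert checks that the two fill loops agree with that count.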
tools/perf/util/thread.c
1 | #include "../perf.h" | 1 | #include "../perf.h" |
2 | #include <stdlib.h> | 2 | #include <stdlib.h> |
3 | #include <stdio.h> | 3 | #include <stdio.h> |
4 | #include <string.h> | 4 | #include <string.h> |
5 | #include "thread.h" | 5 | #include "thread.h" |
6 | #include "util.h" | 6 | #include "util.h" |
7 | #include "debug.h" | 7 | #include "debug.h" |
8 | 8 | ||
9 | static struct thread *thread__new(pid_t pid) | 9 | static struct thread *thread__new(pid_t pid, int set_comm) |
10 | { | 10 | { |
11 | struct thread *self = calloc(1, sizeof(*self)); | 11 | struct thread *self = calloc(1, sizeof(*self)); |
12 | 12 | ||
13 | if (self != NULL) { | 13 | if (self != NULL) { |
14 | self->pid = pid; | 14 | self->pid = pid; |
15 | self->comm = malloc(32); | 15 | if (set_comm) { |
16 | if (self->comm) | 16 | self->comm = malloc(32); |
17 | snprintf(self->comm, 32, ":%d", self->pid); | 17 | if (self->comm) |
18 | snprintf(self->comm, 32, ":%d", self->pid); | ||
19 | } | ||
18 | self->maps = RB_ROOT; | 20 | self->maps = RB_ROOT; |
19 | INIT_LIST_HEAD(&self->removed_maps); | 21 | INIT_LIST_HEAD(&self->removed_maps); |
20 | } | 22 | } |
21 | 23 | ||
22 | return self; | 24 | return self; |
23 | } | 25 | } |
24 | 26 | ||
25 | int thread__set_comm(struct thread *self, const char *comm) | 27 | int thread__set_comm(struct thread *self, const char *comm) |
26 | { | 28 | { |
27 | if (self->comm) | 29 | if (self->comm) |
28 | free(self->comm); | 30 | free(self->comm); |
29 | self->comm = strdup(comm); | 31 | self->comm = strdup(comm); |
30 | return self->comm ? 0 : -ENOMEM; | 32 | return self->comm ? 0 : -ENOMEM; |
31 | } | 33 | } |
32 | 34 | ||
33 | static size_t thread__fprintf(struct thread *self, FILE *fp) | 35 | static size_t thread__fprintf(struct thread *self, FILE *fp) |
34 | { | 36 | { |
35 | struct rb_node *nd; | 37 | struct rb_node *nd; |
36 | struct map *pos; | 38 | struct map *pos; |
37 | size_t ret = fprintf(fp, "Thread %d %s\nCurrent maps:\n", | 39 | size_t ret = fprintf(fp, "Thread %d %s\nCurrent maps:\n", |
38 | self->pid, self->comm); | 40 | self->pid, self->comm); |
39 | 41 | ||
40 | for (nd = rb_first(&self->maps); nd; nd = rb_next(nd)) { | 42 | for (nd = rb_first(&self->maps); nd; nd = rb_next(nd)) { |
41 | pos = rb_entry(nd, struct map, rb_node); | 43 | pos = rb_entry(nd, struct map, rb_node); |
42 | ret += map__fprintf(pos, fp); | 44 | ret += map__fprintf(pos, fp); |
43 | } | 45 | } |
44 | 46 | ||
45 | ret += fprintf(fp, "Removed maps:\n"); | 47 | ret += fprintf(fp, "Removed maps:\n"); |
46 | 48 | ||
47 | list_for_each_entry(pos, &self->removed_maps, node) | 49 | list_for_each_entry(pos, &self->removed_maps, node) |
48 | ret += map__fprintf(pos, fp); | 50 | ret += map__fprintf(pos, fp); |
49 | 51 | ||
50 | return ret; | 52 | return ret; |
51 | } | 53 | } |
52 | 54 | ||
53 | struct thread * | 55 | static struct thread * |
54 | threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) | 56 | __threads__findnew(pid_t pid, struct rb_root *threads, |
57 | struct thread **last_match, | ||
58 | int set_comm) | ||
55 | { | 59 | { |
56 | struct rb_node **p = &threads->rb_node; | 60 | struct rb_node **p = &threads->rb_node; |
57 | struct rb_node *parent = NULL; | 61 | struct rb_node *parent = NULL; |
58 | struct thread *th; | 62 | struct thread *th; |
59 | 63 | ||
60 | /* | 64 | /* |
61 | * Front-end cache - PID lookups come in blocks, | 65 | * Front-end cache - PID lookups come in blocks, |
62 | * so most of the time we don't have to look up | 66 | * so most of the time we don't have to look up |
63 | * the full rbtree: | 67 | * the full rbtree: |
64 | */ | 68 | */ |
65 | if (*last_match && (*last_match)->pid == pid) | 69 | if (*last_match && (*last_match)->pid == pid) |
66 | return *last_match; | 70 | return *last_match; |
67 | 71 | ||
68 | while (*p != NULL) { | 72 | while (*p != NULL) { |
69 | parent = *p; | 73 | parent = *p; |
70 | th = rb_entry(parent, struct thread, rb_node); | 74 | th = rb_entry(parent, struct thread, rb_node); |
71 | 75 | ||
72 | if (th->pid == pid) { | 76 | if (th->pid == pid) { |
73 | *last_match = th; | 77 | *last_match = th; |
74 | return th; | 78 | return th; |
75 | } | 79 | } |
76 | 80 | ||
77 | if (pid < th->pid) | 81 | if (pid < th->pid) |
78 | p = &(*p)->rb_left; | 82 | p = &(*p)->rb_left; |
79 | else | 83 | else |
80 | p = &(*p)->rb_right; | 84 | p = &(*p)->rb_right; |
81 | } | 85 | } |
82 | 86 | ||
83 | th = thread__new(pid); | 87 | th = thread__new(pid, set_comm); |
88 | |||
84 | if (th != NULL) { | 89 | if (th != NULL) { |
85 | rb_link_node(&th->rb_node, parent, p); | 90 | rb_link_node(&th->rb_node, parent, p); |
86 | rb_insert_color(&th->rb_node, threads); | 91 | rb_insert_color(&th->rb_node, threads); |
87 | *last_match = th; | 92 | *last_match = th; |
88 | } | 93 | } |
89 | 94 | ||
90 | return th; | 95 | return th; |
96 | } | ||
97 | |||
98 | struct thread * | ||
99 | threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) | ||
100 | { | ||
101 | return __threads__findnew(pid, threads, last_match, 1); | ||
102 | } | ||
103 | |||
104 | struct thread * | ||
105 | threads__findnew_nocomm(pid_t pid, struct rb_root *threads, | ||
106 | struct thread **last_match) | ||
107 | { | ||
108 | return __threads__findnew(pid, threads, last_match, 0); | ||
91 | } | 109 | } |
92 | 110 | ||
93 | struct thread * | 111 | struct thread * |
94 | register_idle_thread(struct rb_root *threads, struct thread **last_match) | 112 | register_idle_thread(struct rb_root *threads, struct thread **last_match) |
95 | { | 113 | { |
96 | struct thread *thread = threads__findnew(0, threads, last_match); | 114 | struct thread *thread = threads__findnew(0, threads, last_match); |
97 | 115 | ||
98 | if (!thread || thread__set_comm(thread, "swapper")) { | 116 | if (!thread || thread__set_comm(thread, "swapper")) { |
99 | fprintf(stderr, "problem inserting idle task.\n"); | 117 | fprintf(stderr, "problem inserting idle task.\n"); |
100 | exit(-1); | 118 | exit(-1); |
101 | } | 119 | } |
102 | 120 | ||
103 | return thread; | 121 | return thread; |
104 | } | 122 | } |
105 | 123 | ||
106 | static void thread__remove_overlappings(struct thread *self, struct map *map) | 124 | static void thread__remove_overlappings(struct thread *self, struct map *map) |
107 | { | 125 | { |
108 | struct rb_node *next = rb_first(&self->maps); | 126 | struct rb_node *next = rb_first(&self->maps); |
109 | 127 | ||
110 | while (next) { | 128 | while (next) { |
111 | struct map *pos = rb_entry(next, struct map, rb_node); | 129 | struct map *pos = rb_entry(next, struct map, rb_node); |
112 | next = rb_next(&pos->rb_node); | 130 | next = rb_next(&pos->rb_node); |
113 | 131 | ||
114 | if (!map__overlap(pos, map)) | 132 | if (!map__overlap(pos, map)) |
115 | continue; | 133 | continue; |
116 | 134 | ||
117 | if (verbose >= 2) { | 135 | if (verbose >= 2) { |
118 | printf("overlapping maps:\n"); | 136 | printf("overlapping maps:\n"); |
119 | map__fprintf(map, stdout); | 137 | map__fprintf(map, stdout); |
120 | map__fprintf(pos, stdout); | 138 | map__fprintf(pos, stdout); |
121 | } | 139 | } |
122 | 140 | ||
123 | rb_erase(&pos->rb_node, &self->maps); | 141 | rb_erase(&pos->rb_node, &self->maps); |
124 | /* | 142 | /* |
125 | * We may have references to this map, for instance in some | 143 | * We may have references to this map, for instance in some |
126 | * hist_entry instances, so just move them to a separate | 144 | * hist_entry instances, so just move them to a separate |
127 | * list. | 145 | * list. |
128 | */ | 146 | */ |
129 | list_add_tail(&pos->node, &self->removed_maps); | 147 | list_add_tail(&pos->node, &self->removed_maps); |
130 | } | 148 | } |
131 | } | 149 | } |
132 | 150 | ||
133 | void maps__insert(struct rb_root *maps, struct map *map) | 151 | void maps__insert(struct rb_root *maps, struct map *map) |
134 | { | 152 | { |
135 | struct rb_node **p = &maps->rb_node; | 153 | struct rb_node **p = &maps->rb_node; |
136 | struct rb_node *parent = NULL; | 154 | struct rb_node *parent = NULL; |
137 | const u64 ip = map->start; | 155 | const u64 ip = map->start; |
138 | struct map *m; | 156 | struct map *m; |
139 | 157 | ||
140 | while (*p != NULL) { | 158 | while (*p != NULL) { |
141 | parent = *p; | 159 | parent = *p; |
142 | m = rb_entry(parent, struct map, rb_node); | 160 | m = rb_entry(parent, struct map, rb_node); |
143 | if (ip < m->start) | 161 | if (ip < m->start) |
144 | p = &(*p)->rb_left; | 162 | p = &(*p)->rb_left; |
145 | else | 163 | else |
146 | p = &(*p)->rb_right; | 164 | p = &(*p)->rb_right; |
147 | } | 165 | } |
148 | 166 | ||
149 | rb_link_node(&map->rb_node, parent, p); | 167 | rb_link_node(&map->rb_node, parent, p); |
150 | rb_insert_color(&map->rb_node, maps); | 168 | rb_insert_color(&map->rb_node, maps); |
151 | } | 169 | } |
152 | 170 | ||
153 | struct map *maps__find(struct rb_root *maps, u64 ip) | 171 | struct map *maps__find(struct rb_root *maps, u64 ip) |
154 | { | 172 | { |
155 | struct rb_node **p = &maps->rb_node; | 173 | struct rb_node **p = &maps->rb_node; |
156 | struct rb_node *parent = NULL; | 174 | struct rb_node *parent = NULL; |
157 | struct map *m; | 175 | struct map *m; |
158 | 176 | ||
159 | while (*p != NULL) { | 177 | while (*p != NULL) { |
160 | parent = *p; | 178 | parent = *p; |
161 | m = rb_entry(parent, struct map, rb_node); | 179 | m = rb_entry(parent, struct map, rb_node); |
162 | if (ip < m->start) | 180 | if (ip < m->start) |
163 | p = &(*p)->rb_left; | 181 | p = &(*p)->rb_left; |
164 | else if (ip > m->end) | 182 | else if (ip > m->end) |
165 | p = &(*p)->rb_right; | 183 | p = &(*p)->rb_right; |
166 | else | 184 | else |
167 | return m; | 185 | return m; |
168 | } | 186 | } |
169 | 187 | ||
170 | return NULL; | 188 | return NULL; |
171 | } | 189 | } |
172 | 190 | ||
173 | void thread__insert_map(struct thread *self, struct map *map) | 191 | void thread__insert_map(struct thread *self, struct map *map) |
174 | { | 192 | { |
175 | thread__remove_overlappings(self, map); | 193 | thread__remove_overlappings(self, map); |
176 | maps__insert(&self->maps, map); | 194 | maps__insert(&self->maps, map); |
177 | } | 195 | } |
178 | 196 | ||
179 | int thread__fork(struct thread *self, struct thread *parent) | 197 | int thread__fork(struct thread *self, struct thread *parent) |
180 | { | 198 | { |
181 | struct rb_node *nd; | 199 | struct rb_node *nd; |
182 | 200 | ||
183 | if (self->comm) | 201 | if (self->comm) |
184 | free(self->comm); | 202 | free(self->comm); |
185 | self->comm = strdup(parent->comm); | 203 | self->comm = strdup(parent->comm); |
186 | if (!self->comm) | 204 | if (!self->comm) |
187 | return -ENOMEM; | 205 | return -ENOMEM; |
188 | 206 | ||
189 | for (nd = rb_first(&parent->maps); nd; nd = rb_next(nd)) { | 207 | for (nd = rb_first(&parent->maps); nd; nd = rb_next(nd)) { |
190 | struct map *map = rb_entry(nd, struct map, rb_node); | 208 | struct map *map = rb_entry(nd, struct map, rb_node); |
191 | struct map *new = map__clone(map); | 209 | struct map *new = map__clone(map); |
192 | if (!new) | 210 | if (!new) |
193 | return -ENOMEM; | 211 | return -ENOMEM; |
194 | thread__insert_map(self, new); | 212 | thread__insert_map(self, new); |
195 | } | 213 | } |
196 | 214 | ||
197 | return 0; | 215 | return 0; |
198 | } | 216 | } |
199 | 217 | ||
200 | size_t threads__fprintf(FILE *fp, struct rb_root *threads) | 218 | size_t threads__fprintf(FILE *fp, struct rb_root *threads) |
201 | { | 219 | { |
202 | size_t ret = 0; | 220 | size_t ret = 0; |
203 | struct rb_node *nd; | 221 | struct rb_node *nd; |
204 | 222 | ||
205 | for (nd = rb_first(threads); nd; nd = rb_next(nd)) { | 223 | for (nd = rb_first(threads); nd; nd = rb_next(nd)) { |
206 | struct thread *pos = rb_entry(nd, struct thread, rb_node); | 224 | struct thread *pos = rb_entry(nd, struct thread, rb_node); |
207 | 225 | ||
208 | ret += thread__fprintf(pos, fp); | 226 | ret += thread__fprintf(pos, fp); |
209 | } | 227 | } |
210 | 228 | ||
211 | return ret; | 229 | return ret; |
212 | } | 230 | } |
213 | 231 |
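__threads__findnew() above is a classic find-or-create lookup: a one-entry "last match" cache is checked first because PID lookups arrive in bursts, then the rbtree is walked, and only on a miss is a new thread allocated, with or without the ":pid" placeholder comm depending on set_comm. Here is a hedged sketch of the same caching pattern, using a plain linked list instead of the kernel rbtree so it stays self-contained; task_state and tasks__findnew are illustrative names:

#include <stdlib.h>

struct task_state {
	struct task_state *next;
	int pid;
};

static struct task_state *tasks__findnew(int pid, struct task_state **head,
					 struct task_state **last_match)
{
	struct task_state *t;

	/* lookups come in bursts, so try the one-entry cache first */
	if (*last_match && (*last_match)->pid == pid)
		return *last_match;

	for (t = *head; t; t = t->next) {
		if (t->pid == pid) {
			*last_match = t;
			return t;
		}
	}

	/* miss: allocate, link in, and prime the cache */
	t = calloc(1, sizeof(*t));
	if (t) {
		t->pid = pid;
		t->next = *head;
		*head = t;
		*last_match = t;
	}
	return t;
}

The real code links the new node into an rbtree instead, which keeps the miss path at O(log n) while the hot path stays a single pointer comparison.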
tools/perf/util/thread.h
1 | #ifndef __PERF_THREAD_H | 1 | #ifndef __PERF_THREAD_H |
2 | #define __PERF_THREAD_H | 2 | #define __PERF_THREAD_H |
3 | 3 | ||
4 | #include <linux/rbtree.h> | 4 | #include <linux/rbtree.h> |
5 | #include <unistd.h> | 5 | #include <unistd.h> |
6 | #include "symbol.h" | 6 | #include "symbol.h" |
7 | 7 | ||
8 | struct thread { | 8 | struct thread { |
9 | struct rb_node rb_node; | 9 | struct rb_node rb_node; |
10 | struct rb_root maps; | 10 | struct rb_root maps; |
11 | struct list_head removed_maps; | 11 | struct list_head removed_maps; |
12 | pid_t pid; | 12 | pid_t pid; |
13 | char shortname[3]; | 13 | char shortname[3]; |
14 | char *comm; | 14 | char *comm; |
15 | }; | 15 | }; |
16 | 16 | ||
17 | int thread__set_comm(struct thread *self, const char *comm); | 17 | int thread__set_comm(struct thread *self, const char *comm); |
18 | struct thread * | 18 | struct thread * |
19 | threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match); | 19 | threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match); |
20 | struct thread * | 20 | struct thread * |
21 | threads__findnew_nocomm(pid_t pid, struct rb_root *threads, | ||
22 | struct thread **last_match); | ||
23 | struct thread * | ||
21 | register_idle_thread(struct rb_root *threads, struct thread **last_match); | 24 | register_idle_thread(struct rb_root *threads, struct thread **last_match); |
22 | void thread__insert_map(struct thread *self, struct map *map); | 25 | void thread__insert_map(struct thread *self, struct map *map); |
23 | int thread__fork(struct thread *self, struct thread *parent); | 26 | int thread__fork(struct thread *self, struct thread *parent); |
24 | size_t threads__fprintf(FILE *fp, struct rb_root *threads); | 27 | size_t threads__fprintf(FILE *fp, struct rb_root *threads); |
25 | 28 | ||
26 | void maps__insert(struct rb_root *maps, struct map *map); | 29 | void maps__insert(struct rb_root *maps, struct map *map); |
27 | struct map *maps__find(struct rb_root *maps, u64 ip); | 30 | struct map *maps__find(struct rb_root *maps, u64 ip); |
28 | 31 | ||
29 | struct symbol *kernel_maps__find_symbol(const u64 ip, struct map **mapp); | 32 | struct symbol *kernel_maps__find_symbol(const u64 ip, struct map **mapp); |
30 | struct map *kernel_maps__find_by_dso_name(const char *name); | 33 | struct map *kernel_maps__find_by_dso_name(const char *name); |
31 | 34 | ||
32 | static inline struct map *thread__find_map(struct thread *self, u64 ip) | 35 | static inline struct map *thread__find_map(struct thread *self, u64 ip) |
33 | { | 36 | { |
34 | return self ? maps__find(&self->maps, ip) : NULL; | 37 | return self ? maps__find(&self->maps, ip) : NULL; |
35 | } | 38 | } |
36 | 39 | ||
37 | #endif /* __PERF_THREAD_H */ | 40 | #endif /* __PERF_THREAD_H */ |
38 | 41 |
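thread.h now exports both lookup flavors. The intended split: callers that will learn the comm from somewhere else, such as the trace payload itself, use threads__findnew_nocomm() so the thread is not pinned to the ":pid" placeholder, then attach the real name via thread__set_comm() once it is known. A hedged sketch of such a consumer follows; struct fake_event and resolve_thread are illustrative names, and only the thread.h API calls are real (it assumes perf's util headers are on the include path):

#include "thread.h"

struct fake_event {		/* illustrative trace record */
	pid_t pid;
	const char *comm;	/* comm string carried in the trace data */
};

static struct thread *resolve_thread(struct fake_event *ev,
				     struct rb_root *threads,
				     struct thread **last_match)
{
	/* no ":pid" placeholder, so a later comm can still be attached */
	struct thread *th = threads__findnew_nocomm(ev->pid, threads,
						    last_match);

	/* no comm event was seen: borrow the comm from the trace itself */
	if (th && !th->comm && ev->comm)
		thread__set_comm(th, ev->comm);

	return th;
}

This is exactly the case the before/after listings illustrate: a task created before the event was opened never produced a comm event, so the ":pid" fallback would otherwise stick.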