Commit 5d2cd90922c778908bd0cd669e572a5b5eafd737
1 parent
db9a9cbc81
Exists in
master
and in
39 other branches
perf evsel: Fix use of inherit
perf stat doesn't mmap and it's perfectly fine for it to use task-bound counters with inheritance. So set the attr.inherit on the caller and leave the syscall itself to validate it. When the mmap fails, perf_evlist__mmap will just emit a warning if this is the failure reason. Reported-by: Peter Zijlstra <peterz@infradead.org> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Tom Zanussi <tzanussi@gmail.com> Link: http://lkml.kernel.org/r/20110414170121.GC3229@ghostprotocols.net Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Showing 8 changed files with 41 additions and 42 deletions Side-by-side Diff
tools/perf/builtin-record.c
... | ... | @@ -163,6 +163,7 @@ |
163 | 163 | struct perf_event_attr *attr = &evsel->attr; |
164 | 164 | int track = !evsel->idx; /* only the first counter needs these */ |
165 | 165 | |
166 | + attr->inherit = !no_inherit; | |
166 | 167 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
167 | 168 | PERF_FORMAT_TOTAL_TIME_RUNNING | |
168 | 169 | PERF_FORMAT_ID; |
... | ... | @@ -251,6 +252,9 @@ |
251 | 252 | { |
252 | 253 | struct perf_evsel *pos; |
253 | 254 | |
255 | + if (evlist->cpus->map[0] < 0) | |
256 | + no_inherit = true; | |
257 | + | |
254 | 258 | list_for_each_entry(pos, &evlist->entries, node) { |
255 | 259 | struct perf_event_attr *attr = &pos->attr; |
256 | 260 | /* |
... | ... | @@ -271,8 +275,7 @@ |
271 | 275 | retry_sample_id: |
272 | 276 | attr->sample_id_all = sample_id_all_avail ? 1 : 0; |
273 | 277 | try_again: |
274 | - if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group, | |
275 | - !no_inherit) < 0) { | |
278 | + if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) { | |
276 | 279 | int err = errno; |
277 | 280 | |
278 | 281 | if (err == EPERM || err == EACCES) { |
tools/perf/builtin-stat.c
... | ... | @@ -167,16 +167,17 @@ |
167 | 167 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
168 | 168 | PERF_FORMAT_TOTAL_TIME_RUNNING; |
169 | 169 | |
170 | + attr->inherit = !no_inherit; | |
171 | + | |
170 | 172 | if (system_wide) |
171 | - return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false, false); | |
173 | + return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false); | |
172 | 174 | |
173 | - attr->inherit = !no_inherit; | |
174 | 175 | if (target_pid == -1 && target_tid == -1) { |
175 | 176 | attr->disabled = 1; |
176 | 177 | attr->enable_on_exec = 1; |
177 | 178 | } |
178 | 179 | |
179 | - return perf_evsel__open_per_thread(evsel, evsel_list->threads, false, false); | |
180 | + return perf_evsel__open_per_thread(evsel, evsel_list->threads, false); | |
180 | 181 | } |
181 | 182 | |
182 | 183 | /* |
tools/perf/builtin-test.c
... | ... | @@ -290,7 +290,7 @@ |
290 | 290 | goto out_thread_map_delete; |
291 | 291 | } |
292 | 292 | |
293 | - if (perf_evsel__open_per_thread(evsel, threads, false, false) < 0) { | |
293 | + if (perf_evsel__open_per_thread(evsel, threads, false) < 0) { | |
294 | 294 | pr_debug("failed to open counter: %s, " |
295 | 295 | "tweak /proc/sys/kernel/perf_event_paranoid?\n", |
296 | 296 | strerror(errno)); |
... | ... | @@ -303,7 +303,7 @@ |
303 | 303 | } |
304 | 304 | |
305 | 305 | if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) { |
306 | - pr_debug("perf_evsel__open_read_on_cpu\n"); | |
306 | + pr_debug("perf_evsel__read_on_cpu\n"); | |
307 | 307 | goto out_close_fd; |
308 | 308 | } |
309 | 309 | |
... | ... | @@ -365,7 +365,7 @@ |
365 | 365 | goto out_thread_map_delete; |
366 | 366 | } |
367 | 367 | |
368 | - if (perf_evsel__open(evsel, cpus, threads, false, false) < 0) { | |
368 | + if (perf_evsel__open(evsel, cpus, threads, false) < 0) { | |
369 | 369 | pr_debug("failed to open counter: %s, " |
370 | 370 | "tweak /proc/sys/kernel/perf_event_paranoid?\n", |
371 | 371 | strerror(errno)); |
... | ... | @@ -418,7 +418,7 @@ |
418 | 418 | continue; |
419 | 419 | |
420 | 420 | if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) { |
421 | - pr_debug("perf_evsel__open_read_on_cpu\n"); | |
421 | + pr_debug("perf_evsel__read_on_cpu\n"); | |
422 | 422 | err = -1; |
423 | 423 | break; |
424 | 424 | } |
... | ... | @@ -529,7 +529,7 @@ |
529 | 529 | |
530 | 530 | perf_evlist__add(evlist, evsels[i]); |
531 | 531 | |
532 | - if (perf_evsel__open(evsels[i], cpus, threads, false, false) < 0) { | |
532 | + if (perf_evsel__open(evsels[i], cpus, threads, false) < 0) { | |
533 | 533 | pr_debug("failed to open counter: %s, " |
534 | 534 | "tweak /proc/sys/kernel/perf_event_paranoid?\n", |
535 | 535 | strerror(errno)); |
tools/perf/builtin-top.c
... | ... | @@ -845,9 +845,10 @@ |
845 | 845 | } |
846 | 846 | |
847 | 847 | attr->mmap = 1; |
848 | + attr->inherit = inherit; | |
848 | 849 | try_again: |
849 | 850 | if (perf_evsel__open(counter, top.evlist->cpus, |
850 | - top.evlist->threads, group, inherit) < 0) { | |
851 | + top.evlist->threads, group) < 0) { | |
851 | 852 | int err = errno; |
852 | 853 | |
853 | 854 | if (err == EPERM || err == EACCES) { |
tools/perf/util/evlist.c
... | ... | @@ -12,6 +12,7 @@ |
12 | 12 | #include "evlist.h" |
13 | 13 | #include "evsel.h" |
14 | 14 | #include "util.h" |
15 | +#include "debug.h" | |
15 | 16 | |
16 | 17 | #include <sys/mman.h> |
17 | 18 | |
18 | 19 | |
19 | 20 | |
... | ... | @@ -250,15 +251,19 @@ |
250 | 251 | return evlist->mmap != NULL ? 0 : -ENOMEM; |
251 | 252 | } |
252 | 253 | |
253 | -static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot, | |
254 | - int mask, int fd) | |
254 | +static int __perf_evlist__mmap(struct perf_evlist *evlist, struct perf_evsel *evsel, | |
255 | + int cpu, int prot, int mask, int fd) | |
255 | 256 | { |
256 | 257 | evlist->mmap[cpu].prev = 0; |
257 | 258 | evlist->mmap[cpu].mask = mask; |
258 | 259 | evlist->mmap[cpu].base = mmap(NULL, evlist->mmap_len, prot, |
259 | 260 | MAP_SHARED, fd, 0); |
260 | - if (evlist->mmap[cpu].base == MAP_FAILED) | |
261 | + if (evlist->mmap[cpu].base == MAP_FAILED) { | |
262 | + if (evlist->cpus->map[cpu] == -1 && evsel->attr.inherit) | |
263 | + ui__warning("Inherit is not allowed on per-task " | |
264 | + "events using mmap.\n"); | |
261 | 265 | return -1; |
266 | + } | |
262 | 267 | |
263 | 268 | perf_evlist__add_pollfd(evlist, fd); |
264 | 269 | return 0; |
... | ... | @@ -312,7 +317,8 @@ |
312 | 317 | if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, |
313 | 318 | FD(first_evsel, cpu, 0)) != 0) |
314 | 319 | goto out_unmap; |
315 | - } else if (__perf_evlist__mmap(evlist, cpu, prot, mask, fd) < 0) | |
320 | + } else if (__perf_evlist__mmap(evlist, evsel, cpu, | |
321 | + prot, mask, fd) < 0) | |
316 | 322 | goto out_unmap; |
317 | 323 | |
318 | 324 | if ((evsel->attr.read_format & PERF_FORMAT_ID) && |
tools/perf/util/evsel.c
... | ... | @@ -175,7 +175,7 @@ |
175 | 175 | } |
176 | 176 | |
177 | 177 | static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, |
178 | - struct thread_map *threads, bool group, bool inherit) | |
178 | + struct thread_map *threads, bool group) | |
179 | 179 | { |
180 | 180 | int cpu, thread; |
181 | 181 | unsigned long flags = 0; |
... | ... | @@ -192,19 +192,6 @@ |
192 | 192 | |
193 | 193 | for (cpu = 0; cpu < cpus->nr; cpu++) { |
194 | 194 | int group_fd = -1; |
195 | - /* | |
196 | - * Don't allow mmap() of inherited per-task counters. This | |
197 | - * would create a performance issue due to all children writing | |
198 | - * to the same buffer. | |
199 | - * | |
200 | - * FIXME: | |
201 | - * Proper fix is not to pass 'inherit' to perf_evsel__open*, | |
202 | - * but a 'flags' parameter, with 'group' folded there as well, | |
203 | - * then introduce a PERF_O_{MMAP,GROUP,INHERIT} enum, and if | |
204 | - * O_MMAP is set, emit a warning if cpu < 0 and O_INHERIT is | |
205 | - * set. Lets go for the minimal fix first tho. | |
206 | - */ | |
207 | - evsel->attr.inherit = (cpus->map[cpu] >= 0) && inherit; | |
208 | 195 | |
209 | 196 | for (thread = 0; thread < threads->nr; thread++) { |
210 | 197 | |
... | ... | @@ -253,7 +240,7 @@ |
253 | 240 | }; |
254 | 241 | |
255 | 242 | int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, |
256 | - struct thread_map *threads, bool group, bool inherit) | |
243 | + struct thread_map *threads, bool group) | |
257 | 244 | { |
258 | 245 | if (cpus == NULL) { |
259 | 246 | /* Work around old compiler warnings about strict aliasing */ |
260 | 247 | |
261 | 248 | |
262 | 249 | |
263 | 250 | |
... | ... | @@ -263,19 +250,19 @@ |
263 | 250 | if (threads == NULL) |
264 | 251 | threads = &empty_thread_map.map; |
265 | 252 | |
266 | - return __perf_evsel__open(evsel, cpus, threads, group, inherit); | |
253 | + return __perf_evsel__open(evsel, cpus, threads, group); | |
267 | 254 | } |
268 | 255 | |
269 | 256 | int perf_evsel__open_per_cpu(struct perf_evsel *evsel, |
270 | - struct cpu_map *cpus, bool group, bool inherit) | |
257 | + struct cpu_map *cpus, bool group) | |
271 | 258 | { |
272 | - return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group, inherit); | |
259 | + return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group); | |
273 | 260 | } |
274 | 261 | |
275 | 262 | int perf_evsel__open_per_thread(struct perf_evsel *evsel, |
276 | - struct thread_map *threads, bool group, bool inherit) | |
263 | + struct thread_map *threads, bool group) | |
277 | 264 | { |
278 | - return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, inherit); | |
265 | + return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group); | |
279 | 266 | } |
280 | 267 | |
281 | 268 | static int perf_event__parse_id_sample(const union perf_event *event, u64 type, |
tools/perf/util/evsel.h
... | ... | @@ -81,11 +81,11 @@ |
81 | 81 | void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); |
82 | 82 | |
83 | 83 | int perf_evsel__open_per_cpu(struct perf_evsel *evsel, |
84 | - struct cpu_map *cpus, bool group, bool inherit); | |
84 | + struct cpu_map *cpus, bool group); | |
85 | 85 | int perf_evsel__open_per_thread(struct perf_evsel *evsel, |
86 | - struct thread_map *threads, bool group, bool inherit); | |
86 | + struct thread_map *threads, bool group); | |
87 | 87 | int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, |
88 | - struct thread_map *threads, bool group, bool inherit); | |
88 | + struct thread_map *threads, bool group); | |
89 | 89 | |
90 | 90 | #define perf_evsel__match(evsel, t, c) \ |
91 | 91 | (evsel->attr.type == PERF_TYPE_##t && \ |
tools/perf/util/python.c
... | ... | @@ -498,11 +498,11 @@ |
498 | 498 | struct cpu_map *cpus = NULL; |
499 | 499 | struct thread_map *threads = NULL; |
500 | 500 | PyObject *pcpus = NULL, *pthreads = NULL; |
501 | - int group = 0, overwrite = 0; | |
502 | - static char *kwlist[] = {"cpus", "threads", "group", "overwrite", NULL, NULL}; | |
501 | + int group = 0, inherit = 0; | |
502 | + static char *kwlist[] = {"cpus", "threads", "group", "inherit", NULL, NULL}; | |
503 | 503 | |
504 | 504 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist, |
505 | - &pcpus, &pthreads, &group, &overwrite)) | |
505 | + &pcpus, &pthreads, &group, &inherit)) | |
506 | 506 | return NULL; |
507 | 507 | |
508 | 508 | if (pthreads != NULL) |
... | ... | @@ -511,7 +511,8 @@ |
511 | 511 | if (pcpus != NULL) |
512 | 512 | cpus = ((struct pyrf_cpu_map *)pcpus)->cpus; |
513 | 513 | |
514 | - if (perf_evsel__open(evsel, cpus, threads, group, overwrite) < 0) { | |
514 | + evsel->attr.inherit = inherit; | |
515 | + if (perf_evsel__open(evsel, cpus, threads, group) < 0) { | |
515 | 516 | PyErr_SetFromErrno(PyExc_OSError); |
516 | 517 | return NULL; |
517 | 518 | } |