Commit e6e18ec79b023d5fe84226cef533cf0e3770ce93
Committed by
Ingo Molnar
1 parent
bfbd3381e6
perf_counter: Rework the sample ABI
The PERF_EVENT_READ implementation made me realize we don't actually need the sample_type in the output sample, since we already have that in the perf_counter_attr information. Therefore, remove the PERF_EVENT_MISC_OVERFLOW bit and the event->type overloading, and simply put counter overflow samples in a PERF_EVENT_SAMPLE type. This also fixes the issue that event->type was only 32-bit and sample_type had 64 usable bits. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Showing 5 changed files with 49 additions and 48 deletions Side-by-side Diff
include/linux/perf_counter.h
... | ... | @@ -262,7 +262,6 @@ |
262 | 262 | #define PERF_EVENT_MISC_KERNEL (1 << 0) |
263 | 263 | #define PERF_EVENT_MISC_USER (2 << 0) |
264 | 264 | #define PERF_EVENT_MISC_HYPERVISOR (3 << 0) |
265 | -#define PERF_EVENT_MISC_OVERFLOW (1 << 2) | |
266 | 265 | |
267 | 266 | struct perf_event_header { |
268 | 267 | __u32 type; |
... | ... | @@ -348,9 +347,6 @@ |
348 | 347 | PERF_EVENT_READ = 8, |
349 | 348 | |
350 | 349 | /* |
351 | - * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field | |
352 | - * will be PERF_SAMPLE_* | |
353 | - * | |
354 | 350 | * struct { |
355 | 351 | * struct perf_event_header header; |
356 | 352 | * |
357 | 353 | |
... | ... | @@ -358,8 +354,9 @@ |
358 | 354 | * { u32 pid, tid; } && PERF_SAMPLE_TID |
359 | 355 | * { u64 time; } && PERF_SAMPLE_TIME |
360 | 356 | * { u64 addr; } && PERF_SAMPLE_ADDR |
361 | - * { u64 config; } && PERF_SAMPLE_CONFIG | |
357 | + * { u64 id; } && PERF_SAMPLE_ID | |
362 | 358 | * { u32 cpu, res; } && PERF_SAMPLE_CPU |
359 | + * { u64 period; } && PERF_SAMPLE_PERIOD | |
363 | 360 | * |
364 | 361 | * { u64 nr; |
365 | 362 | * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP |
... | ... | @@ -368,6 +365,9 @@ |
368 | 365 | * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN |
369 | 366 | * }; |
370 | 367 | */ |
368 | + PERF_EVENT_SAMPLE = 9, | |
369 | + | |
370 | + PERF_EVENT_MAX, /* non-ABI */ | |
371 | 371 | }; |
372 | 372 | |
373 | 373 | enum perf_callchain_context { |
kernel/perf_counter.c
... | ... | @@ -2575,15 +2575,14 @@ |
2575 | 2575 | u32 cpu, reserved; |
2576 | 2576 | } cpu_entry; |
2577 | 2577 | |
2578 | - header.type = 0; | |
2578 | + header.type = PERF_EVENT_SAMPLE; | |
2579 | 2579 | header.size = sizeof(header); |
2580 | 2580 | |
2581 | - header.misc = PERF_EVENT_MISC_OVERFLOW; | |
2581 | + header.misc = 0; | |
2582 | 2582 | header.misc |= perf_misc_flags(data->regs); |
2583 | 2583 | |
2584 | 2584 | if (sample_type & PERF_SAMPLE_IP) { |
2585 | 2585 | ip = perf_instruction_pointer(data->regs); |
2586 | - header.type |= PERF_SAMPLE_IP; | |
2587 | 2586 | header.size += sizeof(ip); |
2588 | 2587 | } |
2589 | 2588 | |
... | ... | @@ -2592,7 +2591,6 @@ |
2592 | 2591 | tid_entry.pid = perf_counter_pid(counter, current); |
2593 | 2592 | tid_entry.tid = perf_counter_tid(counter, current); |
2594 | 2593 | |
2595 | - header.type |= PERF_SAMPLE_TID; | |
2596 | 2594 | header.size += sizeof(tid_entry); |
2597 | 2595 | } |
2598 | 2596 | |
2599 | 2597 | |
2600 | 2598 | |
2601 | 2599 | |
2602 | 2600 | |
2603 | 2601 | |
2604 | 2602 | |
2605 | 2603 | |
2606 | 2604 | |
... | ... | @@ -2602,34 +2600,25 @@ |
2602 | 2600 | */ |
2603 | 2601 | time = sched_clock(); |
2604 | 2602 | |
2605 | - header.type |= PERF_SAMPLE_TIME; | |
2606 | 2603 | header.size += sizeof(u64); |
2607 | 2604 | } |
2608 | 2605 | |
2609 | - if (sample_type & PERF_SAMPLE_ADDR) { | |
2610 | - header.type |= PERF_SAMPLE_ADDR; | |
2606 | + if (sample_type & PERF_SAMPLE_ADDR) | |
2611 | 2607 | header.size += sizeof(u64); |
2612 | - } | |
2613 | 2608 | |
2614 | - if (sample_type & PERF_SAMPLE_ID) { | |
2615 | - header.type |= PERF_SAMPLE_ID; | |
2609 | + if (sample_type & PERF_SAMPLE_ID) | |
2616 | 2610 | header.size += sizeof(u64); |
2617 | - } | |
2618 | 2611 | |
2619 | 2612 | if (sample_type & PERF_SAMPLE_CPU) { |
2620 | - header.type |= PERF_SAMPLE_CPU; | |
2621 | 2613 | header.size += sizeof(cpu_entry); |
2622 | 2614 | |
2623 | 2615 | cpu_entry.cpu = raw_smp_processor_id(); |
2624 | 2616 | } |
2625 | 2617 | |
2626 | - if (sample_type & PERF_SAMPLE_PERIOD) { | |
2627 | - header.type |= PERF_SAMPLE_PERIOD; | |
2618 | + if (sample_type & PERF_SAMPLE_PERIOD) | |
2628 | 2619 | header.size += sizeof(u64); |
2629 | - } | |
2630 | 2620 | |
2631 | 2621 | if (sample_type & PERF_SAMPLE_GROUP) { |
2632 | - header.type |= PERF_SAMPLE_GROUP; | |
2633 | 2622 | header.size += sizeof(u64) + |
2634 | 2623 | counter->nr_siblings * sizeof(group_entry); |
2635 | 2624 | } |
2636 | 2625 | |
... | ... | @@ -2639,10 +2628,9 @@ |
2639 | 2628 | |
2640 | 2629 | if (callchain) { |
2641 | 2630 | callchain_size = (1 + callchain->nr) * sizeof(u64); |
2642 | - | |
2643 | - header.type |= PERF_SAMPLE_CALLCHAIN; | |
2644 | 2631 | header.size += callchain_size; |
2645 | - } | |
2632 | + } else | |
2633 | + header.size += sizeof(u64); | |
2646 | 2634 | } |
2647 | 2635 | |
2648 | 2636 | ret = perf_output_begin(&handle, counter, header.size, nmi, 1); |
... | ... | @@ -2693,8 +2681,14 @@ |
2693 | 2681 | } |
2694 | 2682 | } |
2695 | 2683 | |
2696 | - if (callchain) | |
2697 | - perf_output_copy(&handle, callchain, callchain_size); | |
2684 | + if (sample_type & PERF_SAMPLE_CALLCHAIN) { | |
2685 | + if (callchain) | |
2686 | + perf_output_copy(&handle, callchain, callchain_size); | |
2687 | + else { | |
2688 | + u64 nr = 0; | |
2689 | + perf_output_put(&handle, nr); | |
2690 | + } | |
2691 | + } | |
2698 | 2692 | |
2699 | 2693 | perf_output_end(&handle); |
2700 | 2694 | } |
tools/perf/builtin-annotate.c
... | ... | @@ -855,7 +855,7 @@ |
855 | 855 | total_unknown = 0; |
856 | 856 | |
857 | 857 | static int |
858 | -process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | |
858 | +process_sample_event(event_t *event, unsigned long offset, unsigned long head) | |
859 | 859 | { |
860 | 860 | char level; |
861 | 861 | int show = 0; |
862 | 862 | |
... | ... | @@ -1013,10 +1013,10 @@ |
1013 | 1013 | static int |
1014 | 1014 | process_event(event_t *event, unsigned long offset, unsigned long head) |
1015 | 1015 | { |
1016 | - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) | |
1017 | - return process_overflow_event(event, offset, head); | |
1018 | - | |
1019 | 1016 | switch (event->header.type) { |
1017 | + case PERF_EVENT_SAMPLE: | |
1018 | + return process_sample_event(event, offset, head); | |
1019 | + | |
1020 | 1020 | case PERF_EVENT_MMAP: |
1021 | 1021 | return process_mmap_event(event, offset, head); |
1022 | 1022 |
tools/perf/builtin-report.c
... | ... | @@ -53,6 +53,8 @@ |
53 | 53 | |
54 | 54 | static int exclude_other = 1; |
55 | 55 | |
56 | +static u64 sample_type; | |
57 | + | |
56 | 58 | struct ip_event { |
57 | 59 | struct perf_event_header header; |
58 | 60 | u64 ip; |
... | ... | @@ -1135,7 +1137,7 @@ |
1135 | 1137 | } |
1136 | 1138 | |
1137 | 1139 | static int |
1138 | -process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | |
1140 | +process_sample_event(event_t *event, unsigned long offset, unsigned long head) | |
1139 | 1141 | { |
1140 | 1142 | char level; |
1141 | 1143 | int show = 0; |
1142 | 1144 | |
... | ... | @@ -1147,12 +1149,12 @@ |
1147 | 1149 | void *more_data = event->ip.__more_data; |
1148 | 1150 | struct ip_callchain *chain = NULL; |
1149 | 1151 | |
1150 | - if (event->header.type & PERF_SAMPLE_PERIOD) { | |
1152 | + if (sample_type & PERF_SAMPLE_PERIOD) { | |
1151 | 1153 | period = *(u64 *)more_data; |
1152 | 1154 | more_data += sizeof(u64); |
1153 | 1155 | } |
1154 | 1156 | |
1155 | - dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", | |
1157 | + dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n", | |
1156 | 1158 | (void *)(offset + head), |
1157 | 1159 | (void *)(long)(event->header.size), |
1158 | 1160 | event->header.misc, |
... | ... | @@ -1160,7 +1162,7 @@ |
1160 | 1162 | (void *)(long)ip, |
1161 | 1163 | (long long)period); |
1162 | 1164 | |
1163 | - if (event->header.type & PERF_SAMPLE_CALLCHAIN) { | |
1165 | + if (sample_type & PERF_SAMPLE_CALLCHAIN) { | |
1164 | 1166 | int i; |
1165 | 1167 | |
1166 | 1168 | chain = (void *)more_data; |
1167 | 1169 | |
... | ... | @@ -1352,10 +1354,10 @@ |
1352 | 1354 | { |
1353 | 1355 | trace_event(event); |
1354 | 1356 | |
1355 | - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) | |
1356 | - return process_overflow_event(event, offset, head); | |
1357 | - | |
1358 | 1357 | switch (event->header.type) { |
1358 | + case PERF_EVENT_SAMPLE: | |
1359 | + return process_sample_event(event, offset, head); | |
1360 | + | |
1359 | 1361 | case PERF_EVENT_MMAP: |
1360 | 1362 | return process_mmap_event(event, offset, head); |
1361 | 1363 | |
1362 | 1364 | |
1363 | 1365 | |
1364 | 1366 | |
... | ... | @@ -1388,18 +1390,21 @@ |
1388 | 1390 | |
1389 | 1391 | static struct perf_header *header; |
1390 | 1392 | |
1391 | -static int perf_header__has_sample(u64 sample_mask) | |
1393 | +static u64 perf_header__sample_type(void) | |
1392 | 1394 | { |
1395 | + u64 sample_type = 0; | |
1393 | 1396 | int i; |
1394 | 1397 | |
1395 | 1398 | for (i = 0; i < header->attrs; i++) { |
1396 | 1399 | struct perf_header_attr *attr = header->attr[i]; |
1397 | 1400 | |
1398 | - if (!(attr->attr.sample_type & sample_mask)) | |
1399 | - return 0; | |
1401 | + if (!sample_type) | |
1402 | + sample_type = attr->attr.sample_type; | |
1403 | + else if (sample_type != attr->attr.sample_type) | |
1404 | + die("non matching sample_type"); | |
1400 | 1405 | } |
1401 | 1406 | |
1402 | - return 1; | |
1407 | + return sample_type; | |
1403 | 1408 | } |
1404 | 1409 | |
1405 | 1410 | static int __cmd_report(void) |
... | ... | @@ -1437,8 +1442,9 @@ |
1437 | 1442 | header = perf_header__read(input); |
1438 | 1443 | head = header->data_offset; |
1439 | 1444 | |
1440 | - if (sort__has_parent && | |
1441 | - !perf_header__has_sample(PERF_SAMPLE_CALLCHAIN)) { | |
1445 | + sample_type = perf_header__sample_type(); | |
1446 | + | |
1447 | + if (sort__has_parent && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { | |
1442 | 1448 | fprintf(stderr, "selected --sort parent, but no callchain data\n"); |
1443 | 1449 | exit(-1); |
1444 | 1450 | } |
tools/perf/builtin-top.c
... | ... | @@ -392,11 +392,11 @@ |
392 | 392 | samples--; |
393 | 393 | } |
394 | 394 | |
395 | -static void process_event(u64 ip, int counter) | |
395 | +static void process_event(u64 ip, int counter, int user) | |
396 | 396 | { |
397 | 397 | samples++; |
398 | 398 | |
399 | - if (ip < min_ip || ip > max_ip) { | |
399 | + if (user) { | |
400 | 400 | userspace_samples++; |
401 | 401 | return; |
402 | 402 | } |
... | ... | @@ -509,9 +509,10 @@ |
509 | 509 | |
510 | 510 | old += size; |
511 | 511 | |
512 | - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { | |
513 | - if (event->header.type & PERF_SAMPLE_IP) | |
514 | - process_event(event->ip.ip, md->counter); | |
512 | + if (event->header.type == PERF_EVENT_SAMPLE) { | |
513 | + int user = | |
514 | + (event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER; | |
515 | + process_event(event->ip.ip, md->counter, user); | |
515 | 516 | } |
516 | 517 | } |
517 | 518 |