Commit e6e18ec79b023d5fe84226cef533cf0e3770ce93

Authored by Peter Zijlstra
Committed by Ingo Molnar
1 parent bfbd3381e6

perf_counter: Rework the sample ABI

The PERF_EVENT_READ implementation made me realize we don't
actually need the sample_type in the output sample, since
we already have that in the perf_counter_attr information.

Therefore, remove the PERF_EVENT_MISC_OVERFLOW bit and the
event->type overloading, and simply put counter overflow
samples in a PERF_EVENT_SAMPLE type.

This also fixes the issue that event->type was only 32-bit
and sample_type had 64 usable bits.
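
As an illustration of what this means for userspace, a minimal record
dispatcher under the new ABI looks roughly like the sketch below. The
structures are re-declared locally for the sketch (real consumers take
them from linux/perf_counter.h), and the reader is assumed to already
know the counter's 64-bit sample_type from its perf_counter_attr:

	#include <stdint.h>
	#include <stdio.h>

	struct perf_event_header {
		uint32_t type;
		uint16_t misc;
		uint16_t size;
	};

	#define PERF_EVENT_SAMPLE	9	/* record type added by this commit */

	/*
	 * Old scheme: header.misc & PERF_EVENT_MISC_OVERFLOW flagged a
	 * sample, and header.type was overloaded with PERF_SAMPLE_* bits
	 * (only 32 of them usable).  New scheme: samples are a record
	 * type of their own, and the body is decoded against the full
	 * 64-bit sample_type the reader already has.
	 */
	static void process_record(struct perf_event_header *hdr,
				   uint64_t sample_type)
	{
		if (hdr->type != PERF_EVENT_SAMPLE)
			return;		/* mmap, comm, read, ... */

		printf("sample, %u bytes, decode with sample_type %#llx\n",
		       hdr->size, (unsigned long long)sample_type);
	}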

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 5 changed files with 49 additions and 48 deletions

include/linux/perf_counter.h
... ... @@ -262,7 +262,6 @@
262 262 #define PERF_EVENT_MISC_KERNEL (1 << 0)
263 263 #define PERF_EVENT_MISC_USER (2 << 0)
264 264 #define PERF_EVENT_MISC_HYPERVISOR (3 << 0)
265   -#define PERF_EVENT_MISC_OVERFLOW (1 << 2)
266 265  
267 266 struct perf_event_header {
268 267 __u32 type;
... ... @@ -348,9 +347,6 @@
348 347 PERF_EVENT_READ = 8,
349 348  
350 349 /*
351   - * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
352   - * will be PERF_SAMPLE_*
353   - *
354 350 * struct {
355 351 * struct perf_event_header header;
356 352 *
357 353 * { u64 ip; } && PERF_SAMPLE_IP
... ... @@ -358,8 +354,9 @@
358 354 * { u32 pid, tid; } && PERF_SAMPLE_TID
359 355 * { u64 time; } && PERF_SAMPLE_TIME
360 356 * { u64 addr; } && PERF_SAMPLE_ADDR
361   - * { u64 config; } && PERF_SAMPLE_CONFIG
  357 + * { u64 id; } && PERF_SAMPLE_ID
362 358 * { u32 cpu, res; } && PERF_SAMPLE_CPU
  359 + * { u64 period; } && PERF_SAMPLE_PERIOD
363 360 *
364 361 * { u64 nr;
365 362 * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP
... ... @@ -368,6 +365,9 @@
368 365 * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
369 366 * };
370 367 */
  368 + PERF_EVENT_SAMPLE = 9,
  369 +
  370 + PERF_EVENT_MAX, /* non-ABI */
371 371 };
372 372  
373 373 enum perf_callchain_context {
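
The sample layout documented in the comment above is conditional field
by field on sample_type. Below is a hedged sketch of walking that body
in the documented order (flag values as defined in this header at the
time of the commit; the function merely skips the GROUP and CALLCHAIN
payloads rather than interpreting them):

	#include <stdint.h>

	#define PERF_SAMPLE_IP		(1U << 0)
	#define PERF_SAMPLE_TID		(1U << 1)
	#define PERF_SAMPLE_TIME	(1U << 2)
	#define PERF_SAMPLE_ADDR	(1U << 3)
	#define PERF_SAMPLE_GROUP	(1U << 4)
	#define PERF_SAMPLE_CALLCHAIN	(1U << 5)
	#define PERF_SAMPLE_ID		(1U << 6)
	#define PERF_SAMPLE_CPU		(1U << 7)
	#define PERF_SAMPLE_PERIOD	(1U << 8)

	/* 'p' points just past the perf_event_header. */
	static uint64_t *skip_sample_body(uint64_t *p, uint64_t sample_type)
	{
		if (sample_type & PERF_SAMPLE_IP)	p++;	/* u64 ip */
		if (sample_type & PERF_SAMPLE_TID)	p++;	/* u32 pid, tid */
		if (sample_type & PERF_SAMPLE_TIME)	p++;	/* u64 time */
		if (sample_type & PERF_SAMPLE_ADDR)	p++;	/* u64 addr */
		if (sample_type & PERF_SAMPLE_ID)	p++;	/* u64 id */
		if (sample_type & PERF_SAMPLE_CPU)	p++;	/* u32 cpu, res */
		if (sample_type & PERF_SAMPLE_PERIOD)	p++;	/* u64 period */
		if (sample_type & PERF_SAMPLE_GROUP) {
			uint64_t nr = *p++;
			p += 2 * nr;		/* u64 id, val per entry */
		}
		if (sample_type & PERF_SAMPLE_CALLCHAIN) {
			uint64_t nr = *p++;
			p += nr;		/* u64 ips[nr] */
		}
		return p;
	}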
kernel/perf_counter.c
... ... @@ -2575,15 +2575,14 @@
2575 2575 u32 cpu, reserved;
2576 2576 } cpu_entry;
2577 2577  
2578   - header.type = 0;
  2578 + header.type = PERF_EVENT_SAMPLE;
2579 2579 header.size = sizeof(header);
2580 2580  
2581   - header.misc = PERF_EVENT_MISC_OVERFLOW;
  2581 + header.misc = 0;
2582 2582 header.misc |= perf_misc_flags(data->regs);
2583 2583  
2584 2584 if (sample_type & PERF_SAMPLE_IP) {
2585 2585 ip = perf_instruction_pointer(data->regs);
2586   - header.type |= PERF_SAMPLE_IP;
2587 2586 header.size += sizeof(ip);
2588 2587 }
2589 2588  
... ... @@ -2592,7 +2591,6 @@
2592 2591 tid_entry.pid = perf_counter_pid(counter, current);
2593 2592 tid_entry.tid = perf_counter_tid(counter, current);
2594 2593  
2595   - header.type |= PERF_SAMPLE_TID;
2596 2594 header.size += sizeof(tid_entry);
2597 2595 }
2598 2596  
... ... @@ -2602,34 +2600,25 @@
2602 2600 */
2603 2601 time = sched_clock();
2604 2602  
2605   - header.type |= PERF_SAMPLE_TIME;
2606 2603 header.size += sizeof(u64);
2607 2604 }
2608 2605  
2609   - if (sample_type & PERF_SAMPLE_ADDR) {
2610   - header.type |= PERF_SAMPLE_ADDR;
  2606 + if (sample_type & PERF_SAMPLE_ADDR)
2611 2607 header.size += sizeof(u64);
2612   - }
2613 2608  
2614   - if (sample_type & PERF_SAMPLE_ID) {
2615   - header.type |= PERF_SAMPLE_ID;
  2609 + if (sample_type & PERF_SAMPLE_ID)
2616 2610 header.size += sizeof(u64);
2617   - }
2618 2611  
2619 2612 if (sample_type & PERF_SAMPLE_CPU) {
2620   - header.type |= PERF_SAMPLE_CPU;
2621 2613 header.size += sizeof(cpu_entry);
2622 2614  
2623 2615 cpu_entry.cpu = raw_smp_processor_id();
2624 2616 }
2625 2617  
2626   - if (sample_type & PERF_SAMPLE_PERIOD) {
2627   - header.type |= PERF_SAMPLE_PERIOD;
  2618 + if (sample_type & PERF_SAMPLE_PERIOD)
2628 2619 header.size += sizeof(u64);
2629   - }
2630 2620  
2631 2621 if (sample_type & PERF_SAMPLE_GROUP) {
2632   - header.type |= PERF_SAMPLE_GROUP;
2633 2622 header.size += sizeof(u64) +
2634 2623 counter->nr_siblings * sizeof(group_entry);
2635 2624 }
2636 2625  
... ... @@ -2639,10 +2628,9 @@
2639 2628  
2640 2629 if (callchain) {
2641 2630 callchain_size = (1 + callchain->nr) * sizeof(u64);
2642   -
2643   - header.type |= PERF_SAMPLE_CALLCHAIN;
2644 2631 header.size += callchain_size;
2645   - }
  2632 + } else
  2633 + header.size += sizeof(u64);
2646 2634 }
2647 2635  
2648 2636 ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
... ... @@ -2693,8 +2681,14 @@
2693 2681 }
2694 2682 }
2695 2683  
2696   - if (callchain)
2697   - perf_output_copy(&handle, callchain, callchain_size);
  2684 + if (sample_type & PERF_SAMPLE_CALLCHAIN) {
  2685 + if (callchain)
  2686 + perf_output_copy(&handle, callchain, callchain_size);
  2687 + else {
  2688 + u64 nr = 0;
  2689 + perf_output_put(&handle, nr);
  2690 + }
  2691 + }
2698 2692  
2699 2693 perf_output_end(&handle);
2700 2694 }
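
One consequence of the else branch added above: with
PERF_SAMPLE_CALLCHAIN selected, a sample now always carries at least
the leading nr word, even when perf_callchain() returned nothing. A
reader can therefore rely on the field being present; a sketch
(handle_ip() is a hypothetical per-entry callback):

	#include <stdint.h>

	static uint64_t *read_callchain(uint64_t *p,
					void (*handle_ip)(uint64_t))
	{
		uint64_t nr = *p++;	/* 0 when no chain was captured */

		while (nr--)
			handle_ip(*p++);
		return p;
	}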
tools/perf/builtin-annotate.c
... ... @@ -855,7 +855,7 @@
855 855 total_unknown = 0;
856 856  
857 857 static int
858   -process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
  858 +process_sample_event(event_t *event, unsigned long offset, unsigned long head)
859 859 {
860 860 char level;
861 861 int show = 0;
862 862  
... ... @@ -1013,10 +1013,10 @@
1013 1013 static int
1014 1014 process_event(event_t *event, unsigned long offset, unsigned long head)
1015 1015 {
1016   - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
1017   - return process_overflow_event(event, offset, head);
1018   -
1019 1016 switch (event->header.type) {
  1017 + case PERF_EVENT_SAMPLE:
  1018 + return process_sample_event(event, offset, head);
  1019 +
1020 1020 case PERF_EVENT_MMAP:
1021 1021 return process_mmap_event(event, offset, head);
1022 1022  
tools/perf/builtin-report.c
... ... @@ -53,6 +53,8 @@
53 53  
54 54 static int exclude_other = 1;
55 55  
  56 +static u64 sample_type;
  57 +
56 58 struct ip_event {
57 59 struct perf_event_header header;
58 60 u64 ip;
... ... @@ -1135,7 +1137,7 @@
1135 1137 }
1136 1138  
1137 1139 static int
1138   -process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
  1140 +process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1139 1141 {
1140 1142 char level;
1141 1143 int show = 0;
1142 1144  
... ... @@ -1147,12 +1149,12 @@
1147 1149 void *more_data = event->ip.__more_data;
1148 1150 struct ip_callchain *chain = NULL;
1149 1151  
1150   - if (event->header.type & PERF_SAMPLE_PERIOD) {
  1152 + if (sample_type & PERF_SAMPLE_PERIOD) {
1151 1153 period = *(u64 *)more_data;
1152 1154 more_data += sizeof(u64);
1153 1155 }
1154 1156  
1155   - dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n",
  1157 + dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n",
1156 1158 (void *)(offset + head),
1157 1159 (void *)(long)(event->header.size),
1158 1160 event->header.misc,
... ... @@ -1160,7 +1162,7 @@
1160 1162 (void *)(long)ip,
1161 1163 (long long)period);
1162 1164  
1163   - if (event->header.type & PERF_SAMPLE_CALLCHAIN) {
  1165 + if (sample_type & PERF_SAMPLE_CALLCHAIN) {
1164 1166 int i;
1165 1167  
1166 1168 chain = (void *)more_data;
1167 1169  
... ... @@ -1352,10 +1354,10 @@
1352 1354 {
1353 1355 trace_event(event);
1354 1356  
1355   - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
1356   - return process_overflow_event(event, offset, head);
1357   -
1358 1357 switch (event->header.type) {
  1358 + case PERF_EVENT_SAMPLE:
  1359 + return process_sample_event(event, offset, head);
  1360 +
1359 1361 case PERF_EVENT_MMAP:
1360 1362 return process_mmap_event(event, offset, head);
1361 1363  
... ... @@ -1388,18 +1390,21 @@
1388 1390  
1389 1391 static struct perf_header *header;
1390 1392  
1391   -static int perf_header__has_sample(u64 sample_mask)
  1393 +static u64 perf_header__sample_type(void)
1392 1394 {
  1395 + u64 sample_type = 0;
1393 1396 int i;
1394 1397  
1395 1398 for (i = 0; i < header->attrs; i++) {
1396 1399 struct perf_header_attr *attr = header->attr[i];
1397 1400  
1398   - if (!(attr->attr.sample_type & sample_mask))
1399   - return 0;
  1401 + if (!sample_type)
  1402 + sample_type = attr->attr.sample_type;
  1403 + else if (sample_type != attr->attr.sample_type)
  1404 + die("non matching sample_type");
1400 1405 }
1401 1406  
1402   - return 1;
  1407 + return sample_type;
1403 1408 }
1404 1409  
1405 1410 static int __cmd_report(void)
... ... @@ -1437,8 +1442,9 @@
1437 1442 header = perf_header__read(input);
1438 1443 head = header->data_offset;
1439 1444  
1440   - if (sort__has_parent &&
1441   - !perf_header__has_sample(PERF_SAMPLE_CALLCHAIN)) {
  1445 + sample_type = perf_header__sample_type();
  1446 +
  1447 + if (sort__has_parent && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
1442 1448 fprintf(stderr, "selected --sort parent, but no callchain data\n");
1443 1449 exit(-1);
1444 1450 }
tools/perf/builtin-top.c
... ... @@ -392,11 +392,11 @@
392 392 samples--;
393 393 }
394 394  
395   -static void process_event(u64 ip, int counter)
  395 +static void process_event(u64 ip, int counter, int user)
396 396 {
397 397 samples++;
398 398  
399   - if (ip < min_ip || ip > max_ip) {
  399 + if (user) {
400 400 userspace_samples++;
401 401 return;
402 402 }
... ... @@ -509,9 +509,10 @@
509 509  
510 510 old += size;
511 511  
512   - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
513   - if (event->header.type & PERF_SAMPLE_IP)
514   - process_event(event->ip.ip, md->counter);
  512 + if (event->header.type == PERF_EVENT_SAMPLE) {
  513 + int user =
  514 + (event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER;
  515 + process_event(event->ip.ip, md->counter, user);
515 516 }
516 517 }
517 518
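
For completeness, the cpumode test used above in builtin-top.c, pulled
out as a helper (mask and values as defined in perf_counter.h; a
sketch, not the tool's code):

	#include <stdint.h>

	#define PERF_EVENT_MISC_CPUMODE_MASK	(3 << 0)
	#define PERF_EVENT_MISC_USER		(2 << 0)

	static int sample_is_user(uint16_t misc)
	{
		return (misc & PERF_EVENT_MISC_CPUMODE_MASK) ==
			PERF_EVENT_MISC_USER;
	}

Unlike the old ip-range heuristic, this classifies a sample by the
mode the PMU recorded, so it stays correct for ips that fall outside
the [min_ip, max_ip] window.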