Commit c3305257cd4df63e03e21e331a0140ae9c0faccc

Authored by Ingo Molnar
1 parent 2cba3ffb9a

perf stat: Add more cache-miss percentage printouts

Print out the cache-miss percentage as well if the cache refs were
collected, for all the generic cache event types.

Before:

   11,103,723,230 dTLB-loads                #  622.471 M/sec                    ( +-  0.30% )
       87,065,337 dTLB-load-misses          #    4.881 M/sec                    ( +-  0.90% )

After:

   11,353,713,242 dTLB-loads                #  626.020 M/sec                    ( +-  0.35% )
      113,393,472 dTLB-load-misses          #    1.00% of all dTLB cache hits   ( +-  0.49% )

Also highlight too-high percentages with ASCII colors when it's executed on the console.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-lkhwxsevdbd9a8nymx0vxc3y@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 1 changed file with 136 additions and 2 deletions Side-by-side Diff

tools/perf/builtin-stat.c
... ... @@ -261,6 +261,10 @@
261 261 struct stats runtime_branches_stats[MAX_NR_CPUS];
262 262 struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
263 263 struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
  264 +struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
  265 +struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
  266 +struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
  267 +struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
264 268 struct stats walltime_nsecs_stats;
265 269  
266 270 static int create_perf_stat_counter(struct perf_evsel *evsel)
... ... @@ -317,6 +321,14 @@
317 321 update_stats(&runtime_cacherefs_stats[0], count[0]);
318 322 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
319 323 update_stats(&runtime_l1_dcache_stats[0], count[0]);
  324 + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
  325 + update_stats(&runtime_l1_icache_stats[0], count[0]);
  326 + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
  327 + update_stats(&runtime_ll_cache_stats[0], count[0]);
  328 + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
  329 + update_stats(&runtime_dtlb_cache_stats[0], count[0]);
  330 + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
  331 + update_stats(&runtime_itlb_cache_stats[0], count[0]);
320 332 }
321 333  
322 334 /*
... ... @@ -630,6 +642,98 @@
630 642 fprintf(stderr, " of all L1-dcache hits ");
631 643 }
632 644  
  645 +static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
  646 +{
  647 + double total, ratio = 0.0;
  648 + const char *color;
  649 +
  650 + total = avg_stats(&runtime_l1_icache_stats[cpu]);
  651 +
  652 + if (total)
  653 + ratio = avg / total * 100.0;
  654 +
  655 + color = PERF_COLOR_NORMAL;
  656 + if (ratio > 20.0)
  657 + color = PERF_COLOR_RED;
  658 + else if (ratio > 10.0)
  659 + color = PERF_COLOR_MAGENTA;
  660 + else if (ratio > 5.0)
  661 + color = PERF_COLOR_YELLOW;
  662 +
  663 + fprintf(stderr, " # ");
  664 + color_fprintf(stderr, color, "%6.2f%%", ratio);
  665 + fprintf(stderr, " of all L1-icache hits ");
  666 +}
  667 +
  668 +static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
  669 +{
  670 + double total, ratio = 0.0;
  671 + const char *color;
  672 +
  673 + total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
  674 +
  675 + if (total)
  676 + ratio = avg / total * 100.0;
  677 +
  678 + color = PERF_COLOR_NORMAL;
  679 + if (ratio > 20.0)
  680 + color = PERF_COLOR_RED;
  681 + else if (ratio > 10.0)
  682 + color = PERF_COLOR_MAGENTA;
  683 + else if (ratio > 5.0)
  684 + color = PERF_COLOR_YELLOW;
  685 +
  686 + fprintf(stderr, " # ");
  687 + color_fprintf(stderr, color, "%6.2f%%", ratio);
  688 + fprintf(stderr, " of all dTLB cache hits ");
  689 +}
  690 +
  691 +static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
  692 +{
  693 + double total, ratio = 0.0;
  694 + const char *color;
  695 +
  696 + total = avg_stats(&runtime_itlb_cache_stats[cpu]);
  697 +
  698 + if (total)
  699 + ratio = avg / total * 100.0;
  700 +
  701 + color = PERF_COLOR_NORMAL;
  702 + if (ratio > 20.0)
  703 + color = PERF_COLOR_RED;
  704 + else if (ratio > 10.0)
  705 + color = PERF_COLOR_MAGENTA;
  706 + else if (ratio > 5.0)
  707 + color = PERF_COLOR_YELLOW;
  708 +
  709 + fprintf(stderr, " # ");
  710 + color_fprintf(stderr, color, "%6.2f%%", ratio);
  711 + fprintf(stderr, " of all iTLB cache hits ");
  712 +}
  713 +
  714 +static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
  715 +{
  716 + double total, ratio = 0.0;
  717 + const char *color;
  718 +
  719 + total = avg_stats(&runtime_ll_cache_stats[cpu]);
  720 +
  721 + if (total)
  722 + ratio = avg / total * 100.0;
  723 +
  724 + color = PERF_COLOR_NORMAL;
  725 + if (ratio > 20.0)
  726 + color = PERF_COLOR_RED;
  727 + else if (ratio > 10.0)
  728 + color = PERF_COLOR_MAGENTA;
  729 + else if (ratio > 5.0)
  730 + color = PERF_COLOR_YELLOW;
  731 +
  732 + fprintf(stderr, " # ");
  733 + color_fprintf(stderr, color, "%6.2f%%", ratio);
  734 + fprintf(stderr, " of all LL-cache hits ");
  735 +}
  736 +
633 737 static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
634 738 {
635 739 double total, ratio = 0.0;
... ... @@ -684,6 +788,34 @@
684 788 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
685 789 runtime_l1_dcache_stats[cpu].n != 0) {
686 790 print_l1_dcache_misses(cpu, evsel, avg);
  791 + } else if (
  792 + evsel->attr.type == PERF_TYPE_HW_CACHE &&
  793 + evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
  794 + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
  795 + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
  796 + runtime_l1_icache_stats[cpu].n != 0) {
  797 + print_l1_icache_misses(cpu, evsel, avg);
  798 + } else if (
  799 + evsel->attr.type == PERF_TYPE_HW_CACHE &&
  800 + evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
  801 + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
  802 + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
  803 + runtime_dtlb_cache_stats[cpu].n != 0) {
  804 + print_dtlb_cache_misses(cpu, evsel, avg);
  805 + } else if (
  806 + evsel->attr.type == PERF_TYPE_HW_CACHE &&
  807 + evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
  808 + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
  809 + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
  810 + runtime_itlb_cache_stats[cpu].n != 0) {
  811 + print_itlb_cache_misses(cpu, evsel, avg);
  812 + } else if (
  813 + evsel->attr.type == PERF_TYPE_HW_CACHE &&
  814 + evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
  815 + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
  816 + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
  817 + runtime_ll_cache_stats[cpu].n != 0) {
  818 + print_ll_cache_misses(cpu, evsel, avg);
687 819 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
688 820 runtime_cacherefs_stats[cpu].n != 0) {
689 821 total = avg_stats(&runtime_cacherefs_stats[cpu]);
690 822  
... ... @@ -842,10 +974,12 @@
842 974 }
843 975  
844 976 if (!csv_output) {
845   - fprintf(stderr, "\n");
846   - fprintf(stderr, " %18.9f seconds time elapsed",
  977 + if (!null_run)
  978 + fprintf(stderr, "\n");
  979 + fprintf(stderr, " %17.9f seconds time elapsed",
847 980 avg_stats(&walltime_nsecs_stats)/1e9);
848 981 if (run_count > 1) {
  982 + fprintf(stderr, " ");
849 983 print_noise_pct(stddev_stats(&walltime_nsecs_stats),
850 984 avg_stats(&walltime_nsecs_stats));
851 985 }