Commit c3305257cd4df63e03e21e331a0140ae9c0faccc
1 parent
2cba3ffb9a
Exists in
master
and in
20 other branches
perf stat: Add more cache-miss percentage printouts
Print out the cache-miss percentage as well if the cache refs were collected, for all the generic cache event types. Before: 11,103,723,230 dTLB-loads # 622.471 M/sec ( +- 0.30% ) 87,065,337 dTLB-load-misses # 4.881 M/sec ( +- 0.90% ) After: 11,353,713,242 dTLB-loads # 626.020 M/sec ( +- 0.35% ) 113,393,472 dTLB-load-misses # 1.00% of all dTLB cache hits ( +- 0.49% ) Also highlight percentages that are too high in ASCII color when the command is executed on the console. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Steven Rostedt <rostedt@goodmis.org> Link: http://lkml.kernel.org/n/tip-lkhwxsevdbd9a8nymx0vxc3y@git.kernel.org Signed-off-by: Ingo Molnar <mingo@elte.hu>
Showing 1 changed file with 136 additions and 2 deletions Side-by-side Diff
tools/perf/builtin-stat.c
... | ... | @@ -261,6 +261,10 @@ |
261 | 261 | struct stats runtime_branches_stats[MAX_NR_CPUS]; |
262 | 262 | struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; |
263 | 263 | struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; |
264 | +struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; | |
265 | +struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; | |
266 | +struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; | |
267 | +struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; | |
264 | 268 | struct stats walltime_nsecs_stats; |
265 | 269 | |
266 | 270 | static int create_perf_stat_counter(struct perf_evsel *evsel) |
... | ... | @@ -317,6 +321,14 @@ |
317 | 321 | update_stats(&runtime_cacherefs_stats[0], count[0]); |
318 | 322 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) |
319 | 323 | update_stats(&runtime_l1_dcache_stats[0], count[0]); |
324 | + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) | |
325 | + update_stats(&runtime_l1_icache_stats[0], count[0]); | |
326 | + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) | |
327 | + update_stats(&runtime_ll_cache_stats[0], count[0]); | |
328 | + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) | |
329 | + update_stats(&runtime_dtlb_cache_stats[0], count[0]); | |
330 | + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) | |
331 | + update_stats(&runtime_itlb_cache_stats[0], count[0]); | |
320 | 332 | } |
321 | 333 | |
322 | 334 | /* |
... | ... | @@ -630,6 +642,98 @@ |
630 | 642 | fprintf(stderr, " of all L1-dcache hits "); |
631 | 643 | } |
632 | 644 | |
645 | +static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | |
646 | +{ | |
647 | + double total, ratio = 0.0; | |
648 | + const char *color; | |
649 | + | |
650 | + total = avg_stats(&runtime_l1_icache_stats[cpu]); | |
651 | + | |
652 | + if (total) | |
653 | + ratio = avg / total * 100.0; | |
654 | + | |
655 | + color = PERF_COLOR_NORMAL; | |
656 | + if (ratio > 20.0) | |
657 | + color = PERF_COLOR_RED; | |
658 | + else if (ratio > 10.0) | |
659 | + color = PERF_COLOR_MAGENTA; | |
660 | + else if (ratio > 5.0) | |
661 | + color = PERF_COLOR_YELLOW; | |
662 | + | |
663 | + fprintf(stderr, " # "); | |
664 | + color_fprintf(stderr, color, "%6.2f%%", ratio); | |
665 | + fprintf(stderr, " of all L1-icache hits "); | |
666 | +} | |
667 | + | |
668 | +static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | |
669 | +{ | |
670 | + double total, ratio = 0.0; | |
671 | + const char *color; | |
672 | + | |
673 | + total = avg_stats(&runtime_dtlb_cache_stats[cpu]); | |
674 | + | |
675 | + if (total) | |
676 | + ratio = avg / total * 100.0; | |
677 | + | |
678 | + color = PERF_COLOR_NORMAL; | |
679 | + if (ratio > 20.0) | |
680 | + color = PERF_COLOR_RED; | |
681 | + else if (ratio > 10.0) | |
682 | + color = PERF_COLOR_MAGENTA; | |
683 | + else if (ratio > 5.0) | |
684 | + color = PERF_COLOR_YELLOW; | |
685 | + | |
686 | + fprintf(stderr, " # "); | |
687 | + color_fprintf(stderr, color, "%6.2f%%", ratio); | |
688 | + fprintf(stderr, " of all dTLB cache hits "); | |
689 | +} | |
690 | + | |
691 | +static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | |
692 | +{ | |
693 | + double total, ratio = 0.0; | |
694 | + const char *color; | |
695 | + | |
696 | + total = avg_stats(&runtime_itlb_cache_stats[cpu]); | |
697 | + | |
698 | + if (total) | |
699 | + ratio = avg / total * 100.0; | |
700 | + | |
701 | + color = PERF_COLOR_NORMAL; | |
702 | + if (ratio > 20.0) | |
703 | + color = PERF_COLOR_RED; | |
704 | + else if (ratio > 10.0) | |
705 | + color = PERF_COLOR_MAGENTA; | |
706 | + else if (ratio > 5.0) | |
707 | + color = PERF_COLOR_YELLOW; | |
708 | + | |
709 | + fprintf(stderr, " # "); | |
710 | + color_fprintf(stderr, color, "%6.2f%%", ratio); | |
711 | + fprintf(stderr, " of all iTLB cache hits "); | |
712 | +} | |
713 | + | |
714 | +static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | |
715 | +{ | |
716 | + double total, ratio = 0.0; | |
717 | + const char *color; | |
718 | + | |
719 | + total = avg_stats(&runtime_ll_cache_stats[cpu]); | |
720 | + | |
721 | + if (total) | |
722 | + ratio = avg / total * 100.0; | |
723 | + | |
724 | + color = PERF_COLOR_NORMAL; | |
725 | + if (ratio > 20.0) | |
726 | + color = PERF_COLOR_RED; | |
727 | + else if (ratio > 10.0) | |
728 | + color = PERF_COLOR_MAGENTA; | |
729 | + else if (ratio > 5.0) | |
730 | + color = PERF_COLOR_YELLOW; | |
731 | + | |
732 | + fprintf(stderr, " # "); | |
733 | + color_fprintf(stderr, color, "%6.2f%%", ratio); | |
734 | + fprintf(stderr, " of all LL-cache hits "); | |
735 | +} | |
736 | + | |
633 | 737 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) |
634 | 738 | { |
635 | 739 | double total, ratio = 0.0; |
... | ... | @@ -684,6 +788,34 @@ |
684 | 788 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && |
685 | 789 | runtime_l1_dcache_stats[cpu].n != 0) { |
686 | 790 | print_l1_dcache_misses(cpu, evsel, avg); |
791 | + } else if ( | |
792 | + evsel->attr.type == PERF_TYPE_HW_CACHE && | |
793 | + evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | | |
794 | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
795 | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | |
796 | + runtime_l1_icache_stats[cpu].n != 0) { | |
797 | + print_l1_icache_misses(cpu, evsel, avg); | |
798 | + } else if ( | |
799 | + evsel->attr.type == PERF_TYPE_HW_CACHE && | |
800 | + evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | | |
801 | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
802 | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | |
803 | + runtime_dtlb_cache_stats[cpu].n != 0) { | |
804 | + print_dtlb_cache_misses(cpu, evsel, avg); | |
805 | + } else if ( | |
806 | + evsel->attr.type == PERF_TYPE_HW_CACHE && | |
807 | + evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | | |
808 | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
809 | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | |
810 | + runtime_itlb_cache_stats[cpu].n != 0) { | |
811 | + print_itlb_cache_misses(cpu, evsel, avg); | |
812 | + } else if ( | |
813 | + evsel->attr.type == PERF_TYPE_HW_CACHE && | |
814 | + evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | | |
815 | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
816 | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | |
817 | + runtime_ll_cache_stats[cpu].n != 0) { | |
818 | + print_ll_cache_misses(cpu, evsel, avg); | |
687 | 819 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && |
688 | 820 | runtime_cacherefs_stats[cpu].n != 0) { |
689 | 821 | total = avg_stats(&runtime_cacherefs_stats[cpu]); |
690 | 822 | |
... | ... | @@ -842,10 +974,12 @@ |
842 | 974 | } |
843 | 975 | |
844 | 976 | if (!csv_output) { |
845 | - fprintf(stderr, "\n"); | |
846 | - fprintf(stderr, " %18.9f seconds time elapsed", | |
977 | + if (!null_run) | |
978 | + fprintf(stderr, "\n"); | |
979 | + fprintf(stderr, " %17.9f seconds time elapsed", | |
847 | 980 | avg_stats(&walltime_nsecs_stats)/1e9); |
848 | 981 | if (run_count > 1) { |
982 | + fprintf(stderr, " "); | |
849 | 983 | print_noise_pct(stddev_stats(&walltime_nsecs_stats), |
850 | 984 | avg_stats(&walltime_nsecs_stats)); |
851 | 985 | } |